{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:10:59.485311', 'step': 0, 'epoch': 0}
{'type': 'pplx', 'content': 155.712699148016, 'timestamp': '2025-10-02 00:10:59.489581', 'step': 0, 'epoch': 0}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:10:59.574078', 'step': 0, 'epoch': 1}
{'type': 'loss', 'content': 0.38445523381233215, 'timestamp': '2025-10-02 00:10:59.577390', 'step': 1, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:10:59.647006', 'step': 1, 'epoch': 1}
{'type': 'loss', 'content': 0.5633135437965393, 'timestamp': '2025-10-02 00:10:59.649288', 'step': 2, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:10:59.755460', 'step': 2, 'epoch': 1}
{'type': 'loss', 'content': 1.0013976097106934, 'timestamp': '2025-10-02 00:10:59.757388', 'step': 3, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:10:59.815829', 'step': 3, 'epoch': 1}
{'type': 'loss', 'content': 0.534419596195221, 'timestamp': '2025-10-02 00:10:59.861622', 'step': 4, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:10:59.918819', 'step': 4, 'epoch': 1}
{'type': 'loss', 'content': 0.44253718852996826, 'timestamp': '2025-10-02 00:10:59.926228', 'step': 5, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:10:59.981056', 'step': 5, 'epoch': 1}
{'type': 'loss', 'content': 0.47678616642951965, 'timestamp': '2025-10-02 00:10:59.983482', 'step': 6, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:00.048405', 'step': 6, 'epoch': 1}
{'type': 'loss', 'content': 0.35863012075424194, 'timestamp': '2025-10-02 00:11:00.051423', 'step': 7, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:00.113740', 'step': 7, 'epoch': 1}
{'type': 'loss', 'content': 0.43598055839538574, 'timestamp': '2025-10-02 00:11:00.119532', 'step': 8, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:00.181178', 'step': 8, 'epoch': 1}
{'type': 'loss', 'content': 0.3903904855251312, 'timestamp': '2025-10-02 00:11:00.183345', 'step': 9, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:00.244361', 'step': 9, 'epoch': 1}
{'type': 'loss', 'content': 0.43691715598106384, 'timestamp': '2025-10-02 00:11:00.246458', 'step': 10, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:00.320422', 'step': 10, 'epoch': 1}
{'type': 'loss', 'content': 0.5018488168716431, 'timestamp': '2025-10-02 00:11:00.322664', 'step': 11, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:00.376434', 'step': 11, 'epoch': 1}
{'type': 'loss', 'content': 0.28610894083976746, 'timestamp': '2025-10-02 00:11:00.382288', 'step': 12, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:00.435746', 'step': 12, 'epoch': 1}
{'type': 'loss', 'content': 0.24010944366455078, 'timestamp': '2025-10-02 00:11:00.446032', 'step': 13, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:00.510600', 'step': 13, 'epoch': 1}
{'type': 'loss', 'content': 0.15917493402957916, 'timestamp': '2025-10-02 00:11:00.517758', 'step': 14, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:00.572532', 'step': 14, 'epoch': 1}
{'type': 'loss', 'content': 0.22471998631954193, 'timestamp': '2025-10-02 00:11:00.579915', 'step': 15, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:00.633291', 'step': 15, 'epoch': 1}
{'type': 'loss', 'content': 0.44221019744873047, 'timestamp': '2025-10-02 00:11:00.638859', 'step': 16, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:00.692036', 'step': 16, 'epoch': 1}
{'type': 'loss', 'content': 0.12500858306884766, 'timestamp': '2025-10-02 00:11:00.694387', 'step': 17, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:00.748402', 'step': 17, 'epoch': 1}
{'type': 'loss', 'content': 0.13855822384357452, 'timestamp': '2025-10-02 00:11:00.750685', 'step': 18, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:00.805019', 'step': 18, 'epoch': 1}
{'type': 'loss', 'content': 0.11134219169616699, 'timestamp': '2025-10-02 00:11:00.807227', 'step': 19, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:00.861289', 'step': 19, 'epoch': 1}
{'type': 'loss', 'content': 0.3295917510986328, 'timestamp': '2025-10-02 00:11:00.867133', 'step': 20, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:00.920241', 'step': 20, 'epoch': 1}
{'type': 'loss', 'content': 0.23940391838550568, 'timestamp': '2025-10-02 00:11:00.922489', 'step': 21, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:00.976821', 'step': 21, 'epoch': 1}
{'type': 'loss', 'content': 0.21501359343528748, 'timestamp': '2025-10-02 00:11:00.979084', 'step': 22, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:01.033270', 'step': 22, 'epoch': 1}
{'type': 'loss', 'content': 0.1353156715631485, 'timestamp': '2025-10-02 00:11:01.042644', 'step': 23, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:01.096367', 'step': 23, 'epoch': 1}
{'type': 'loss', 'content': 0.10774081945419312, 'timestamp': '2025-10-02 00:11:01.102076', 'step': 24, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:01.155031', 'step': 24, 'epoch': 1}
{'type': 'loss', 'content': 0.18422193825244904, 'timestamp': '2025-10-02 00:11:01.157033', 'step': 25, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:01.224472', 'step': 25, 'epoch': 1}
{'type': 'loss', 'content': 0.11647197604179382, 'timestamp': '2025-10-02 00:11:01.226678', 'step': 26, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:01.281706', 'step': 26, 'epoch': 1}
{'type': 'loss', 'content': 0.3017221987247467, 'timestamp': '2025-10-02 00:11:01.284162', 'step': 27, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:01.339510', 'step': 27, 'epoch': 1}
{'type': 'loss', 'content': 0.3017544448375702, 'timestamp': '2025-10-02 00:11:01.345191', 'step': 28, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:01.411235', 'step': 28, 'epoch': 1}
{'type': 'loss', 'content': 0.10651066899299622, 'timestamp': '2025-10-02 00:11:01.413414', 'step': 29, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:01.466905', 'step': 29, 'epoch': 1}
{'type': 'loss', 'content': 0.14108380675315857, 'timestamp': '2025-10-02 00:11:01.469137', 'step': 30, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:01.522697', 'step': 30, 'epoch': 1}
{'type': 'loss', 'content': 0.2177417129278183, 'timestamp': '2025-10-02 00:11:01.524819', 'step': 31, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:01.585942', 'step': 31, 'epoch': 1}
{'type': 'loss', 'content': 0.11270944774150848, 'timestamp': '2025-10-02 00:11:01.597206', 'step': 32, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:01.651077', 'step': 32, 'epoch': 1}
{'type': 'loss', 'content': 0.11725369840860367, 'timestamp': '2025-10-02 00:11:01.653124', 'step': 33, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:01.706338', 'step': 33, 'epoch': 1}
{'type': 'loss', 'content': 0.17159783840179443, 'timestamp': '2025-10-02 00:11:01.708806', 'step': 34, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:11:01.778269', 'step': 34, 'epoch': 1}
{'type': 'loss', 'content': 0.2505936324596405, 'timestamp': '2025-10-02 00:11:01.780387', 'step': 35, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:01.840459', 'step': 35, 'epoch': 1}
{'type': 'loss', 'content': 0.24235427379608154, 'timestamp': '2025-10-02 00:11:01.846275', 'step': 36, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:01.898946', 'step': 36, 'epoch': 1}
{'type': 'loss', 'content': 0.11589522659778595, 'timestamp': '2025-10-02 00:11:01.901290', 'step': 37, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:01.954409', 'step': 37, 'epoch': 1}
{'type': 'loss', 'content': 0.11805759370326996, 'timestamp': '2025-10-02 00:11:01.957128', 'step': 38, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:11:02.032635', 'step': 38, 'epoch': 1}
{'type': 'loss', 'content': 0.08825335651636124, 'timestamp': '2025-10-02 00:11:02.044556', 'step': 39, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:02.098837', 'step': 39, 'epoch': 1}
{'type': 'loss', 'content': 0.12944869697093964, 'timestamp': '2025-10-02 00:11:02.109058', 'step': 40, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:02.162719', 'step': 40, 'epoch': 1}
{'type': 'loss', 'content': 0.11548452079296112, 'timestamp': '2025-10-02 00:11:02.165060', 'step': 41, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:02.218314', 'step': 41, 'epoch': 1}
{'type': 'loss', 'content': 0.2048344761133194, 'timestamp': '2025-10-02 00:11:02.220836', 'step': 42, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:11:02.294628', 'step': 42, 'epoch': 1}
{'type': 'loss', 'content': 0.07586759328842163, 'timestamp': '2025-10-02 00:11:02.305530', 'step': 43, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:02.359653', 'step': 43, 'epoch': 1}
{'type': 'loss', 'content': 0.1431766301393509, 'timestamp': '2025-10-02 00:11:02.365430', 'step': 44, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:02.418283', 'step': 44, 'epoch': 1}
{'type': 'loss', 'content': 0.2688452899456024, 'timestamp': '2025-10-02 00:11:02.420639', 'step': 45, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:02.474484', 'step': 45, 'epoch': 1}
{'type': 'loss', 'content': 0.11113300919532776, 'timestamp': '2025-10-02 00:11:02.482049', 'step': 46, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:02.536958', 'step': 46, 'epoch': 1}
{'type': 'loss', 'content': 0.06962396204471588, 'timestamp': '2025-10-02 00:11:02.539212', 'step': 47, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:02.593443', 'step': 47, 'epoch': 1}
{'type': 'loss', 'content': 0.11054862290620804, 'timestamp': '2025-10-02 00:11:02.599405', 'step': 48, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:02.652856', 'step': 48, 'epoch': 1}
{'type': 'loss', 'content': 0.18632787466049194, 'timestamp': '2025-10-02 00:11:02.663033', 'step': 49, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:02.717458', 'step': 49, 'epoch': 1}
{'type': 'loss', 'content': 0.08481894433498383, 'timestamp': '2025-10-02 00:11:02.719928', 'step': 50, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:02.774424', 'step': 50, 'epoch': 1}
{'type': 'loss', 'content': 0.16369952261447906, 'timestamp': '2025-10-02 00:11:02.779769', 'step': 51, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:02.839080', 'step': 51, 'epoch': 1}
{'type': 'loss', 'content': 0.053640227764844894, 'timestamp': '2025-10-02 00:11:02.847478', 'step': 52, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:02.908140', 'step': 52, 'epoch': 1}
{'type': 'loss', 'content': 0.20182031393051147, 'timestamp': '2025-10-02 00:11:02.914120', 'step': 53, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:11:02.996034', 'step': 53, 'epoch': 1}
{'type': 'loss', 'content': 0.08558276295661926, 'timestamp': '2025-10-02 00:11:03.010938', 'step': 54, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:03.100531', 'step': 54, 'epoch': 1}
{'type': 'loss', 'content': 0.08923403173685074, 'timestamp': '2025-10-02 00:11:03.113958', 'step': 55, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:03.186908', 'step': 55, 'epoch': 1}
{'type': 'loss', 'content': 0.0894496887922287, 'timestamp': '2025-10-02 00:11:03.197195', 'step': 56, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:03.274570', 'step': 56, 'epoch': 1}
{'type': 'loss', 'content': 0.2653721570968628, 'timestamp': '2025-10-02 00:11:03.288351', 'step': 57, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:03.371881', 'step': 57, 'epoch': 1}
{'type': 'loss', 'content': 0.07861937582492828, 'timestamp': '2025-10-02 00:11:03.381538', 'step': 58, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:03.449150', 'step': 58, 'epoch': 1}
{'type': 'loss', 'content': 0.3034895658493042, 'timestamp': '2025-10-02 00:11:03.453343', 'step': 59, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:03.528928', 'step': 59, 'epoch': 1}
{'type': 'loss', 'content': 0.19823457300662994, 'timestamp': '2025-10-02 00:11:03.539464', 'step': 60, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:03.609587', 'step': 60, 'epoch': 1}
{'type': 'loss', 'content': 0.185283362865448, 'timestamp': '2025-10-02 00:11:03.620662', 'step': 61, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:03.697685', 'step': 61, 'epoch': 1}
{'type': 'loss', 'content': 0.07087692618370056, 'timestamp': '2025-10-02 00:11:03.711646', 'step': 62, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:03.783780', 'step': 62, 'epoch': 1}
{'type': 'loss', 'content': 0.20180560648441315, 'timestamp': '2025-10-02 00:11:03.793198', 'step': 63, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:03.861504', 'step': 63, 'epoch': 1}
{'type': 'loss', 'content': 0.11565210670232773, 'timestamp': '2025-10-02 00:11:03.868426', 'step': 64, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:03.931429', 'step': 64, 'epoch': 1}
{'type': 'loss', 'content': 0.31574535369873047, 'timestamp': '2025-10-02 00:11:03.935085', 'step': 65, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:03.991727', 'step': 65, 'epoch': 1}
{'type': 'loss', 'content': 0.10686361789703369, 'timestamp': '2025-10-02 00:11:03.999213', 'step': 66, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:04.063962', 'step': 66, 'epoch': 1}
{'type': 'loss', 'content': 0.09063513576984406, 'timestamp': '2025-10-02 00:11:04.071484', 'step': 67, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:04.131346', 'step': 67, 'epoch': 1}
{'type': 'loss', 'content': 0.31557977199554443, 'timestamp': '2025-10-02 00:11:04.137964', 'step': 68, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:04.208590', 'step': 68, 'epoch': 1}
{'type': 'loss', 'content': 0.25689226388931274, 'timestamp': '2025-10-02 00:11:04.211826', 'step': 69, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:04.268796', 'step': 69, 'epoch': 1}
{'type': 'loss', 'content': 0.2695624828338623, 'timestamp': '2025-10-02 00:11:04.271504', 'step': 70, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:04.348399', 'step': 70, 'epoch': 1}
{'type': 'loss', 'content': 0.0720774456858635, 'timestamp': '2025-10-02 00:11:04.359454', 'step': 71, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:04.434021', 'step': 71, 'epoch': 1}
{'type': 'loss', 'content': 0.13031086325645447, 'timestamp': '2025-10-02 00:11:04.442363', 'step': 72, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:04.508426', 'step': 72, 'epoch': 1}
{'type': 'loss', 'content': 0.049411121755838394, 'timestamp': '2025-10-02 00:11:04.519016', 'step': 73, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:04.602210', 'step': 73, 'epoch': 1}
{'type': 'loss', 'content': 0.09970364719629288, 'timestamp': '2025-10-02 00:11:04.605268', 'step': 74, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:04.678499', 'step': 74, 'epoch': 1}
{'type': 'loss', 'content': 0.035198818892240524, 'timestamp': '2025-10-02 00:11:04.687994', 'step': 75, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:04.760276', 'step': 75, 'epoch': 1}
{'type': 'loss', 'content': 0.15423396229743958, 'timestamp': '2025-10-02 00:11:04.769308', 'step': 76, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:04.827761', 'step': 76, 'epoch': 1}
{'type': 'loss', 'content': 0.1342025101184845, 'timestamp': '2025-10-02 00:11:04.833809', 'step': 77, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:11:04.913389', 'step': 77, 'epoch': 1}
{'type': 'loss', 'content': 0.047482457011938095, 'timestamp': '2025-10-02 00:11:04.919577', 'step': 78, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:04.981939', 'step': 78, 'epoch': 1}
{'type': 'loss', 'content': 0.20305348932743073, 'timestamp': '2025-10-02 00:11:04.985116', 'step': 79, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:05.050258', 'step': 79, 'epoch': 1}
{'type': 'loss', 'content': 0.20494824647903442, 'timestamp': '2025-10-02 00:11:05.065974', 'step': 80, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:05.126183', 'step': 80, 'epoch': 1}
{'type': 'loss', 'content': 0.35850343108177185, 'timestamp': '2025-10-02 00:11:05.128935', 'step': 81, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:11:05.190911', 'step': 81, 'epoch': 1}
{'type': 'loss', 'content': 0.05001550540328026, 'timestamp': '2025-10-02 00:11:05.201143', 'step': 82, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:05.259913', 'step': 82, 'epoch': 1}
{'type': 'loss', 'content': 0.2622007429599762, 'timestamp': '2025-10-02 00:11:05.263234', 'step': 83, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:05.329637', 'step': 83, 'epoch': 1}
{'type': 'loss', 'content': 0.10172087699174881, 'timestamp': '2025-10-02 00:11:05.337825', 'step': 84, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:05.403817', 'step': 84, 'epoch': 1}
{'type': 'loss', 'content': 0.2316645234823227, 'timestamp': '2025-10-02 00:11:05.407350', 'step': 85, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:05.479102', 'step': 85, 'epoch': 1}
{'type': 'loss', 'content': 0.11928366124629974, 'timestamp': '2025-10-02 00:11:05.488607', 'step': 86, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:05.549851', 'step': 86, 'epoch': 1}
{'type': 'loss', 'content': 0.1320517212152481, 'timestamp': '2025-10-02 00:11:05.554142', 'step': 87, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:05.628318', 'step': 87, 'epoch': 1}
{'type': 'loss', 'content': 0.10211074352264404, 'timestamp': '2025-10-02 00:11:05.635021', 'step': 88, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:05.708099', 'step': 88, 'epoch': 1}
{'type': 'loss', 'content': 0.029247699305415154, 'timestamp': '2025-10-02 00:11:05.719030', 'step': 89, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:05.799622', 'step': 89, 'epoch': 1}
{'type': 'loss', 'content': 0.06935179978609085, 'timestamp': '2025-10-02 00:11:05.804015', 'step': 90, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:05.874349', 'step': 90, 'epoch': 1}
{'type': 'loss', 'content': 0.10986977070569992, 'timestamp': '2025-10-02 00:11:05.877186', 'step': 91, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:05.944688', 'step': 91, 'epoch': 1}
{'type': 'loss', 'content': 0.19912171363830566, 'timestamp': '2025-10-02 00:11:05.959615', 'step': 92, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:06.034339', 'step': 92, 'epoch': 1}
{'type': 'loss', 'content': 0.06711865216493607, 'timestamp': '2025-10-02 00:11:06.047449', 'step': 93, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:06.123463', 'step': 93, 'epoch': 1}
{'type': 'loss', 'content': 0.04206876456737518, 'timestamp': '2025-10-02 00:11:06.132848', 'step': 94, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:06.202046', 'step': 94, 'epoch': 1}
{'type': 'loss', 'content': 0.14018476009368896, 'timestamp': '2025-10-02 00:11:06.207986', 'step': 95, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:06.266304', 'step': 95, 'epoch': 1}
{'type': 'loss', 'content': 0.22599883377552032, 'timestamp': '2025-10-02 00:11:06.281043', 'step': 96, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:06.346570', 'step': 96, 'epoch': 1}
{'type': 'loss', 'content': 0.12438246607780457, 'timestamp': '2025-10-02 00:11:06.356002', 'step': 97, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:06.423701', 'step': 97, 'epoch': 1}
{'type': 'loss', 'content': 0.04272902011871338, 'timestamp': '2025-10-02 00:11:06.426758', 'step': 98, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:06.498167', 'step': 98, 'epoch': 1}
{'type': 'loss', 'content': 0.0872092992067337, 'timestamp': '2025-10-02 00:11:06.501353', 'step': 99, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:06.575326', 'step': 99, 'epoch': 1}
{'type': 'loss', 'content': 0.12793086469173431, 'timestamp': '2025-10-02 00:11:06.583599', 'step': 100, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:06.640255', 'step': 100, 'epoch': 1}
{'type': 'loss', 'content': 0.20588086545467377, 'timestamp': '2025-10-02 00:11:06.652358', 'step': 101, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:11:06.735025', 'step': 101, 'epoch': 1}
{'type': 'loss', 'content': 0.160044327378273, 'timestamp': '2025-10-02 00:11:06.739828', 'step': 102, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:11:06.817471', 'step': 102, 'epoch': 1}
{'type': 'loss', 'content': 0.3103429079055786, 'timestamp': '2025-10-02 00:11:06.825287', 'step': 103, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:11:06.892240', 'step': 103, 'epoch': 1}
{'type': 'loss', 'content': 0.059231437742710114, 'timestamp': '2025-10-02 00:11:06.903671', 'step': 104, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:06.966914', 'step': 104, 'epoch': 1}
{'type': 'loss', 'content': 0.048555489629507065, 'timestamp': '2025-10-02 00:11:06.978278', 'step': 105, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:07.070619', 'step': 105, 'epoch': 1}
{'type': 'loss', 'content': 0.0441594272851944, 'timestamp': '2025-10-02 00:11:07.081207', 'step': 106, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:07.153793', 'step': 106, 'epoch': 1}
{'type': 'loss', 'content': 0.08104550093412399, 'timestamp': '2025-10-02 00:11:07.163190', 'step': 107, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:07.221790', 'step': 107, 'epoch': 1}
{'type': 'loss', 'content': 0.13194598257541656, 'timestamp': '2025-10-02 00:11:07.228758', 'step': 108, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:07.296005', 'step': 108, 'epoch': 1}
{'type': 'loss', 'content': 0.15593087673187256, 'timestamp': '2025-10-02 00:11:07.307102', 'step': 109, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:07.373134', 'step': 109, 'epoch': 1}
{'type': 'loss', 'content': 0.10195916891098022, 'timestamp': '2025-10-02 00:11:07.383564', 'step': 110, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:07.472314', 'step': 110, 'epoch': 1}
{'type': 'loss', 'content': 0.13244090974330902, 'timestamp': '2025-10-02 00:11:07.476243', 'step': 111, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:11:07.546790', 'step': 111, 'epoch': 1}
{'type': 'loss', 'content': 0.20717370510101318, 'timestamp': '2025-10-02 00:11:07.553810', 'step': 112, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:07.612339', 'step': 112, 'epoch': 1}
{'type': 'loss', 'content': 0.1381251960992813, 'timestamp': '2025-10-02 00:11:07.617277', 'step': 113, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:07.675699', 'step': 113, 'epoch': 1}
{'type': 'loss', 'content': 0.29802218079566956, 'timestamp': '2025-10-02 00:11:07.684991', 'step': 114, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:11:07.758654', 'step': 114, 'epoch': 1}
{'type': 'loss', 'content': 0.10256858915090561, 'timestamp': '2025-10-02 00:11:07.764492', 'step': 115, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:07.830499', 'step': 115, 'epoch': 1}
{'type': 'loss', 'content': 0.07312273234128952, 'timestamp': '2025-10-02 00:11:07.838722', 'step': 116, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:07.895662', 'step': 116, 'epoch': 1}
{'type': 'loss', 'content': 0.34454062581062317, 'timestamp': '2025-10-02 00:11:07.899792', 'step': 117, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:07.973722', 'step': 117, 'epoch': 1}
{'type': 'loss', 'content': 0.0623149573802948, 'timestamp': '2025-10-02 00:11:07.981086', 'step': 118, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:11:08.058366', 'step': 118, 'epoch': 1}
{'type': 'loss', 'content': 0.07432567328214645, 'timestamp': '2025-10-02 00:11:08.068544', 'step': 119, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:08.135388', 'step': 119, 'epoch': 1}
{'type': 'loss', 'content': 0.364285945892334, 'timestamp': '2025-10-02 00:11:08.142399', 'step': 120, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:11:08.206606', 'step': 120, 'epoch': 1}
{'type': 'loss', 'content': 0.16901032626628876, 'timestamp': '2025-10-02 00:11:08.210573', 'step': 121, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:08.268903', 'step': 121, 'epoch': 1}
{'type': 'loss', 'content': 0.09252290427684784, 'timestamp': '2025-10-02 00:11:08.272271', 'step': 122, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:08.355607', 'step': 122, 'epoch': 1}
{'type': 'loss', 'content': 0.2366618812084198, 'timestamp': '2025-10-02 00:11:08.360535', 'step': 123, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:08.431641', 'step': 123, 'epoch': 1}
{'type': 'loss', 'content': 0.1703774780035019, 'timestamp': '2025-10-02 00:11:08.446118', 'step': 124, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:08.504950', 'step': 124, 'epoch': 1}
{'type': 'loss', 'content': 0.08510531485080719, 'timestamp': '2025-10-02 00:11:08.515185', 'step': 125, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:08.584775', 'step': 125, 'epoch': 1}
{'type': 'loss', 'content': 0.2641935646533966, 'timestamp': '2025-10-02 00:11:08.596067', 'step': 126, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:08.666289', 'step': 126, 'epoch': 1}
{'type': 'loss', 'content': 0.05739022046327591, 'timestamp': '2025-10-02 00:11:08.675649', 'step': 127, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:11:08.759729', 'step': 127, 'epoch': 1}
{'type': 'loss', 'content': 0.1539815217256546, 'timestamp': '2025-10-02 00:11:08.767079', 'step': 128, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:08.835251', 'step': 128, 'epoch': 1}
{'type': 'loss', 'content': 0.027738695964217186, 'timestamp': '2025-10-02 00:11:08.838673', 'step': 129, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:08.896061', 'step': 129, 'epoch': 1}
{'type': 'loss', 'content': 0.0442977249622345, 'timestamp': '2025-10-02 00:11:08.899960', 'step': 130, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:08.956919', 'step': 130, 'epoch': 1}
{'type': 'loss', 'content': 0.2318362444639206, 'timestamp': '2025-10-02 00:11:08.961022', 'step': 131, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:09.027587', 'step': 131, 'epoch': 1}
{'type': 'loss', 'content': 0.3197277784347534, 'timestamp': '2025-10-02 00:11:09.042859', 'step': 132, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:09.100138', 'step': 132, 'epoch': 1}
{'type': 'loss', 'content': 0.05917879566550255, 'timestamp': '2025-10-02 00:11:09.110646', 'step': 133, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:09.192741', 'step': 133, 'epoch': 1}
{'type': 'loss', 'content': 0.24453894793987274, 'timestamp': '2025-10-02 00:11:09.195857', 'step': 134, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:11:09.258777', 'step': 134, 'epoch': 1}
{'type': 'loss', 'content': 0.07883122563362122, 'timestamp': '2025-10-02 00:11:09.268986', 'step': 135, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:11:09.338261', 'step': 135, 'epoch': 1}
{'type': 'loss', 'content': 0.016228899359703064, 'timestamp': '2025-10-02 00:11:09.349271', 'step': 136, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:09.408042', 'step': 136, 'epoch': 1}
{'type': 'loss', 'content': 0.29237866401672363, 'timestamp': '2025-10-02 00:11:09.410970', 'step': 137, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:09.469375', 'step': 137, 'epoch': 1}
{'type': 'loss', 'content': 0.05586531013250351, 'timestamp': '2025-10-02 00:11:09.480108', 'step': 138, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:09.541916', 'step': 138, 'epoch': 1}
{'type': 'loss', 'content': 0.2121472954750061, 'timestamp': '2025-10-02 00:11:09.552289', 'step': 139, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:09.610076', 'step': 139, 'epoch': 1}
{'type': 'loss', 'content': 0.08165980875492096, 'timestamp': '2025-10-02 00:11:09.620236', 'step': 140, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:11:09.695840', 'step': 140, 'epoch': 1}
{'type': 'loss', 'content': 0.07163077592849731, 'timestamp': '2025-10-02 00:11:09.708958', 'step': 141, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:09.790509', 'step': 141, 'epoch': 1}
{'type': 'loss', 'content': 0.197494775056839, 'timestamp': '2025-10-02 00:11:09.794158', 'step': 142, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:09.870304', 'step': 142, 'epoch': 1}
{'type': 'loss', 'content': 0.13866837322711945, 'timestamp': '2025-10-02 00:11:09.880119', 'step': 143, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:09.938851', 'step': 143, 'epoch': 1}
{'type': 'loss', 'content': 0.0398692786693573, 'timestamp': '2025-10-02 00:11:09.948279', 'step': 144, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:10.018627', 'step': 144, 'epoch': 1}
{'type': 'loss', 'content': 0.16651712357997894, 'timestamp': '2025-10-02 00:11:10.023494', 'step': 145, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:10.110571', 'step': 145, 'epoch': 1}
{'type': 'loss', 'content': 0.058002568781375885, 'timestamp': '2025-10-02 00:11:10.116285', 'step': 146, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:10.194040', 'step': 146, 'epoch': 1}
{'type': 'loss', 'content': 0.23094414174556732, 'timestamp': '2025-10-02 00:11:10.208347', 'step': 147, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:10.278545', 'step': 147, 'epoch': 1}
{'type': 'loss', 'content': 0.06566828489303589, 'timestamp': '2025-10-02 00:11:10.293676', 'step': 148, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:10.350290', 'step': 148, 'epoch': 1}
{'type': 'loss', 'content': 0.18647204339504242, 'timestamp': '2025-10-02 00:11:10.355072', 'step': 149, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:10.425537', 'step': 149, 'epoch': 1}
{'type': 'loss', 'content': 0.15950505435466766, 'timestamp': '2025-10-02 00:11:10.430790', 'step': 150, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:10.515377', 'step': 150, 'epoch': 1}
{'type': 'loss', 'content': 0.32695263624191284, 'timestamp': '2025-10-02 00:11:10.526019', 'step': 151, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:10.606414', 'step': 151, 'epoch': 1}
{'type': 'loss', 'content': 0.1472368836402893, 'timestamp': '2025-10-02 00:11:10.617620', 'step': 152, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:10.681922', 'step': 152, 'epoch': 1}
{'type': 'loss', 'content': 0.09180569648742676, 'timestamp': '2025-10-02 00:11:10.686338', 'step': 153, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:10.763441', 'step': 153, 'epoch': 1}
{'type': 'loss', 'content': 0.05007455125451088, 'timestamp': '2025-10-02 00:11:10.770958', 'step': 154, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:10.836502', 'step': 154, 'epoch': 1}
{'type': 'loss', 'content': 0.34063050150871277, 'timestamp': '2025-10-02 00:11:10.840232', 'step': 155, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:10.919599', 'step': 155, 'epoch': 1}
{'type': 'loss', 'content': 0.0973629504442215, 'timestamp': '2025-10-02 00:11:10.927110', 'step': 156, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:11:10.990086', 'step': 156, 'epoch': 1}
{'type': 'loss', 'content': 0.08397916704416275, 'timestamp': '2025-10-02 00:11:11.001093', 'step': 157, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:11.065317', 'step': 157, 'epoch': 1}
{'type': 'loss', 'content': 0.24474649131298065, 'timestamp': '2025-10-02 00:11:11.069114', 'step': 158, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:11.129757', 'step': 158, 'epoch': 1}
{'type': 'loss', 'content': 0.24902920424938202, 'timestamp': '2025-10-02 00:11:11.135290', 'step': 159, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:11.194498', 'step': 159, 'epoch': 1}
{'type': 'loss', 'content': 0.2246226817369461, 'timestamp': '2025-10-02 00:11:11.204338', 'step': 160, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:11.280165', 'step': 160, 'epoch': 1}
{'type': 'loss', 'content': 0.2696905732154846, 'timestamp': '2025-10-02 00:11:11.283811', 'step': 161, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:11.356203', 'step': 161, 'epoch': 1}
{'type': 'loss', 'content': 0.10329484194517136, 'timestamp': '2025-10-02 00:11:11.358980', 'step': 162, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:11.417931', 'step': 162, 'epoch': 1}
{'type': 'loss', 'content': 0.11508151888847351, 'timestamp': '2025-10-02 00:11:11.421468', 'step': 163, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:11.480355', 'step': 163, 'epoch': 1}
{'type': 'loss', 'content': 0.18894825875759125, 'timestamp': '2025-10-02 00:11:11.487543', 'step': 164, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:11.550720', 'step': 164, 'epoch': 1}
{'type': 'loss', 'content': 0.058527469635009766, 'timestamp': '2025-10-02 00:11:11.558395', 'step': 165, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:11.629209', 'step': 165, 'epoch': 1}
{'type': 'loss', 'content': 0.04473740980029106, 'timestamp': '2025-10-02 00:11:11.638783', 'step': 166, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:11.709305', 'step': 166, 'epoch': 1}
{'type': 'loss', 'content': 0.026485877111554146, 'timestamp': '2025-10-02 00:11:11.718622', 'step': 167, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:11.791044', 'step': 167, 'epoch': 1}
{'type': 'loss', 'content': 0.3631105422973633, 'timestamp': '2025-10-02 00:11:11.806331', 'step': 168, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:11.865251', 'step': 168, 'epoch': 1}
{'type': 'loss', 'content': 0.12759891152381897, 'timestamp': '2025-10-02 00:11:11.870823', 'step': 169, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:11.937467', 'step': 169, 'epoch': 1}
{'type': 'loss', 'content': 0.04885868728160858, 'timestamp': '2025-10-02 00:11:11.948061', 'step': 170, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:12.013853', 'step': 170, 'epoch': 1}
{'type': 'loss', 'content': 0.07268550246953964, 'timestamp': '2025-10-02 00:11:12.017071', 'step': 171, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:12.082484', 'step': 171, 'epoch': 1}
{'type': 'loss', 'content': 0.07429437339305878, 'timestamp': '2025-10-02 00:11:12.089768', 'step': 172, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:12.146961', 'step': 172, 'epoch': 1}
{'type': 'loss', 'content': 0.15870720148086548, 'timestamp': '2025-10-02 00:11:12.151601', 'step': 173, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:12.210087', 'step': 173, 'epoch': 1}
{'type': 'loss', 'content': 0.1159438043832779, 'timestamp': '2025-10-02 00:11:12.216056', 'step': 174, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:12.278311', 'step': 174, 'epoch': 1}
{'type': 'loss', 'content': 0.34297603368759155, 'timestamp': '2025-10-02 00:11:12.281401', 'step': 175, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:12.339475', 'step': 175, 'epoch': 1}
{'type': 'loss', 'content': 0.09211788326501846, 'timestamp': '2025-10-02 00:11:12.345873', 'step': 176, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:12.410450', 'step': 176, 'epoch': 1}
{'type': 'loss', 'content': 0.05592026188969612, 'timestamp': '2025-10-02 00:11:12.421844', 'step': 177, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:11:12.481372', 'step': 177, 'epoch': 1}
{'type': 'loss', 'content': 0.32657963037490845, 'timestamp': '2025-10-02 00:11:12.485244', 'step': 178, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:11:12.548742', 'step': 178, 'epoch': 1}
{'type': 'loss', 'content': 0.13636159896850586, 'timestamp': '2025-10-02 00:11:12.560991', 'step': 179, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:12.627518', 'step': 179, 'epoch': 1}
{'type': 'loss', 'content': 0.14696569740772247, 'timestamp': '2025-10-02 00:11:12.635196', 'step': 180, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:12.693819', 'step': 180, 'epoch': 1}
{'type': 'loss', 'content': 0.08144082129001617, 'timestamp': '2025-10-02 00:11:12.701617', 'step': 181, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:12.757747', 'step': 181, 'epoch': 1}
{'type': 'loss', 'content': 0.13712388277053833, 'timestamp': '2025-10-02 00:11:12.763141', 'step': 182, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:12.824780', 'step': 182, 'epoch': 1}
{'type': 'loss', 'content': 0.07269814610481262, 'timestamp': '2025-10-02 00:11:12.829278', 'step': 183, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:12.887207', 'step': 183, 'epoch': 1}
{'type': 'loss', 'content': 0.05010001361370087, 'timestamp': '2025-10-02 00:11:12.893734', 'step': 184, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:12.949230', 'step': 184, 'epoch': 1}
{'type': 'loss', 'content': 0.30103281140327454, 'timestamp': '2025-10-02 00:11:12.953122', 'step': 185, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:13.017163', 'step': 185, 'epoch': 1}
{'type': 'loss', 'content': 0.056015849113464355, 'timestamp': '2025-10-02 00:11:13.020833', 'step': 186, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:13.091480', 'step': 186, 'epoch': 1}
{'type': 'loss', 'content': 0.14064839482307434, 'timestamp': '2025-10-02 00:11:13.094698', 'step': 187, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:13.153681', 'step': 187, 'epoch': 1}
{'type': 'loss', 'content': 0.0665532797574997, 'timestamp': '2025-10-02 00:11:13.161437', 'step': 188, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:13.219129', 'step': 188, 'epoch': 1}
{'type': 'loss', 'content': 0.04223805293440819, 'timestamp': '2025-10-02 00:11:13.228908', 'step': 189, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:13.289477', 'step': 189, 'epoch': 1}
{'type': 'loss', 'content': 0.09337422996759415, 'timestamp': '2025-10-02 00:11:13.293287', 'step': 190, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:13.350993', 'step': 190, 'epoch': 1}
{'type': 'loss', 'content': 0.07674713432788849, 'timestamp': '2025-10-02 00:11:13.361561', 'step': 191, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:13.433282', 'step': 191, 'epoch': 1}
{'type': 'loss', 'content': 0.16028930246829987, 'timestamp': '2025-10-02 00:11:13.440743', 'step': 192, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:13.510554', 'step': 192, 'epoch': 1}
{'type': 'loss', 'content': 0.03337560221552849, 'timestamp': '2025-10-02 00:11:13.513985', 'step': 193, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:13.573198', 'step': 193, 'epoch': 1}
{'type': 'loss', 'content': 0.15442533791065216, 'timestamp': '2025-10-02 00:11:13.576347', 'step': 194, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:11:13.649540', 'step': 194, 'epoch': 1}
{'type': 'loss', 'content': 0.08767455071210861, 'timestamp': '2025-10-02 00:11:13.653365', 'step': 195, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:13.734350', 'step': 195, 'epoch': 1}
{'type': 'loss', 'content': 0.058841489255428314, 'timestamp': '2025-10-02 00:11:13.741492', 'step': 196, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:13.799731', 'step': 196, 'epoch': 1}
{'type': 'loss', 'content': 0.0785384550690651, 'timestamp': '2025-10-02 00:11:13.802993', 'step': 197, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:13.865515', 'step': 197, 'epoch': 1}
{'type': 'loss', 'content': 0.2690913677215576, 'timestamp': '2025-10-02 00:11:13.869934', 'step': 198, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:13.930177', 'step': 198, 'epoch': 1}
{'type': 'loss', 'content': 0.1201322078704834, 'timestamp': '2025-10-02 00:11:13.942878', 'step': 199, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:14.010147', 'step': 199, 'epoch': 1}
{'type': 'loss', 'content': 0.16501541435718536, 'timestamp': '2025-10-02 00:11:14.017960', 'step': 200, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:14.095115', 'step': 200, 'epoch': 1}
{'type': 'loss', 'content': 0.09168490022420883, 'timestamp': '2025-10-02 00:11:14.099128', 'step': 201, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:14.160835', 'step': 201, 'epoch': 1}
{'type': 'loss', 'content': 0.03739149868488312, 'timestamp': '2025-10-02 00:11:14.170432', 'step': 202, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:14.238236', 'step': 202, 'epoch': 1}
{'type': 'loss', 'content': 0.21909691393375397, 'timestamp': '2025-10-02 00:11:14.250598', 'step': 203, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:14.327815', 'step': 203, 'epoch': 1}
{'type': 'loss', 'content': 0.08040744811296463, 'timestamp': '2025-10-02 00:11:14.337964', 'step': 204, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:14.395922', 'step': 204, 'epoch': 1}
{'type': 'loss', 'content': 0.14523856341838837, 'timestamp': '2025-10-02 00:11:14.402116', 'step': 205, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:14.469920', 'step': 205, 'epoch': 1}
{'type': 'loss', 'content': 0.048518382012844086, 'timestamp': '2025-10-02 00:11:14.480461', 'step': 206, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:14.541167', 'step': 206, 'epoch': 1}
{'type': 'loss', 'content': 0.10298178344964981, 'timestamp': '2025-10-02 00:11:14.545053', 'step': 207, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:14.609862', 'step': 207, 'epoch': 1}
{'type': 'loss', 'content': 0.0842752456665039, 'timestamp': '2025-10-02 00:11:14.618197', 'step': 208, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:11:14.681470', 'step': 208, 'epoch': 1}
{'type': 'loss', 'content': 0.13626240193843842, 'timestamp': '2025-10-02 00:11:14.692421', 'step': 209, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:14.755210', 'step': 209, 'epoch': 1}
{'type': 'loss', 'content': 0.03285247087478638, 'timestamp': '2025-10-02 00:11:14.764612', 'step': 210, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:14.827100', 'step': 210, 'epoch': 1}
{'type': 'loss', 'content': 0.20504876971244812, 'timestamp': '2025-10-02 00:11:14.830652', 'step': 211, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:14.888300', 'step': 211, 'epoch': 1}
{'type': 'loss', 'content': 0.07930683344602585, 'timestamp': '2025-10-02 00:11:14.895319', 'step': 212, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:14.969942', 'step': 212, 'epoch': 1}
{'type': 'loss', 'content': 0.21188737452030182, 'timestamp': '2025-10-02 00:11:14.973181', 'step': 213, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:11:15.030734', 'step': 213, 'epoch': 1}
{'type': 'loss', 'content': 0.23509423434734344, 'timestamp': '2025-10-02 00:11:15.037658', 'step': 214, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:15.099386', 'step': 214, 'epoch': 1}
{'type': 'loss', 'content': 0.14690734446048737, 'timestamp': '2025-10-02 00:11:15.108792', 'step': 215, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:11:15.172244', 'step': 215, 'epoch': 1}
{'type': 'loss', 'content': 0.06867408752441406, 'timestamp': '2025-10-02 00:11:15.183175', 'step': 216, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:15.245370', 'step': 216, 'epoch': 1}
{'type': 'loss', 'content': 0.114994116127491, 'timestamp': '2025-10-02 00:11:15.250958', 'step': 217, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:11:15.317480', 'step': 217, 'epoch': 1}
{'type': 'loss', 'content': 0.25088539719581604, 'timestamp': '2025-10-02 00:11:15.320971', 'step': 218, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:15.379882', 'step': 218, 'epoch': 1}
{'type': 'loss', 'content': 0.2820208668708801, 'timestamp': '2025-10-02 00:11:15.381885', 'step': 219, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:15.445615', 'step': 219, 'epoch': 1}
{'type': 'loss', 'content': 0.07385645806789398, 'timestamp': '2025-10-02 00:11:15.456001', 'step': 220, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:15.515814', 'step': 220, 'epoch': 1}
{'type': 'loss', 'content': 0.10119719803333282, 'timestamp': '2025-10-02 00:11:15.525245', 'step': 221, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:15.591690', 'step': 221, 'epoch': 1}
{'type': 'loss', 'content': 0.21309396624565125, 'timestamp': '2025-10-02 00:11:15.595236', 'step': 222, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:15.655803', 'step': 222, 'epoch': 1}
{'type': 'loss', 'content': 0.15521779656410217, 'timestamp': '2025-10-02 00:11:15.659267', 'step': 223, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:15.721571', 'step': 223, 'epoch': 1}
{'type': 'loss', 'content': 0.018416469916701317, 'timestamp': '2025-10-02 00:11:15.729032', 'step': 224, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:15.794222', 'step': 224, 'epoch': 1}
{'type': 'loss', 'content': 0.015871239826083183, 'timestamp': '2025-10-02 00:11:15.803552', 'step': 225, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:11:15.873544', 'step': 225, 'epoch': 1}
{'type': 'loss', 'content': 0.17613236606121063, 'timestamp': '2025-10-02 00:11:15.877492', 'step': 226, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:15.935031', 'step': 226, 'epoch': 1}
{'type': 'loss', 'content': 0.05963697284460068, 'timestamp': '2025-10-02 00:11:15.944599', 'step': 227, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:16.006196', 'step': 227, 'epoch': 1}
{'type': 'loss', 'content': 0.12079982459545135, 'timestamp': '2025-10-02 00:11:16.012349', 'step': 228, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:11:16.072454', 'step': 228, 'epoch': 1}
{'type': 'loss', 'content': 0.027540916576981544, 'timestamp': '2025-10-02 00:11:16.083445', 'step': 229, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:16.142006', 'step': 229, 'epoch': 1}
{'type': 'loss', 'content': 0.20259274542331696, 'timestamp': '2025-10-02 00:11:16.145534', 'step': 230, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:16.211016', 'step': 230, 'epoch': 1}
{'type': 'loss', 'content': 0.09932909160852432, 'timestamp': '2025-10-02 00:11:16.214002', 'step': 231, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:16.277139', 'step': 231, 'epoch': 1}
{'type': 'loss', 'content': 0.2635992765426636, 'timestamp': '2025-10-02 00:11:16.284582', 'step': 232, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:16.351348', 'step': 232, 'epoch': 1}
{'type': 'loss', 'content': 0.0515718013048172, 'timestamp': '2025-10-02 00:11:16.354257', 'step': 233, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:16.414556', 'step': 233, 'epoch': 1}
{'type': 'loss', 'content': 0.08684667199850082, 'timestamp': '2025-10-02 00:11:16.417740', 'step': 234, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:16.476601', 'step': 234, 'epoch': 1}
{'type': 'loss', 'content': 0.11396744102239609, 'timestamp': '2025-10-02 00:11:16.479553', 'step': 235, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:16.543003', 'step': 235, 'epoch': 1}
{'type': 'loss', 'content': 0.022357909008860588, 'timestamp': '2025-10-02 00:11:16.549566', 'step': 236, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:16.609946', 'step': 236, 'epoch': 1}
{'type': 'loss', 'content': 0.1227215901017189, 'timestamp': '2025-10-02 00:11:16.620248', 'step': 237, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:16.680306', 'step': 237, 'epoch': 1}
{'type': 'loss', 'content': 0.09386574476957321, 'timestamp': '2025-10-02 00:11:16.683094', 'step': 238, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:16.745735', 'step': 238, 'epoch': 1}
{'type': 'loss', 'content': 0.20058360695838928, 'timestamp': '2025-10-02 00:11:16.749324', 'step': 239, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:16.806089', 'step': 239, 'epoch': 1}
{'type': 'loss', 'content': 0.08752275258302689, 'timestamp': '2025-10-02 00:11:16.816253', 'step': 240, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:11:16.878735', 'step': 240, 'epoch': 1}
{'type': 'loss', 'content': 0.049149442464113235, 'timestamp': '2025-10-02 00:11:16.890276', 'step': 241, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:16.950534', 'step': 241, 'epoch': 1}
{'type': 'loss', 'content': 0.10253529995679855, 'timestamp': '2025-10-02 00:11:16.956607', 'step': 242, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:17.016892', 'step': 242, 'epoch': 1}
{'type': 'loss', 'content': 0.10514000803232193, 'timestamp': '2025-10-02 00:11:17.024452', 'step': 243, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:17.079991', 'step': 243, 'epoch': 1}
{'type': 'loss', 'content': 0.1546289473772049, 'timestamp': '2025-10-02 00:11:17.086764', 'step': 244, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:11:17.154245', 'step': 244, 'epoch': 1}
{'type': 'loss', 'content': 0.023153983056545258, 'timestamp': '2025-10-02 00:11:17.167275', 'step': 245, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:17.223282', 'step': 245, 'epoch': 1}
{'type': 'loss', 'content': 0.15558770298957825, 'timestamp': '2025-10-02 00:11:17.227583', 'step': 246, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:17.284545', 'step': 246, 'epoch': 1}
{'type': 'loss', 'content': 0.08034957200288773, 'timestamp': '2025-10-02 00:11:17.290329', 'step': 247, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:17.353744', 'step': 247, 'epoch': 1}
{'type': 'loss', 'content': 0.02870616316795349, 'timestamp': '2025-10-02 00:11:17.359458', 'step': 248, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:17.413698', 'step': 248, 'epoch': 1}
{'type': 'loss', 'content': 0.053265076130628586, 'timestamp': '2025-10-02 00:11:17.423950', 'step': 249, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:11:17.482956', 'step': 249, 'epoch': 1}
{'type': 'loss', 'content': 0.03321902081370354, 'timestamp': '2025-10-02 00:11:17.493160', 'step': 250, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:17.547295', 'step': 250, 'epoch': 1}
{'type': 'loss', 'content': 0.06787335127592087, 'timestamp': '2025-10-02 00:11:17.549492', 'step': 251, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:17.602861', 'step': 251, 'epoch': 1}
{'type': 'loss', 'content': 0.14639250934123993, 'timestamp': '2025-10-02 00:11:17.608709', 'step': 252, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:17.661623', 'step': 252, 'epoch': 1}
{'type': 'loss', 'content': 0.13923537731170654, 'timestamp': '2025-10-02 00:11:17.663995', 'step': 253, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:11:17.725596', 'step': 253, 'epoch': 1}
{'type': 'loss', 'content': 0.014778929762542248, 'timestamp': '2025-10-02 00:11:17.736274', 'step': 254, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:11:17.793154', 'step': 254, 'epoch': 1}
{'type': 'loss', 'content': 0.15145698189735413, 'timestamp': '2025-10-02 00:11:17.794977', 'step': 255, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:17.862995', 'step': 255, 'epoch': 1}
{'type': 'loss', 'content': 0.2703268826007843, 'timestamp': '2025-10-02 00:11:17.868570', 'step': 256, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:17.923921', 'step': 256, 'epoch': 1}
{'type': 'loss', 'content': 0.07063202559947968, 'timestamp': '2025-10-02 00:11:17.928171', 'step': 257, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:17.987545', 'step': 257, 'epoch': 1}
{'type': 'loss', 'content': 0.05676848068833351, 'timestamp': '2025-10-02 00:11:17.991151', 'step': 258, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:18.050800', 'step': 258, 'epoch': 1}
{'type': 'loss', 'content': 0.11717494577169418, 'timestamp': '2025-10-02 00:11:18.053640', 'step': 259, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:18.113274', 'step': 259, 'epoch': 1}
{'type': 'loss', 'content': 0.04447538033127785, 'timestamp': '2025-10-02 00:11:18.120546', 'step': 260, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:18.182663', 'step': 260, 'epoch': 1}
{'type': 'loss', 'content': 0.0662505105137825, 'timestamp': '2025-10-02 00:11:18.189492', 'step': 261, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:18.254961', 'step': 261, 'epoch': 1}
{'type': 'loss', 'content': 0.14954224228858948, 'timestamp': '2025-10-02 00:11:18.257375', 'step': 262, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:18.317420', 'step': 262, 'epoch': 1}
{'type': 'loss', 'content': 0.13173505663871765, 'timestamp': '2025-10-02 00:11:18.319837', 'step': 263, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:18.378725', 'step': 263, 'epoch': 1}
{'type': 'loss', 'content': 0.04268476366996765, 'timestamp': '2025-10-02 00:11:18.385373', 'step': 264, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:18.439645', 'step': 264, 'epoch': 1}
{'type': 'loss', 'content': 0.09066121280193329, 'timestamp': '2025-10-02 00:11:18.441905', 'step': 265, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:18.496414', 'step': 265, 'epoch': 1}
{'type': 'loss', 'content': 0.06935857981443405, 'timestamp': '2025-10-02 00:11:18.505647', 'step': 266, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:11:18.559268', 'step': 266, 'epoch': 1}
{'type': 'loss', 'content': 0.09002020210027695, 'timestamp': '2025-10-02 00:11:18.561571', 'step': 267, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:18.615440', 'step': 267, 'epoch': 1}
{'type': 'loss', 'content': 0.08030810952186584, 'timestamp': '2025-10-02 00:11:18.621271', 'step': 268, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:18.675303', 'step': 268, 'epoch': 1}
{'type': 'loss', 'content': 0.014099336229264736, 'timestamp': '2025-10-02 00:11:18.682987', 'step': 269, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:18.743600', 'step': 269, 'epoch': 1}
{'type': 'loss', 'content': 0.17430157959461212, 'timestamp': '2025-10-02 00:11:18.745740', 'step': 270, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:18.799089', 'step': 270, 'epoch': 1}
{'type': 'loss', 'content': 0.20452408492565155, 'timestamp': '2025-10-02 00:11:18.801299', 'step': 271, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:18.854466', 'step': 271, 'epoch': 1}
{'type': 'loss', 'content': 0.1485537886619568, 'timestamp': '2025-10-02 00:11:18.861115', 'step': 272, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:18.914112', 'step': 272, 'epoch': 1}
{'type': 'loss', 'content': 0.15750756859779358, 'timestamp': '2025-10-02 00:11:18.916405', 'step': 273, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:18.970588', 'step': 273, 'epoch': 1}
{'type': 'loss', 'content': 0.15186716616153717, 'timestamp': '2025-10-02 00:11:18.973212', 'step': 274, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:11:19.035489', 'step': 274, 'epoch': 1}
{'type': 'loss', 'content': 0.05336378887295723, 'timestamp': '2025-10-02 00:11:19.046090', 'step': 275, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:19.100236', 'step': 275, 'epoch': 1}
{'type': 'loss', 'content': 0.14654380083084106, 'timestamp': '2025-10-02 00:11:19.105954', 'step': 276, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:19.160285', 'step': 276, 'epoch': 1}
{'type': 'loss', 'content': 0.13696099817752838, 'timestamp': '2025-10-02 00:11:19.170848', 'step': 277, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:19.225075', 'step': 277, 'epoch': 1}
{'type': 'loss', 'content': 0.05625919625163078, 'timestamp': '2025-10-02 00:11:19.227354', 'step': 278, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:19.281503', 'step': 278, 'epoch': 1}
{'type': 'loss', 'content': 0.2346935123205185, 'timestamp': '2025-10-02 00:11:19.291044', 'step': 279, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:19.348731', 'step': 279, 'epoch': 1}
{'type': 'loss', 'content': 0.044272296130657196, 'timestamp': '2025-10-02 00:11:19.355262', 'step': 280, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:19.418770', 'step': 280, 'epoch': 1}
{'type': 'loss', 'content': 0.012259364128112793, 'timestamp': '2025-10-02 00:11:19.424856', 'step': 281, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:19.480837', 'step': 281, 'epoch': 1}
{'type': 'loss', 'content': 0.16181030869483948, 'timestamp': '2025-10-02 00:11:19.483462', 'step': 282, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:19.538253', 'step': 282, 'epoch': 1}
{'type': 'loss', 'content': 0.13393935561180115, 'timestamp': '2025-10-02 00:11:19.540412', 'step': 283, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:19.605493', 'step': 283, 'epoch': 1}
{'type': 'loss', 'content': 0.029222210869193077, 'timestamp': '2025-10-02 00:11:19.613416', 'step': 284, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:11:19.667331', 'step': 284, 'epoch': 1}
{'type': 'loss', 'content': 0.12153854221105576, 'timestamp': '2025-10-02 00:11:19.669507', 'step': 285, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:19.724761', 'step': 285, 'epoch': 1}
{'type': 'loss', 'content': 0.11501727253198624, 'timestamp': '2025-10-02 00:11:19.730557', 'step': 286, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:19.785395', 'step': 286, 'epoch': 1}
{'type': 'loss', 'content': 0.20757906138896942, 'timestamp': '2025-10-02 00:11:19.787450', 'step': 287, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:19.841192', 'step': 287, 'epoch': 1}
{'type': 'loss', 'content': 0.2701753079891205, 'timestamp': '2025-10-02 00:11:19.847066', 'step': 288, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:19.900665', 'step': 288, 'epoch': 1}
{'type': 'loss', 'content': 0.062213778495788574, 'timestamp': '2025-10-02 00:11:19.903602', 'step': 289, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:11:19.964848', 'step': 289, 'epoch': 1}
{'type': 'loss', 'content': 0.09204915910959244, 'timestamp': '2025-10-02 00:11:19.975034', 'step': 290, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:20.029047', 'step': 290, 'epoch': 1}
{'type': 'loss', 'content': 0.13561224937438965, 'timestamp': '2025-10-02 00:11:20.031240', 'step': 291, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:20.093045', 'step': 291, 'epoch': 1}
{'type': 'loss', 'content': 0.25175631046295166, 'timestamp': '2025-10-02 00:11:20.099206', 'step': 292, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:20.152854', 'step': 292, 'epoch': 1}
{'type': 'loss', 'content': 0.047579310834407806, 'timestamp': '2025-10-02 00:11:20.160184', 'step': 293, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:20.219207', 'step': 293, 'epoch': 1}
{'type': 'loss', 'content': 0.20230813324451447, 'timestamp': '2025-10-02 00:11:20.221438', 'step': 294, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:20.275759', 'step': 294, 'epoch': 1}
{'type': 'loss', 'content': 0.2704741954803467, 'timestamp': '2025-10-02 00:11:20.277881', 'step': 295, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:20.331234', 'step': 295, 'epoch': 1}
{'type': 'loss', 'content': 0.11247733235359192, 'timestamp': '2025-10-02 00:11:20.337093', 'step': 296, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:20.396309', 'step': 296, 'epoch': 1}
{'type': 'loss', 'content': 0.032078009098768234, 'timestamp': '2025-10-02 00:11:20.407630', 'step': 297, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:20.461014', 'step': 297, 'epoch': 1}
{'type': 'loss', 'content': 0.37369033694267273, 'timestamp': '2025-10-02 00:11:20.463375', 'step': 298, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:20.516770', 'step': 298, 'epoch': 1}
{'type': 'loss', 'content': 0.06834372878074646, 'timestamp': '2025-10-02 00:11:20.526116', 'step': 299, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:20.580158', 'step': 299, 'epoch': 1}
{'type': 'loss', 'content': 0.06244168058037758, 'timestamp': '2025-10-02 00:11:20.585874', 'step': 300, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:20.638727', 'step': 300, 'epoch': 1}
{'type': 'loss', 'content': 0.020870786160230637, 'timestamp': '2025-10-02 00:11:20.648516', 'step': 301, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:20.702595', 'step': 301, 'epoch': 1}
{'type': 'loss', 'content': 0.12277762591838837, 'timestamp': '2025-10-02 00:11:20.704836', 'step': 302, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:20.759862', 'step': 302, 'epoch': 1}
{'type': 'loss', 'content': 0.027832994237542152, 'timestamp': '2025-10-02 00:11:20.762709', 'step': 303, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:20.817472', 'step': 303, 'epoch': 1}
{'type': 'loss', 'content': 0.028865767642855644, 'timestamp': '2025-10-02 00:11:20.825912', 'step': 304, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:20.878794', 'step': 304, 'epoch': 1}
{'type': 'loss', 'content': 0.09412996470928192, 'timestamp': '2025-10-02 00:11:20.885045', 'step': 305, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:20.938864', 'step': 305, 'epoch': 1}
{'type': 'loss', 'content': 0.11016891151666641, 'timestamp': '2025-10-02 00:11:20.941113', 'step': 306, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:20.996417', 'step': 306, 'epoch': 1}
{'type': 'loss', 'content': 0.08660584688186646, 'timestamp': '2025-10-02 00:11:20.998679', 'step': 307, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:21.054004', 'step': 307, 'epoch': 1}
{'type': 'loss', 'content': 0.1592773199081421, 'timestamp': '2025-10-02 00:11:21.060164', 'step': 308, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:21.120860', 'step': 308, 'epoch': 1}
{'type': 'loss', 'content': 0.05090348422527313, 'timestamp': '2025-10-02 00:11:21.132243', 'step': 309, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:21.189117', 'step': 309, 'epoch': 1}
{'type': 'loss', 'content': 0.08850336819887161, 'timestamp': '2025-10-02 00:11:21.198466', 'step': 310, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:21.253773', 'step': 310, 'epoch': 1}
{'type': 'loss', 'content': 0.17288130521774292, 'timestamp': '2025-10-02 00:11:21.256688', 'step': 311, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:21.311655', 'step': 311, 'epoch': 1}
{'type': 'loss', 'content': 0.06121654435992241, 'timestamp': '2025-10-02 00:11:21.318002', 'step': 312, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:11:21.381307', 'step': 312, 'epoch': 1}
{'type': 'loss', 'content': 0.026402991265058517, 'timestamp': '2025-10-02 00:11:21.392873', 'step': 313, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:21.447366', 'step': 313, 'epoch': 1}
{'type': 'loss', 'content': 0.09750372916460037, 'timestamp': '2025-10-02 00:11:21.456738', 'step': 314, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:21.521531', 'step': 314, 'epoch': 1}
{'type': 'loss', 'content': 0.03696506842970848, 'timestamp': '2025-10-02 00:11:21.532015', 'step': 315, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:21.586218', 'step': 315, 'epoch': 1}
{'type': 'loss', 'content': 0.1453489512205124, 'timestamp': '2025-10-02 00:11:21.592326', 'step': 316, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:21.646441', 'step': 316, 'epoch': 1}
{'type': 'loss', 'content': 0.0882583037018776, 'timestamp': '2025-10-02 00:11:21.649734', 'step': 317, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:21.710636', 'step': 317, 'epoch': 1}
{'type': 'loss', 'content': 0.04399992525577545, 'timestamp': '2025-10-02 00:11:21.721135', 'step': 318, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:21.777641', 'step': 318, 'epoch': 1}
{'type': 'loss', 'content': 0.11846581846475601, 'timestamp': '2025-10-02 00:11:21.780321', 'step': 319, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:21.835254', 'step': 319, 'epoch': 1}
{'type': 'loss', 'content': 0.18164221942424774, 'timestamp': '2025-10-02 00:11:21.841567', 'step': 320, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:21.895443', 'step': 320, 'epoch': 1}
{'type': 'loss', 'content': 0.17517399787902832, 'timestamp': '2025-10-02 00:11:21.897953', 'step': 321, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:11:21.961251', 'step': 321, 'epoch': 1}
{'type': 'loss', 'content': 0.04959671199321747, 'timestamp': '2025-10-02 00:11:21.971921', 'step': 322, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:22.026710', 'step': 322, 'epoch': 1}
{'type': 'loss', 'content': 0.18180397152900696, 'timestamp': '2025-10-02 00:11:22.032687', 'step': 323, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:22.088110', 'step': 323, 'epoch': 1}
{'type': 'loss', 'content': 0.01908867061138153, 'timestamp': '2025-10-02 00:11:22.096586', 'step': 324, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:22.150664', 'step': 324, 'epoch': 1}
{'type': 'loss', 'content': 0.0576678141951561, 'timestamp': '2025-10-02 00:11:22.158441', 'step': 325, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:22.214034', 'step': 325, 'epoch': 1}
{'type': 'loss', 'content': 0.190551295876503, 'timestamp': '2025-10-02 00:11:22.216684', 'step': 326, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:22.271214', 'step': 326, 'epoch': 1}
{'type': 'loss', 'content': 0.0594630241394043, 'timestamp': '2025-10-02 00:11:22.274343', 'step': 327, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:22.337354', 'step': 327, 'epoch': 1}
{'type': 'loss', 'content': 0.02897288277745247, 'timestamp': '2025-10-02 00:11:22.348673', 'step': 328, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:22.402020', 'step': 328, 'epoch': 1}
{'type': 'loss', 'content': 0.10228381305932999, 'timestamp': '2025-10-02 00:11:22.408188', 'step': 329, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:22.463836', 'step': 329, 'epoch': 1}
{'type': 'loss', 'content': 0.04547495022416115, 'timestamp': '2025-10-02 00:11:22.466298', 'step': 330, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:11:22.527509', 'step': 330, 'epoch': 1}
{'type': 'loss', 'content': 0.039824169129133224, 'timestamp': '2025-10-02 00:11:22.537746', 'step': 331, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:22.599155', 'step': 331, 'epoch': 1}
{'type': 'loss', 'content': 0.2805514931678772, 'timestamp': '2025-10-02 00:11:22.605923', 'step': 332, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:22.661323', 'step': 332, 'epoch': 1}
{'type': 'loss', 'content': 0.03409866988658905, 'timestamp': '2025-10-02 00:11:22.670871', 'step': 333, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:22.726211', 'step': 333, 'epoch': 1}
{'type': 'loss', 'content': 0.11340586841106415, 'timestamp': '2025-10-02 00:11:22.728655', 'step': 334, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:11:22.787719', 'step': 334, 'epoch': 1}
{'type': 'loss', 'content': 0.143733948469162, 'timestamp': '2025-10-02 00:11:22.797938', 'step': 335, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:22.852256', 'step': 335, 'epoch': 1}
{'type': 'loss', 'content': 0.03623557090759277, 'timestamp': '2025-10-02 00:11:22.858965', 'step': 336, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:22.913632', 'step': 336, 'epoch': 1}
{'type': 'loss', 'content': 0.0479462556540966, 'timestamp': '2025-10-02 00:11:22.921413', 'step': 337, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:22.976023', 'step': 337, 'epoch': 1}
{'type': 'loss', 'content': 0.052445072680711746, 'timestamp': '2025-10-02 00:11:22.978386', 'step': 338, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:23.032225', 'step': 338, 'epoch': 1}
{'type': 'loss', 'content': 0.11817781627178192, 'timestamp': '2025-10-02 00:11:23.034242', 'step': 339, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:23.087319', 'step': 339, 'epoch': 1}
{'type': 'loss', 'content': 0.09513160586357117, 'timestamp': '2025-10-02 00:11:23.093095', 'step': 340, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:11:23.145794', 'step': 340, 'epoch': 1}
{'type': 'loss', 'content': 0.286667138338089, 'timestamp': '2025-10-02 00:11:23.147814', 'step': 341, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-10-02 00:11:23.223817', 'step': 341, 'epoch': 1}
{'type': 'loss', 'content': 0.05460444092750549, 'timestamp': '2025-10-02 00:11:23.237464', 'step': 342, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:23.291651', 'step': 342, 'epoch': 1}
{'type': 'loss', 'content': 0.020355649292469025, 'timestamp': '2025-10-02 00:11:23.299236', 'step': 343, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:23.354994', 'step': 343, 'epoch': 1}
{'type': 'loss', 'content': 0.02205040492117405, 'timestamp': '2025-10-02 00:11:23.365188', 'step': 344, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:23.421482', 'step': 344, 'epoch': 1}
{'type': 'loss', 'content': 0.055494751781225204, 'timestamp': '2025-10-02 00:11:23.427450', 'step': 345, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:23.480746', 'step': 345, 'epoch': 1}
{'type': 'loss', 'content': 0.19868892431259155, 'timestamp': '2025-10-02 00:11:23.483261', 'step': 346, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:23.537407', 'step': 346, 'epoch': 1}
{'type': 'loss', 'content': 0.09451756626367569, 'timestamp': '2025-10-02 00:11:23.539518', 'step': 347, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:23.593037', 'step': 347, 'epoch': 1}
{'type': 'loss', 'content': 0.04554686322808266, 'timestamp': '2025-10-02 00:11:23.603190', 'step': 348, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:23.656566', 'step': 348, 'epoch': 1}
{'type': 'loss', 'content': 0.054018259048461914, 'timestamp': '2025-10-02 00:11:23.658959', 'step': 349, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:23.712984', 'step': 349, 'epoch': 1}
{'type': 'loss', 'content': 0.20753473043441772, 'timestamp': '2025-10-02 00:11:23.715121', 'step': 350, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:23.768337', 'step': 350, 'epoch': 1}
{'type': 'loss', 'content': 0.07510988414287567, 'timestamp': '2025-10-02 00:11:23.770862', 'step': 351, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:23.825587', 'step': 351, 'epoch': 1}
{'type': 'loss', 'content': 0.1834808737039566, 'timestamp': '2025-10-02 00:11:23.835926', 'step': 352, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:23.888832', 'step': 352, 'epoch': 1}
{'type': 'loss', 'content': 0.0769295021891594, 'timestamp': '2025-10-02 00:11:23.891229', 'step': 353, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:23.952256', 'step': 353, 'epoch': 1}
{'type': 'loss', 'content': 0.06525850296020508, 'timestamp': '2025-10-02 00:11:23.962724', 'step': 354, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:24.016303', 'step': 354, 'epoch': 1}
{'type': 'loss', 'content': 0.2556503117084503, 'timestamp': '2025-10-02 00:11:24.018502', 'step': 355, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:24.071684', 'step': 355, 'epoch': 1}
{'type': 'loss', 'content': 0.15928258001804352, 'timestamp': '2025-10-02 00:11:24.077309', 'step': 356, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:11:24.129668', 'step': 356, 'epoch': 1}
{'type': 'loss', 'content': 0.2086145132780075, 'timestamp': '2025-10-02 00:11:24.131996', 'step': 357, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:11:24.209819', 'step': 357, 'epoch': 1}
{'type': 'loss', 'content': 0.032507941126823425, 'timestamp': '2025-10-02 00:11:24.223048', 'step': 358, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:11:24.276202', 'step': 358, 'epoch': 1}
{'type': 'loss', 'content': 0.2446693480014801, 'timestamp': '2025-10-02 00:11:24.278288', 'step': 359, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:24.332195', 'step': 359, 'epoch': 1}
{'type': 'loss', 'content': 0.01538788340985775, 'timestamp': '2025-10-02 00:11:24.340531', 'step': 360, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:24.394475', 'step': 360, 'epoch': 1}
{'type': 'loss', 'content': 0.1728680431842804, 'timestamp': '2025-10-02 00:11:24.402052', 'step': 361, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:24.463278', 'step': 361, 'epoch': 1}
{'type': 'loss', 'content': 0.0213965754956007, 'timestamp': '2025-10-02 00:11:24.473754', 'step': 362, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:11:24.528118', 'step': 362, 'epoch': 1}
{'type': 'loss', 'content': 0.14367689192295074, 'timestamp': '2025-10-02 00:11:24.530922', 'step': 363, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:24.591915', 'step': 363, 'epoch': 1}
{'type': 'loss', 'content': 0.04559297859668732, 'timestamp': '2025-10-02 00:11:24.603219', 'step': 364, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:11:24.655817', 'step': 364, 'epoch': 1}
{'type': 'loss', 'content': 0.29495689272880554, 'timestamp': '2025-10-02 00:11:24.658222', 'step': 365, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:24.712462', 'step': 365, 'epoch': 1}
{'type': 'loss', 'content': 0.18787544965744019, 'timestamp': '2025-10-02 00:11:24.714740', 'step': 366, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:24.768665', 'step': 366, 'epoch': 1}
{'type': 'loss', 'content': 0.04645780101418495, 'timestamp': '2025-10-02 00:11:24.770790', 'step': 367, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:24.825754', 'step': 367, 'epoch': 1}
{'type': 'loss', 'content': 0.1312587410211563, 'timestamp': '2025-10-02 00:11:24.833775', 'step': 368, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:24.886464', 'step': 368, 'epoch': 1}
{'type': 'loss', 'content': 0.14767302572727203, 'timestamp': '2025-10-02 00:11:24.892476', 'step': 369, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:24.946641', 'step': 369, 'epoch': 1}
{'type': 'loss', 'content': 0.09205959737300873, 'timestamp': '2025-10-02 00:11:24.952611', 'step': 370, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:25.006164', 'step': 370, 'epoch': 1}
{'type': 'loss', 'content': 0.15655478835105896, 'timestamp': '2025-10-02 00:11:25.008768', 'step': 371, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:25.064061', 'step': 371, 'epoch': 1}
{'type': 'loss', 'content': 0.0880405455827713, 'timestamp': '2025-10-02 00:11:25.074230', 'step': 372, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:25.129177', 'step': 372, 'epoch': 1}
{'type': 'loss', 'content': 0.05400000885128975, 'timestamp': '2025-10-02 00:11:25.131585', 'step': 373, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:25.186136', 'step': 373, 'epoch': 1}
{'type': 'loss', 'content': 0.11638415604829788, 'timestamp': '2025-10-02 00:11:25.188464', 'step': 374, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:25.249115', 'step': 374, 'epoch': 1}
{'type': 'loss', 'content': 0.08458342403173447, 'timestamp': '2025-10-02 00:11:25.259624', 'step': 375, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:25.313642', 'step': 375, 'epoch': 1}
{'type': 'loss', 'content': 0.11694245785474777, 'timestamp': '2025-10-02 00:11:25.323787', 'step': 376, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:25.379755', 'step': 376, 'epoch': 1}
{'type': 'loss', 'content': 0.10088789463043213, 'timestamp': '2025-10-02 00:11:25.383400', 'step': 377, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:25.436901', 'step': 377, 'epoch': 1}
{'type': 'loss', 'content': 0.060609206557273865, 'timestamp': '2025-10-02 00:11:25.439417', 'step': 378, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:25.493100', 'step': 378, 'epoch': 1}
{'type': 'loss', 'content': 0.11611692607402802, 'timestamp': '2025-10-02 00:11:25.495230', 'step': 379, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:25.548632', 'step': 379, 'epoch': 1}
{'type': 'loss', 'content': 0.09407931566238403, 'timestamp': '2025-10-02 00:11:25.554310', 'step': 380, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:25.607278', 'step': 380, 'epoch': 1}
{'type': 'loss', 'content': 0.16607484221458435, 'timestamp': '2025-10-02 00:11:25.609704', 'step': 381, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:25.665470', 'step': 381, 'epoch': 1}
{'type': 'loss', 'content': 0.04670419543981552, 'timestamp': '2025-10-02 00:11:25.675003', 'step': 382, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:25.730117', 'step': 382, 'epoch': 1}
{'type': 'loss', 'content': 0.03047057054936886, 'timestamp': '2025-10-02 00:11:25.736222', 'step': 383, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:11:25.803070', 'step': 383, 'epoch': 1}
{'type': 'loss', 'content': 0.04687697812914848, 'timestamp': '2025-10-02 00:11:25.815850', 'step': 384, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:25.869595', 'step': 384, 'epoch': 1}
{'type': 'loss', 'content': 0.01611207239329815, 'timestamp': '2025-10-02 00:11:25.877138', 'step': 385, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:25.931242', 'step': 385, 'epoch': 1}
{'type': 'loss', 'content': 0.11626721173524857, 'timestamp': '2025-10-02 00:11:25.934388', 'step': 386, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:25.988246', 'step': 386, 'epoch': 1}
{'type': 'loss', 'content': 0.24067306518554688, 'timestamp': '2025-10-02 00:11:25.990765', 'step': 387, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:26.044078', 'step': 387, 'epoch': 1}
{'type': 'loss', 'content': 0.22462286055088043, 'timestamp': '2025-10-02 00:11:26.049764', 'step': 388, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:26.103034', 'step': 388, 'epoch': 1}
{'type': 'loss', 'content': 0.15897990763187408, 'timestamp': '2025-10-02 00:11:26.109024', 'step': 389, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:26.164375', 'step': 389, 'epoch': 1}
{'type': 'loss', 'content': 0.08558935672044754, 'timestamp': '2025-10-02 00:11:26.173745', 'step': 390, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:26.228005', 'step': 390, 'epoch': 1}
{'type': 'loss', 'content': 0.16990475356578827, 'timestamp': '2025-10-02 00:11:26.230454', 'step': 391, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:26.285098', 'step': 391, 'epoch': 1}
{'type': 'loss', 'content': 0.06832819432020187, 'timestamp': '2025-10-02 00:11:26.291000', 'step': 392, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:26.344687', 'step': 392, 'epoch': 1}
{'type': 'loss', 'content': 0.08776260912418365, 'timestamp': '2025-10-02 00:11:26.346708', 'step': 393, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:26.400373', 'step': 393, 'epoch': 1}
{'type': 'loss', 'content': 0.046871431171894073, 'timestamp': '2025-10-02 00:11:26.407980', 'step': 394, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:26.461725', 'step': 394, 'epoch': 1}
{'type': 'loss', 'content': 0.10332932323217392, 'timestamp': '2025-10-02 00:11:26.463943', 'step': 395, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:26.517538', 'step': 395, 'epoch': 1}
{'type': 'loss', 'content': 0.08232751488685608, 'timestamp': '2025-10-02 00:11:26.523402', 'step': 396, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:26.576170', 'step': 396, 'epoch': 1}
{'type': 'loss', 'content': 0.0852091908454895, 'timestamp': '2025-10-02 00:11:26.578503', 'step': 397, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:26.633096', 'step': 397, 'epoch': 1}
{'type': 'loss', 'content': 0.1337316930294037, 'timestamp': '2025-10-02 00:11:26.635546', 'step': 398, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:26.690933', 'step': 398, 'epoch': 1}
{'type': 'loss', 'content': 0.03447144851088524, 'timestamp': '2025-10-02 00:11:26.700316', 'step': 399, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:26.753907', 'step': 399, 'epoch': 1}
{'type': 'loss', 'content': 0.27079179883003235, 'timestamp': '2025-10-02 00:11:26.759881', 'step': 400, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:26.813834', 'step': 400, 'epoch': 1}
{'type': 'loss', 'content': 0.057400669902563095, 'timestamp': '2025-10-02 00:11:26.821484', 'step': 401, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:26.875126', 'step': 401, 'epoch': 1}
{'type': 'loss', 'content': 0.10437104105949402, 'timestamp': '2025-10-02 00:11:26.877280', 'step': 402, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:11:26.940766', 'step': 402, 'epoch': 1}
{'type': 'loss', 'content': 0.08291789889335632, 'timestamp': '2025-10-02 00:11:26.950978', 'step': 403, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:27.005019', 'step': 403, 'epoch': 1}
{'type': 'loss', 'content': 0.13447736203670502, 'timestamp': '2025-10-02 00:11:27.010408', 'step': 404, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:27.064136', 'step': 404, 'epoch': 1}
{'type': 'loss', 'content': 0.05266616865992546, 'timestamp': '2025-10-02 00:11:27.071809', 'step': 405, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:27.126024', 'step': 405, 'epoch': 1}
{'type': 'loss', 'content': 0.1880318820476532, 'timestamp': '2025-10-02 00:11:27.128406', 'step': 406, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:27.190743', 'step': 406, 'epoch': 1}
{'type': 'loss', 'content': 0.06047005578875542, 'timestamp': '2025-10-02 00:11:27.201261', 'step': 407, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:27.257544', 'step': 407, 'epoch': 1}
{'type': 'loss', 'content': 0.03117848001420498, 'timestamp': '2025-10-02 00:11:27.267850', 'step': 408, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:27.321044', 'step': 408, 'epoch': 1}
{'type': 'loss', 'content': 0.030430683866143227, 'timestamp': '2025-10-02 00:11:27.323089', 'step': 409, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:27.376641', 'step': 409, 'epoch': 1}
{'type': 'loss', 'content': 0.08633057028055191, 'timestamp': '2025-10-02 00:11:27.378951', 'step': 410, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:27.432483', 'step': 410, 'epoch': 1}
{'type': 'loss', 'content': 0.07700379192829132, 'timestamp': '2025-10-02 00:11:27.434880', 'step': 411, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:11:27.493192', 'step': 411, 'epoch': 1}
{'type': 'loss', 'content': 0.06910832226276398, 'timestamp': '2025-10-02 00:11:27.504164', 'step': 412, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:27.557066', 'step': 412, 'epoch': 1}
{'type': 'loss', 'content': 0.2502760887145996, 'timestamp': '2025-10-02 00:11:27.559387', 'step': 413, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:27.612909', 'step': 413, 'epoch': 1}
{'type': 'loss', 'content': 0.15641669929027557, 'timestamp': '2025-10-02 00:11:27.615146', 'step': 414, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:27.668594', 'step': 414, 'epoch': 1}
{'type': 'loss', 'content': 0.18125411868095398, 'timestamp': '2025-10-02 00:11:27.670838', 'step': 415, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:27.724292', 'step': 415, 'epoch': 1}
{'type': 'loss', 'content': 0.0738913044333458, 'timestamp': '2025-10-02 00:11:27.729858', 'step': 416, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:11:27.782730', 'step': 416, 'epoch': 1}
{'type': 'loss', 'content': 0.1693756878376007, 'timestamp': '2025-10-02 00:11:27.784811', 'step': 417, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:27.838187', 'step': 417, 'epoch': 1}
{'type': 'loss', 'content': 0.08762345463037491, 'timestamp': '2025-10-02 00:11:27.840745', 'step': 418, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:27.893817', 'step': 418, 'epoch': 1}
{'type': 'loss', 'content': 0.10998352617025375, 'timestamp': '2025-10-02 00:11:27.896090', 'step': 419, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:27.957804', 'step': 419, 'epoch': 1}
{'type': 'loss', 'content': 0.051644206047058105, 'timestamp': '2025-10-02 00:11:27.969077', 'step': 420, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:11:28.022223', 'step': 420, 'epoch': 1}
{'type': 'loss', 'content': 0.13529856503009796, 'timestamp': '2025-10-02 00:11:28.024988', 'step': 421, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:28.078495', 'step': 421, 'epoch': 1}
{'type': 'loss', 'content': 0.08919667452573776, 'timestamp': '2025-10-02 00:11:28.084263', 'step': 422, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:28.138771', 'step': 422, 'epoch': 1}
{'type': 'loss', 'content': 0.10529790073633194, 'timestamp': '2025-10-02 00:11:28.140876', 'step': 423, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:28.194070', 'step': 423, 'epoch': 1}
{'type': 'loss', 'content': 0.17700377106666565, 'timestamp': '2025-10-02 00:11:28.200474', 'step': 424, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:28.252525', 'step': 424, 'epoch': 1}
{'type': 'loss', 'content': 0.14424213767051697, 'timestamp': '2025-10-02 00:11:28.255360', 'step': 425, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:28.312160', 'step': 425, 'epoch': 1}
{'type': 'loss', 'content': 0.1748153567314148, 'timestamp': '2025-10-02 00:11:28.318264', 'step': 426, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:28.371742', 'step': 426, 'epoch': 1}
{'type': 'loss', 'content': 0.07736826688051224, 'timestamp': '2025-10-02 00:11:28.374348', 'step': 427, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:28.427721', 'step': 427, 'epoch': 1}
{'type': 'loss', 'content': 0.09139131754636765, 'timestamp': '2025-10-02 00:11:28.434314', 'step': 428, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:28.487600', 'step': 428, 'epoch': 1}
{'type': 'loss', 'content': 0.1321246325969696, 'timestamp': '2025-10-02 00:11:28.489548', 'step': 429, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:28.543480', 'step': 429, 'epoch': 1}
{'type': 'loss', 'content': 0.0950055792927742, 'timestamp': '2025-10-02 00:11:28.551086', 'step': 430, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:28.605202', 'step': 430, 'epoch': 1}
{'type': 'loss', 'content': 0.09702668339014053, 'timestamp': '2025-10-02 00:11:28.607155', 'step': 431, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:28.660052', 'step': 431, 'epoch': 1}
{'type': 'loss', 'content': 0.0921998843550682, 'timestamp': '2025-10-02 00:11:28.665670', 'step': 432, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:28.717998', 'step': 432, 'epoch': 1}
{'type': 'loss', 'content': 0.053584855049848557, 'timestamp': '2025-10-02 00:11:28.719929', 'step': 433, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:28.773859', 'step': 433, 'epoch': 1}
{'type': 'loss', 'content': 0.07685467600822449, 'timestamp': '2025-10-02 00:11:28.779844', 'step': 434, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:28.833902', 'step': 434, 'epoch': 1}
{'type': 'loss', 'content': 0.08100619912147522, 'timestamp': '2025-10-02 00:11:28.841395', 'step': 435, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:28.895316', 'step': 435, 'epoch': 1}
{'type': 'loss', 'content': 0.11632216721773148, 'timestamp': '2025-10-02 00:11:28.901044', 'step': 436, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:28.954691', 'step': 436, 'epoch': 1}
{'type': 'loss', 'content': 0.06800761073827744, 'timestamp': '2025-10-02 00:11:28.964353', 'step': 437, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:29.017999', 'step': 437, 'epoch': 1}
{'type': 'loss', 'content': 0.17938286066055298, 'timestamp': '2025-10-02 00:11:29.021411', 'step': 438, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:29.076138', 'step': 438, 'epoch': 1}
{'type': 'loss', 'content': 0.13878366351127625, 'timestamp': '2025-10-02 00:11:29.085692', 'step': 439, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:11:29.157929', 'step': 439, 'epoch': 1}
{'type': 'loss', 'content': 0.03470864146947861, 'timestamp': '2025-10-02 00:11:29.171107', 'step': 440, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:29.230851', 'step': 440, 'epoch': 1}
{'type': 'loss', 'content': 0.08363429456949234, 'timestamp': '2025-10-02 00:11:29.242182', 'step': 441, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:29.296014', 'step': 441, 'epoch': 1}
{'type': 'loss', 'content': 0.10738449543714523, 'timestamp': '2025-10-02 00:11:29.298055', 'step': 442, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:29.351332', 'step': 442, 'epoch': 1}
{'type': 'loss', 'content': 0.06831781566143036, 'timestamp': '2025-10-02 00:11:29.359044', 'step': 443, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:29.412877', 'step': 443, 'epoch': 1}
{'type': 'loss', 'content': 0.10095526278018951, 'timestamp': '2025-10-02 00:11:29.418470', 'step': 444, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:29.471100', 'step': 444, 'epoch': 1}
{'type': 'loss', 'content': 0.26214319467544556, 'timestamp': '2025-10-02 00:11:29.473232', 'step': 445, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:29.526716', 'step': 445, 'epoch': 1}
{'type': 'loss', 'content': 0.06589756906032562, 'timestamp': '2025-10-02 00:11:29.534282', 'step': 446, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:29.590489', 'step': 446, 'epoch': 1}
{'type': 'loss', 'content': 0.0805986076593399, 'timestamp': '2025-10-02 00:11:29.600030', 'step': 447, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:29.654033', 'step': 447, 'epoch': 1}
{'type': 'loss', 'content': 0.06937337666749954, 'timestamp': '2025-10-02 00:11:29.660870', 'step': 448, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:29.714167', 'step': 448, 'epoch': 1}
{'type': 'loss', 'content': 0.03970632702112198, 'timestamp': '2025-10-02 00:11:29.724452', 'step': 449, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:29.778390', 'step': 449, 'epoch': 1}
{'type': 'loss', 'content': 0.252096563577652, 'timestamp': '2025-10-02 00:11:29.780777', 'step': 450, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:29.834942', 'step': 450, 'epoch': 1}
{'type': 'loss', 'content': 0.06315124034881592, 'timestamp': '2025-10-02 00:11:29.840987', 'step': 451, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:29.894898', 'step': 451, 'epoch': 1}
{'type': 'loss', 'content': 0.07626432925462723, 'timestamp': '2025-10-02 00:11:29.900833', 'step': 452, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:29.953097', 'step': 452, 'epoch': 1}
{'type': 'loss', 'content': 0.20268070697784424, 'timestamp': '2025-10-02 00:11:29.955385', 'step': 453, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:30.008960', 'step': 453, 'epoch': 1}
{'type': 'loss', 'content': 0.07321389764547348, 'timestamp': '2025-10-02 00:11:30.011219', 'step': 454, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:30.064329', 'step': 454, 'epoch': 1}
{'type': 'loss', 'content': 0.1420927196741104, 'timestamp': '2025-10-02 00:11:30.066458', 'step': 455, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:30.120134', 'step': 455, 'epoch': 1}
{'type': 'loss', 'content': 0.0815303772687912, 'timestamp': '2025-10-02 00:11:30.126919', 'step': 456, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:30.181377', 'step': 456, 'epoch': 1}
{'type': 'loss', 'content': 0.14739826321601868, 'timestamp': '2025-10-02 00:11:30.191004', 'step': 457, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:30.245047', 'step': 457, 'epoch': 1}
{'type': 'loss', 'content': 0.08826444298028946, 'timestamp': '2025-10-02 00:11:30.252621', 'step': 458, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:30.306662', 'step': 458, 'epoch': 1}
{'type': 'loss', 'content': 0.07893890142440796, 'timestamp': '2025-10-02 00:11:30.309117', 'step': 459, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:30.364980', 'step': 459, 'epoch': 1}
{'type': 'loss', 'content': 0.05938787758350372, 'timestamp': '2025-10-02 00:11:30.375347', 'step': 460, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:30.428471', 'step': 460, 'epoch': 1}
{'type': 'loss', 'content': 0.1453683227300644, 'timestamp': '2025-10-02 00:11:30.434614', 'step': 461, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:30.488120', 'step': 461, 'epoch': 1}
{'type': 'loss', 'content': 0.09224832057952881, 'timestamp': '2025-10-02 00:11:30.495631', 'step': 462, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:11:30.549955', 'step': 462, 'epoch': 1}
{'type': 'loss', 'content': 0.1914239376783371, 'timestamp': '2025-10-02 00:11:30.552092', 'step': 463, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:30.606073', 'step': 463, 'epoch': 1}
{'type': 'loss', 'content': 0.12093067169189453, 'timestamp': '2025-10-02 00:11:30.616208', 'step': 464, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:30.668800', 'step': 464, 'epoch': 1}
{'type': 'loss', 'content': 0.14015597105026245, 'timestamp': '2025-10-02 00:11:30.671327', 'step': 465, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:30.725083', 'step': 465, 'epoch': 1}
{'type': 'loss', 'content': 0.08569205552339554, 'timestamp': '2025-10-02 00:11:30.732701', 'step': 466, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:30.787050', 'step': 466, 'epoch': 1}
{'type': 'loss', 'content': 0.07521188259124756, 'timestamp': '2025-10-02 00:11:30.794649', 'step': 467, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:30.848218', 'step': 467, 'epoch': 1}
{'type': 'loss', 'content': 0.09490519016981125, 'timestamp': '2025-10-02 00:11:30.855059', 'step': 468, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:30.908833', 'step': 468, 'epoch': 1}
{'type': 'loss', 'content': 0.09125518798828125, 'timestamp': '2025-10-02 00:11:30.910727', 'step': 469, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:30.963882', 'step': 469, 'epoch': 1}
{'type': 'loss', 'content': 0.12152348458766937, 'timestamp': '2025-10-02 00:11:30.971538', 'step': 470, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:31.025357', 'step': 470, 'epoch': 1}
{'type': 'loss', 'content': 0.04519608989357948, 'timestamp': '2025-10-02 00:11:31.034695', 'step': 471, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:31.089159', 'step': 471, 'epoch': 1}
{'type': 'loss', 'content': 0.07115032523870468, 'timestamp': '2025-10-02 00:11:31.094766', 'step': 472, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:31.147814', 'step': 472, 'epoch': 1}
{'type': 'loss', 'content': 0.09509455412626266, 'timestamp': '2025-10-02 00:11:31.149822', 'step': 473, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:31.205127', 'step': 473, 'epoch': 1}
{'type': 'loss', 'content': 0.03578947111964226, 'timestamp': '2025-10-02 00:11:31.214673', 'step': 474, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:31.271041', 'step': 474, 'epoch': 1}
{'type': 'loss', 'content': 0.05753999575972557, 'timestamp': '2025-10-02 00:11:31.273082', 'step': 475, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:31.327136', 'step': 475, 'epoch': 1}
{'type': 'loss', 'content': 0.11909959465265274, 'timestamp': '2025-10-02 00:11:31.337299', 'step': 476, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:31.391239', 'step': 476, 'epoch': 1}
{'type': 'loss', 'content': 0.0849241390824318, 'timestamp': '2025-10-02 00:11:31.393464', 'step': 477, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:31.447659', 'step': 477, 'epoch': 1}
{'type': 'loss', 'content': 0.09954016655683517, 'timestamp': '2025-10-02 00:11:31.449822', 'step': 478, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:31.505050', 'step': 478, 'epoch': 1}
{'type': 'loss', 'content': 0.11692099273204803, 'timestamp': '2025-10-02 00:11:31.510976', 'step': 479, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:31.565238', 'step': 479, 'epoch': 1}
{'type': 'loss', 'content': 0.07821603864431381, 'timestamp': '2025-10-02 00:11:31.571135', 'step': 480, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:31.625269', 'step': 480, 'epoch': 1}
{'type': 'loss', 'content': 0.12431617826223373, 'timestamp': '2025-10-02 00:11:31.627530', 'step': 481, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:31.680991', 'step': 481, 'epoch': 1}
{'type': 'loss', 'content': 0.07463380694389343, 'timestamp': '2025-10-02 00:11:31.690369', 'step': 482, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:31.745366', 'step': 482, 'epoch': 1}
{'type': 'loss', 'content': 0.2259702831506729, 'timestamp': '2025-10-02 00:11:31.748114', 'step': 483, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:31.802550', 'step': 483, 'epoch': 1}
{'type': 'loss', 'content': 0.12207312136888504, 'timestamp': '2025-10-02 00:11:31.808754', 'step': 484, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:31.864116', 'step': 484, 'epoch': 1}
{'type': 'loss', 'content': 0.06580836325883865, 'timestamp': '2025-10-02 00:11:31.866881', 'step': 485, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:31.922071', 'step': 485, 'epoch': 1}
{'type': 'loss', 'content': 0.10901843011379242, 'timestamp': '2025-10-02 00:11:31.924587', 'step': 486, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:11:31.978811', 'step': 486, 'epoch': 1}
{'type': 'loss', 'content': 0.09262576699256897, 'timestamp': '2025-10-02 00:11:31.982123', 'step': 487, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:32.043416', 'step': 487, 'epoch': 1}
{'type': 'loss', 'content': 0.09722230583429337, 'timestamp': '2025-10-02 00:11:32.054733', 'step': 488, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:32.109592', 'step': 488, 'epoch': 1}
{'type': 'loss', 'content': 0.18671958148479462, 'timestamp': '2025-10-02 00:11:32.112391', 'step': 489, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:32.167349', 'step': 489, 'epoch': 1}
{'type': 'loss', 'content': 0.09573034942150116, 'timestamp': '2025-10-02 00:11:32.169918', 'step': 490, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:32.228011', 'step': 490, 'epoch': 1}
{'type': 'loss', 'content': 0.0626470297574997, 'timestamp': '2025-10-02 00:11:32.231038', 'step': 491, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:32.287153', 'step': 491, 'epoch': 1}
{'type': 'loss', 'content': 0.08657897263765335, 'timestamp': '2025-10-02 00:11:32.293322', 'step': 492, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:32.347000', 'step': 492, 'epoch': 1}
{'type': 'loss', 'content': 0.12700006365776062, 'timestamp': '2025-10-02 00:11:32.349237', 'step': 493, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:32.403852', 'step': 493, 'epoch': 1}
{'type': 'loss', 'content': 0.13037338852882385, 'timestamp': '2025-10-02 00:11:32.406974', 'step': 494, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:32.463149', 'step': 494, 'epoch': 1}
{'type': 'loss', 'content': 0.10822837054729462, 'timestamp': '2025-10-02 00:11:32.472726', 'step': 495, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:32.527669', 'step': 495, 'epoch': 1}
{'type': 'loss', 'content': 0.07360774278640747, 'timestamp': '2025-10-02 00:11:32.536062', 'step': 496, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:32.591619', 'step': 496, 'epoch': 1}
{'type': 'loss', 'content': 0.016088614240288734, 'timestamp': '2025-10-02 00:11:32.597731', 'step': 497, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:32.653405', 'step': 497, 'epoch': 1}
{'type': 'loss', 'content': 0.0228459183126688, 'timestamp': '2025-10-02 00:11:32.655817', 'step': 498, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:32.710618', 'step': 498, 'epoch': 1}
{'type': 'loss', 'content': 0.08280499279499054, 'timestamp': '2025-10-02 00:11:32.716476', 'step': 499, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:32.773004', 'step': 499, 'epoch': 1}
{'type': 'loss', 'content': 0.07810495048761368, 'timestamp': '2025-10-02 00:11:32.779273', 'step': 500, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 500', 'timestamp': '2025-10-02 00:11:33.212344', 'step': 500, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:33.265866', 'step': 500, 'epoch': 1}
{'type': 'loss', 'content': 0.12302172929048538, 'timestamp': '2025-10-02 00:11:33.268812', 'step': 501, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:33.325987', 'step': 501, 'epoch': 1}
{'type': 'loss', 'content': 0.04959145560860634, 'timestamp': '2025-10-02 00:11:33.328887', 'step': 502, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:33.383744', 'step': 502, 'epoch': 1}
{'type': 'loss', 'content': 0.056184448301792145, 'timestamp': '2025-10-02 00:11:33.385888', 'step': 503, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:33.444712', 'step': 503, 'epoch': 1}
{'type': 'loss', 'content': 0.12532483041286469, 'timestamp': '2025-10-02 00:11:33.453111', 'step': 504, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:33.508001', 'step': 504, 'epoch': 1}
{'type': 'loss', 'content': 0.052444927394390106, 'timestamp': '2025-10-02 00:11:33.510580', 'step': 505, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:33.566096', 'step': 505, 'epoch': 1}
{'type': 'loss', 'content': 0.12675194442272186, 'timestamp': '2025-10-02 00:11:33.568644', 'step': 506, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:33.623676', 'step': 506, 'epoch': 1}
{'type': 'loss', 'content': 0.2618630826473236, 'timestamp': '2025-10-02 00:11:33.626116', 'step': 507, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:33.680822', 'step': 507, 'epoch': 1}
{'type': 'loss', 'content': 0.11953696608543396, 'timestamp': '2025-10-02 00:11:33.688605', 'step': 508, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:33.744416', 'step': 508, 'epoch': 1}
{'type': 'loss', 'content': 0.05588759481906891, 'timestamp': '2025-10-02 00:11:33.746693', 'step': 509, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:11:33.814034', 'step': 509, 'epoch': 1}
{'type': 'loss', 'content': 0.03252720087766647, 'timestamp': '2025-10-02 00:11:33.825949', 'step': 510, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:33.881301', 'step': 510, 'epoch': 1}
{'type': 'loss', 'content': 0.07350829243659973, 'timestamp': '2025-10-02 00:11:33.890654', 'step': 511, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:33.944724', 'step': 511, 'epoch': 1}
{'type': 'loss', 'content': 0.20573928952217102, 'timestamp': '2025-10-02 00:11:33.950545', 'step': 512, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:34.004590', 'step': 512, 'epoch': 1}
{'type': 'loss', 'content': 0.18987618386745453, 'timestamp': '2025-10-02 00:11:34.006707', 'step': 513, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:34.060882', 'step': 513, 'epoch': 1}
{'type': 'loss', 'content': 0.09568673372268677, 'timestamp': '2025-10-02 00:11:34.063495', 'step': 514, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:34.118311', 'step': 514, 'epoch': 1}
{'type': 'loss', 'content': 0.06742393970489502, 'timestamp': '2025-10-02 00:11:34.124261', 'step': 515, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:11:34.194758', 'step': 515, 'epoch': 1}
{'type': 'loss', 'content': 0.02596512995660305, 'timestamp': '2025-10-02 00:11:34.208192', 'step': 516, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:34.263466', 'step': 516, 'epoch': 1}
{'type': 'loss', 'content': 0.13249395787715912, 'timestamp': '2025-10-02 00:11:34.271047', 'step': 517, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:34.324594', 'step': 517, 'epoch': 1}
{'type': 'loss', 'content': 0.1286345273256302, 'timestamp': '2025-10-02 00:11:34.326693', 'step': 518, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:34.380771', 'step': 518, 'epoch': 1}
{'type': 'loss', 'content': 0.1758238822221756, 'timestamp': '2025-10-02 00:11:34.382962', 'step': 519, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:34.437337', 'step': 519, 'epoch': 1}
{'type': 'loss', 'content': 0.055304042994976044, 'timestamp': '2025-10-02 00:11:34.445636', 'step': 520, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:34.498808', 'step': 520, 'epoch': 1}
{'type': 'loss', 'content': 0.1340712457895279, 'timestamp': '2025-10-02 00:11:34.506381', 'step': 521, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:34.560072', 'step': 521, 'epoch': 1}
{'type': 'loss', 'content': 0.23136930167675018, 'timestamp': '2025-10-02 00:11:34.567590', 'step': 522, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:34.623081', 'step': 522, 'epoch': 1}
{'type': 'loss', 'content': 0.08817306160926819, 'timestamp': '2025-10-02 00:11:34.630536', 'step': 523, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:11:34.684274', 'step': 523, 'epoch': 1}
{'type': 'loss', 'content': 0.21373336017131805, 'timestamp': '2025-10-02 00:11:34.690366', 'step': 524, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:11:34.751924', 'step': 524, 'epoch': 1}
{'type': 'loss', 'content': 0.03831281512975693, 'timestamp': '2025-10-02 00:11:34.763702', 'step': 525, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:34.821491', 'step': 525, 'epoch': 1}
{'type': 'loss', 'content': 0.03976704180240631, 'timestamp': '2025-10-02 00:11:34.829139', 'step': 526, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:34.882783', 'step': 526, 'epoch': 1}
{'type': 'loss', 'content': 0.06531321257352829, 'timestamp': '2025-10-02 00:11:34.885052', 'step': 527, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:34.939613', 'step': 527, 'epoch': 1}
{'type': 'loss', 'content': 0.11180976033210754, 'timestamp': '2025-10-02 00:11:34.945271', 'step': 528, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:35.000242', 'step': 528, 'epoch': 1}
{'type': 'loss', 'content': 0.127069354057312, 'timestamp': '2025-10-02 00:11:35.002471', 'step': 529, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:35.056584', 'step': 529, 'epoch': 1}
{'type': 'loss', 'content': 0.11741971969604492, 'timestamp': '2025-10-02 00:11:35.058805', 'step': 530, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:35.114154', 'step': 530, 'epoch': 1}
{'type': 'loss', 'content': 0.08256848901510239, 'timestamp': '2025-10-02 00:11:35.123718', 'step': 531, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:11:35.182236', 'step': 531, 'epoch': 1}
{'type': 'loss', 'content': 0.19813936948776245, 'timestamp': '2025-10-02 00:11:35.188075', 'step': 532, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:35.240775', 'step': 532, 'epoch': 1}
{'type': 'loss', 'content': 0.10950178653001785, 'timestamp': '2025-10-02 00:11:35.247041', 'step': 533, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:35.315189', 'step': 533, 'epoch': 1}
{'type': 'loss', 'content': 0.07153470069169998, 'timestamp': '2025-10-02 00:11:35.325684', 'step': 534, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:35.379632', 'step': 534, 'epoch': 1}
{'type': 'loss', 'content': 0.17930366098880768, 'timestamp': '2025-10-02 00:11:35.381784', 'step': 535, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:35.435629', 'step': 535, 'epoch': 1}
{'type': 'loss', 'content': 0.17624260485172272, 'timestamp': '2025-10-02 00:11:35.441452', 'step': 536, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:35.494792', 'step': 536, 'epoch': 1}
{'type': 'loss', 'content': 0.07536282390356064, 'timestamp': '2025-10-02 00:11:35.496942', 'step': 537, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:35.550509', 'step': 537, 'epoch': 1}
{'type': 'loss', 'content': 0.1672978550195694, 'timestamp': '2025-10-02 00:11:35.553039', 'step': 538, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:35.607328', 'step': 538, 'epoch': 1}
{'type': 'loss', 'content': 0.1517428606748581, 'timestamp': '2025-10-02 00:11:35.609724', 'step': 539, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:35.662758', 'step': 539, 'epoch': 1}
{'type': 'loss', 'content': 0.2705797851085663, 'timestamp': '2025-10-02 00:11:35.668645', 'step': 540, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:35.722501', 'step': 540, 'epoch': 1}
{'type': 'loss', 'content': 0.08485691994428635, 'timestamp': '2025-10-02 00:11:35.732785', 'step': 541, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:35.786673', 'step': 541, 'epoch': 1}
{'type': 'loss', 'content': 0.05005917325615883, 'timestamp': '2025-10-02 00:11:35.796042', 'step': 542, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:35.849708', 'step': 542, 'epoch': 1}
{'type': 'loss', 'content': 0.12047284841537476, 'timestamp': '2025-10-02 00:11:35.852236', 'step': 543, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:35.907204', 'step': 543, 'epoch': 1}
{'type': 'loss', 'content': 0.44737666845321655, 'timestamp': '2025-10-02 00:11:35.912888', 'step': 544, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:35.966375', 'step': 544, 'epoch': 1}
{'type': 'loss', 'content': 0.11887554824352264, 'timestamp': '2025-10-02 00:11:35.970776', 'step': 545, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:36.023980', 'step': 545, 'epoch': 1}
{'type': 'loss', 'content': 0.20450975000858307, 'timestamp': '2025-10-02 00:11:36.026234', 'step': 546, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:36.080281', 'step': 546, 'epoch': 1}
{'type': 'loss', 'content': 0.051447730511426926, 'timestamp': '2025-10-02 00:11:36.083218', 'step': 547, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:36.136738', 'step': 547, 'epoch': 1}
{'type': 'loss', 'content': 0.10444629192352295, 'timestamp': '2025-10-02 00:11:36.142480', 'step': 548, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:36.195492', 'step': 548, 'epoch': 1}
{'type': 'loss', 'content': 0.07755739986896515, 'timestamp': '2025-10-02 00:11:36.197922', 'step': 549, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:36.251992', 'step': 549, 'epoch': 1}
{'type': 'loss', 'content': 0.028386900201439857, 'timestamp': '2025-10-02 00:11:36.261370', 'step': 550, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:36.320074', 'step': 550, 'epoch': 1}
{'type': 'loss', 'content': 0.1524670571088791, 'timestamp': '2025-10-02 00:11:36.327460', 'step': 551, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:36.381016', 'step': 551, 'epoch': 1}
{'type': 'loss', 'content': 0.1055147722363472, 'timestamp': '2025-10-02 00:11:36.391338', 'step': 552, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:36.444459', 'step': 552, 'epoch': 1}
{'type': 'loss', 'content': 0.28586187958717346, 'timestamp': '2025-10-02 00:11:36.446580', 'step': 553, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:11:36.508318', 'step': 553, 'epoch': 1}
{'type': 'loss', 'content': 0.08412958681583405, 'timestamp': '2025-10-02 00:11:36.519008', 'step': 554, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:36.572995', 'step': 554, 'epoch': 1}
{'type': 'loss', 'content': 0.09649454057216644, 'timestamp': '2025-10-02 00:11:36.575239', 'step': 555, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:36.630830', 'step': 555, 'epoch': 1}
{'type': 'loss', 'content': 0.17985643446445465, 'timestamp': '2025-10-02 00:11:36.638953', 'step': 556, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:36.697327', 'step': 556, 'epoch': 1}
{'type': 'loss', 'content': 0.08718332648277283, 'timestamp': '2025-10-02 00:11:36.699800', 'step': 557, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:36.764103', 'step': 557, 'epoch': 1}
{'type': 'loss', 'content': 0.05180863291025162, 'timestamp': '2025-10-02 00:11:36.771567', 'step': 558, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:36.826091', 'step': 558, 'epoch': 1}
{'type': 'loss', 'content': 0.07069841772317886, 'timestamp': '2025-10-02 00:11:36.831637', 'step': 559, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:36.892813', 'step': 559, 'epoch': 1}
{'type': 'loss', 'content': 0.08177642524242401, 'timestamp': '2025-10-02 00:11:36.900817', 'step': 560, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:36.954907', 'step': 560, 'epoch': 1}
{'type': 'loss', 'content': 0.23451295495033264, 'timestamp': '2025-10-02 00:11:36.960059', 'step': 561, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:37.017570', 'step': 561, 'epoch': 1}
{'type': 'loss', 'content': 0.12864220142364502, 'timestamp': '2025-10-02 00:11:37.020008', 'step': 562, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:37.086420', 'step': 562, 'epoch': 1}
{'type': 'loss', 'content': 0.031464941799640656, 'timestamp': '2025-10-02 00:11:37.096889', 'step': 563, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:37.150252', 'step': 563, 'epoch': 1}
{'type': 'loss', 'content': 0.12568290531635284, 'timestamp': '2025-10-02 00:11:37.156179', 'step': 564, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:11:37.209892', 'step': 564, 'epoch': 1}
{'type': 'loss', 'content': 0.17436885833740234, 'timestamp': '2025-10-02 00:11:37.212361', 'step': 565, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:37.266482', 'step': 565, 'epoch': 1}
{'type': 'loss', 'content': 0.07009751349687576, 'timestamp': '2025-10-02 00:11:37.268472', 'step': 566, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:11:37.341908', 'step': 566, 'epoch': 1}
{'type': 'loss', 'content': 0.03566509485244751, 'timestamp': '2025-10-02 00:11:37.355129', 'step': 567, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:37.409425', 'step': 567, 'epoch': 1}
{'type': 'loss', 'content': 0.12252183258533478, 'timestamp': '2025-10-02 00:11:37.414921', 'step': 568, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:37.468414', 'step': 568, 'epoch': 1}
{'type': 'loss', 'content': 0.05574728548526764, 'timestamp': '2025-10-02 00:11:37.470587', 'step': 569, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:37.524435', 'step': 569, 'epoch': 1}
{'type': 'loss', 'content': 0.17943212389945984, 'timestamp': '2025-10-02 00:11:37.528123', 'step': 570, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:37.585547', 'step': 570, 'epoch': 1}
{'type': 'loss', 'content': 0.08750160783529282, 'timestamp': '2025-10-02 00:11:37.587885', 'step': 571, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:37.641651', 'step': 571, 'epoch': 1}
{'type': 'loss', 'content': 0.0813092291355133, 'timestamp': '2025-10-02 00:11:37.651641', 'step': 572, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:37.705419', 'step': 572, 'epoch': 1}
{'type': 'loss', 'content': 0.039408475160598755, 'timestamp': '2025-10-02 00:11:37.714798', 'step': 573, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:37.769655', 'step': 573, 'epoch': 1}
{'type': 'loss', 'content': 0.012618200853466988, 'timestamp': '2025-10-02 00:11:37.779097', 'step': 574, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:37.832997', 'step': 574, 'epoch': 1}
{'type': 'loss', 'content': 0.13095510005950928, 'timestamp': '2025-10-02 00:11:37.835718', 'step': 575, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:37.889637', 'step': 575, 'epoch': 1}
{'type': 'loss', 'content': 0.13443253934383392, 'timestamp': '2025-10-02 00:11:37.899569', 'step': 576, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:37.953578', 'step': 576, 'epoch': 1}
{'type': 'loss', 'content': 0.06165934354066849, 'timestamp': '2025-10-02 00:11:37.959393', 'step': 577, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:11:38.020246', 'step': 577, 'epoch': 1}
{'type': 'loss', 'content': 0.2606107294559479, 'timestamp': '2025-10-02 00:11:38.022494', 'step': 578, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:38.079076', 'step': 578, 'epoch': 1}
{'type': 'loss', 'content': 0.12391055375337601, 'timestamp': '2025-10-02 00:11:38.081195', 'step': 579, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:38.134610', 'step': 579, 'epoch': 1}
{'type': 'loss', 'content': 0.10032559931278229, 'timestamp': '2025-10-02 00:11:38.144701', 'step': 580, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:38.198031', 'step': 580, 'epoch': 1}
{'type': 'loss', 'content': 0.09662759304046631, 'timestamp': '2025-10-02 00:11:38.200487', 'step': 581, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:38.254056', 'step': 581, 'epoch': 1}
{'type': 'loss', 'content': 0.24113918840885162, 'timestamp': '2025-10-02 00:11:38.256218', 'step': 582, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:38.311708', 'step': 582, 'epoch': 1}
{'type': 'loss', 'content': 0.20747241377830505, 'timestamp': '2025-10-02 00:11:38.314105', 'step': 583, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:38.373688', 'step': 583, 'epoch': 1}
{'type': 'loss', 'content': 0.07343069463968277, 'timestamp': '2025-10-02 00:11:38.380678', 'step': 584, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:38.440293', 'step': 584, 'epoch': 1}
{'type': 'loss', 'content': 0.2023283690214157, 'timestamp': '2025-10-02 00:11:38.443791', 'step': 585, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:38.504296', 'step': 585, 'epoch': 1}
{'type': 'loss', 'content': 0.14786650240421295, 'timestamp': '2025-10-02 00:11:38.506689', 'step': 586, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:38.566245', 'step': 586, 'epoch': 1}
{'type': 'loss', 'content': 0.1323065310716629, 'timestamp': '2025-10-02 00:11:38.569614', 'step': 587, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:38.628919', 'step': 587, 'epoch': 1}
{'type': 'loss', 'content': 0.13418465852737427, 'timestamp': '2025-10-02 00:11:38.637214', 'step': 588, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:38.695716', 'step': 588, 'epoch': 1}
{'type': 'loss', 'content': 0.15571923553943634, 'timestamp': '2025-10-02 00:11:38.697917', 'step': 589, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:38.757551', 'step': 589, 'epoch': 1}
{'type': 'loss', 'content': 0.08761882036924362, 'timestamp': '2025-10-02 00:11:38.762548', 'step': 590, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:11:38.823504', 'step': 590, 'epoch': 1}
{'type': 'loss', 'content': 0.10539977997541428, 'timestamp': '2025-10-02 00:11:38.825740', 'step': 591, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:38.880938', 'step': 591, 'epoch': 1}
{'type': 'loss', 'content': 0.07625097036361694, 'timestamp': '2025-10-02 00:11:38.888620', 'step': 592, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:11:38.948490', 'step': 592, 'epoch': 1}
{'type': 'loss', 'content': 0.09850216656923294, 'timestamp': '2025-10-02 00:11:38.959932', 'step': 593, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:39.013185', 'step': 593, 'epoch': 1}
{'type': 'loss', 'content': 0.19489051401615143, 'timestamp': '2025-10-02 00:11:39.017243', 'step': 594, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:39.071762', 'step': 594, 'epoch': 1}
{'type': 'loss', 'content': 0.17613957822322845, 'timestamp': '2025-10-02 00:11:39.073779', 'step': 595, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:39.129131', 'step': 595, 'epoch': 1}
{'type': 'loss', 'content': 0.058355093002319336, 'timestamp': '2025-10-02 00:11:39.139438', 'step': 596, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:39.193003', 'step': 596, 'epoch': 1}
{'type': 'loss', 'content': 0.09448414295911789, 'timestamp': '2025-10-02 00:11:39.195112', 'step': 597, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:11:39.257355', 'step': 597, 'epoch': 1}
{'type': 'loss', 'content': 0.07130878418684006, 'timestamp': '2025-10-02 00:11:39.267973', 'step': 598, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:39.323051', 'step': 598, 'epoch': 1}
{'type': 'loss', 'content': 0.15229299664497375, 'timestamp': '2025-10-02 00:11:39.326390', 'step': 599, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:39.379579', 'step': 599, 'epoch': 1}
{'type': 'loss', 'content': 0.11640724539756775, 'timestamp': '2025-10-02 00:11:39.386798', 'step': 600, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:39.440360', 'step': 600, 'epoch': 1}
{'type': 'loss', 'content': 0.24196185171604156, 'timestamp': '2025-10-02 00:11:39.442642', 'step': 601, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:39.498643', 'step': 601, 'epoch': 1}
{'type': 'loss', 'content': 0.07248673588037491, 'timestamp': '2025-10-02 00:11:39.502879', 'step': 602, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:39.557869', 'step': 602, 'epoch': 1}
{'type': 'loss', 'content': 0.06135110929608345, 'timestamp': '2025-10-02 00:11:39.566421', 'step': 603, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:11:39.620204', 'step': 603, 'epoch': 1}
{'type': 'loss', 'content': 0.23393604159355164, 'timestamp': '2025-10-02 00:11:39.627046', 'step': 604, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:39.681660', 'step': 604, 'epoch': 1}
{'type': 'loss', 'content': 0.09530816972255707, 'timestamp': '2025-10-02 00:11:39.683752', 'step': 605, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:39.737754', 'step': 605, 'epoch': 1}
{'type': 'loss', 'content': 0.04689537733793259, 'timestamp': '2025-10-02 00:11:39.739992', 'step': 606, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:39.795209', 'step': 606, 'epoch': 1}
{'type': 'loss', 'content': 0.15466219186782837, 'timestamp': '2025-10-02 00:11:39.797414', 'step': 607, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:39.853330', 'step': 607, 'epoch': 1}
{'type': 'loss', 'content': 0.08029171079397202, 'timestamp': '2025-10-02 00:11:39.859557', 'step': 608, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:39.914155', 'step': 608, 'epoch': 1}
{'type': 'loss', 'content': 0.11984093487262726, 'timestamp': '2025-10-02 00:11:39.924096', 'step': 609, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:39.978892', 'step': 609, 'epoch': 1}
{'type': 'loss', 'content': 0.10803433507680893, 'timestamp': '2025-10-02 00:11:39.981661', 'step': 610, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:11:40.040100', 'step': 610, 'epoch': 1}
{'type': 'loss', 'content': 0.038137223571538925, 'timestamp': '2025-10-02 00:11:40.050239', 'step': 611, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:40.105215', 'step': 611, 'epoch': 1}
{'type': 'loss', 'content': 0.08928937464952469, 'timestamp': '2025-10-02 00:11:40.111167', 'step': 612, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:40.165134', 'step': 612, 'epoch': 1}
{'type': 'loss', 'content': 0.09884228557348251, 'timestamp': '2025-10-02 00:11:40.174295', 'step': 613, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:11:40.229913', 'step': 613, 'epoch': 1}
{'type': 'loss', 'content': 0.11968141794204712, 'timestamp': '2025-10-02 00:11:40.232357', 'step': 614, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:40.287541', 'step': 614, 'epoch': 1}
{'type': 'loss', 'content': 0.052334465086460114, 'timestamp': '2025-10-02 00:11:40.294638', 'step': 615, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:40.349327', 'step': 615, 'epoch': 1}
{'type': 'loss', 'content': 0.13035589456558228, 'timestamp': '2025-10-02 00:11:40.355578', 'step': 616, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:40.409822', 'step': 616, 'epoch': 1}
{'type': 'loss', 'content': 0.03045954927802086, 'timestamp': '2025-10-02 00:11:40.411886', 'step': 617, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:11:40.485467', 'step': 617, 'epoch': 1}
{'type': 'loss', 'content': 0.0520268976688385, 'timestamp': '2025-10-02 00:11:40.498178', 'step': 618, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:40.569000', 'step': 618, 'epoch': 1}
{'type': 'loss', 'content': 0.1777045726776123, 'timestamp': '2025-10-02 00:11:40.571043', 'step': 619, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:40.624805', 'step': 619, 'epoch': 1}
{'type': 'loss', 'content': 0.02169783040881157, 'timestamp': '2025-10-02 00:11:40.630509', 'step': 620, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:40.683703', 'step': 620, 'epoch': 1}
{'type': 'loss', 'content': 0.09773711115121841, 'timestamp': '2025-10-02 00:11:40.685708', 'step': 621, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:40.745986', 'step': 621, 'epoch': 1}
{'type': 'loss', 'content': 0.12446682900190353, 'timestamp': '2025-10-02 00:11:40.748147', 'step': 622, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:40.802827', 'step': 622, 'epoch': 1}
{'type': 'loss', 'content': 0.10427346080541611, 'timestamp': '2025-10-02 00:11:40.805599', 'step': 623, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:40.862223', 'step': 623, 'epoch': 1}
{'type': 'loss', 'content': 0.13244543969631195, 'timestamp': '2025-10-02 00:11:40.868571', 'step': 624, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:40.922947', 'step': 624, 'epoch': 1}
{'type': 'loss', 'content': 0.061342380940914154, 'timestamp': '2025-10-02 00:11:40.930284', 'step': 625, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:40.984851', 'step': 625, 'epoch': 1}
{'type': 'loss', 'content': 0.09667332470417023, 'timestamp': '2025-10-02 00:11:40.990424', 'step': 626, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:11:41.053703', 'step': 626, 'epoch': 1}
{'type': 'loss', 'content': 0.14851970970630646, 'timestamp': '2025-10-02 00:11:41.055617', 'step': 627, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:41.111980', 'step': 627, 'epoch': 1}
{'type': 'loss', 'content': 0.11126261949539185, 'timestamp': '2025-10-02 00:11:41.122236', 'step': 628, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:41.179717', 'step': 628, 'epoch': 1}
{'type': 'loss', 'content': 0.1364777535200119, 'timestamp': '2025-10-02 00:11:41.182535', 'step': 629, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:41.238931', 'step': 629, 'epoch': 1}
{'type': 'loss', 'content': 0.016564730554819107, 'timestamp': '2025-10-02 00:11:41.241515', 'step': 630, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:41.298111', 'step': 630, 'epoch': 1}
{'type': 'loss', 'content': 0.15396597981452942, 'timestamp': '2025-10-02 00:11:41.302948', 'step': 631, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:41.366644', 'step': 631, 'epoch': 1}
{'type': 'loss', 'content': 0.06313767284154892, 'timestamp': '2025-10-02 00:11:41.372352', 'step': 632, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:41.426305', 'step': 632, 'epoch': 1}
{'type': 'loss', 'content': 0.16673292219638824, 'timestamp': '2025-10-02 00:11:41.428435', 'step': 633, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:41.482746', 'step': 633, 'epoch': 1}
{'type': 'loss', 'content': 0.08878381550312042, 'timestamp': '2025-10-02 00:11:41.484725', 'step': 634, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:41.539798', 'step': 634, 'epoch': 1}
{'type': 'loss', 'content': 0.08163583278656006, 'timestamp': '2025-10-02 00:11:41.542467', 'step': 635, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:41.599665', 'step': 635, 'epoch': 1}
{'type': 'loss', 'content': 0.07019293308258057, 'timestamp': '2025-10-02 00:11:41.606220', 'step': 636, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:41.661934', 'step': 636, 'epoch': 1}
{'type': 'loss', 'content': 0.0764555037021637, 'timestamp': '2025-10-02 00:11:41.665059', 'step': 637, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:41.723152', 'step': 637, 'epoch': 1}
{'type': 'loss', 'content': 0.16246312856674194, 'timestamp': '2025-10-02 00:11:41.725722', 'step': 638, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:41.780428', 'step': 638, 'epoch': 1}
{'type': 'loss', 'content': 0.06149870529770851, 'timestamp': '2025-10-02 00:11:41.787098', 'step': 639, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:11:41.849672', 'step': 639, 'epoch': 1}
{'type': 'loss', 'content': 0.024791359901428223, 'timestamp': '2025-10-02 00:11:41.861011', 'step': 640, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:41.916844', 'step': 640, 'epoch': 1}
{'type': 'loss', 'content': 0.08929190784692764, 'timestamp': '2025-10-02 00:11:41.919295', 'step': 641, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:41.974760', 'step': 641, 'epoch': 1}
{'type': 'loss', 'content': 0.12853990495204926, 'timestamp': '2025-10-02 00:11:41.977464', 'step': 642, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:42.033681', 'step': 642, 'epoch': 1}
{'type': 'loss', 'content': 0.09172578901052475, 'timestamp': '2025-10-02 00:11:42.039125', 'step': 643, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:42.096663', 'step': 643, 'epoch': 1}
{'type': 'loss', 'content': 0.10909496247768402, 'timestamp': '2025-10-02 00:11:42.104460', 'step': 644, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:11:42.159490', 'step': 644, 'epoch': 1}
{'type': 'loss', 'content': 0.15176042914390564, 'timestamp': '2025-10-02 00:11:42.163525', 'step': 645, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:42.223424', 'step': 645, 'epoch': 1}
{'type': 'loss', 'content': 0.07429898530244827, 'timestamp': '2025-10-02 00:11:42.229007', 'step': 646, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:42.284352', 'step': 646, 'epoch': 1}
{'type': 'loss', 'content': 0.10473895072937012, 'timestamp': '2025-10-02 00:11:42.291152', 'step': 647, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:42.347961', 'step': 647, 'epoch': 1}
{'type': 'loss', 'content': 0.31640273332595825, 'timestamp': '2025-10-02 00:11:42.353604', 'step': 648, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:11:42.407193', 'step': 648, 'epoch': 1}
{'type': 'loss', 'content': 0.2023734301328659, 'timestamp': '2025-10-02 00:11:42.408683', 'step': 649, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:42.461387', 'step': 649, 'epoch': 1}
{'type': 'loss', 'content': 0.07666939496994019, 'timestamp': '2025-10-02 00:11:42.463086', 'step': 650, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:42.517516', 'step': 650, 'epoch': 1}
{'type': 'loss', 'content': 0.057420749217271805, 'timestamp': '2025-10-02 00:11:42.519722', 'step': 651, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:11:42.577993', 'step': 651, 'epoch': 1}
{'type': 'loss', 'content': 0.08085086196660995, 'timestamp': '2025-10-02 00:11:42.589021', 'step': 652, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:42.642546', 'step': 652, 'epoch': 1}
{'type': 'loss', 'content': 0.20824846625328064, 'timestamp': '2025-10-02 00:11:42.644915', 'step': 653, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:42.699477', 'step': 653, 'epoch': 1}
{'type': 'loss', 'content': 0.12327947467565536, 'timestamp': '2025-10-02 00:11:42.704941', 'step': 654, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:42.758915', 'step': 654, 'epoch': 1}
{'type': 'loss', 'content': 0.18833908438682556, 'timestamp': '2025-10-02 00:11:42.761383', 'step': 655, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:42.816010', 'step': 655, 'epoch': 1}
{'type': 'loss', 'content': 0.02778777852654457, 'timestamp': '2025-10-02 00:11:42.821478', 'step': 656, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:42.875083', 'step': 656, 'epoch': 1}
{'type': 'loss', 'content': 0.041026923805475235, 'timestamp': '2025-10-02 00:11:42.876942', 'step': 657, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:42.930380', 'step': 657, 'epoch': 1}
{'type': 'loss', 'content': 0.08813035488128662, 'timestamp': '2025-10-02 00:11:42.932940', 'step': 658, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:42.986846', 'step': 658, 'epoch': 1}
{'type': 'loss', 'content': 0.1511516571044922, 'timestamp': '2025-10-02 00:11:42.988707', 'step': 659, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:43.042498', 'step': 659, 'epoch': 1}
{'type': 'loss', 'content': 0.03707790747284889, 'timestamp': '2025-10-02 00:11:43.049556', 'step': 660, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:43.103461', 'step': 660, 'epoch': 1}
{'type': 'loss', 'content': 0.09516967833042145, 'timestamp': '2025-10-02 00:11:43.106164', 'step': 661, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:43.160063', 'step': 661, 'epoch': 1}
{'type': 'loss', 'content': 0.0700274184346199, 'timestamp': '2025-10-02 00:11:43.167400', 'step': 662, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:43.221963', 'step': 662, 'epoch': 1}
{'type': 'loss', 'content': 0.2090800702571869, 'timestamp': '2025-10-02 00:11:43.224047', 'step': 663, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:43.277033', 'step': 663, 'epoch': 1}
{'type': 'loss', 'content': 0.16293048858642578, 'timestamp': '2025-10-02 00:11:43.282522', 'step': 664, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:43.335979', 'step': 664, 'epoch': 1}
{'type': 'loss', 'content': 0.09835540503263474, 'timestamp': '2025-10-02 00:11:43.341589', 'step': 665, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:11:43.397665', 'step': 665, 'epoch': 1}
{'type': 'loss', 'content': 0.0718080997467041, 'timestamp': '2025-10-02 00:11:43.406464', 'step': 666, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:11:43.460986', 'step': 666, 'epoch': 1}
{'type': 'loss', 'content': 0.0958610475063324, 'timestamp': '2025-10-02 00:11:43.463569', 'step': 667, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:43.521075', 'step': 667, 'epoch': 1}
{'type': 'loss', 'content': 0.10045494884252548, 'timestamp': '2025-10-02 00:11:43.527215', 'step': 668, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:11:43.581649', 'step': 668, 'epoch': 1}
{'type': 'loss', 'content': 0.04183086380362511, 'timestamp': '2025-10-02 00:11:43.584036', 'step': 669, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:43.638323', 'step': 669, 'epoch': 1}
{'type': 'loss', 'content': 0.22829918563365936, 'timestamp': '2025-10-02 00:11:43.640578', 'step': 670, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:43.694850', 'step': 670, 'epoch': 1}
{'type': 'loss', 'content': 0.07870450615882874, 'timestamp': '2025-10-02 00:11:43.697119', 'step': 671, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:11:43.751086', 'step': 671, 'epoch': 1}
{'type': 'loss', 'content': 0.11623509228229523, 'timestamp': '2025-10-02 00:11:43.756553', 'step': 672, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:43.810317', 'step': 672, 'epoch': 1}
{'type': 'loss', 'content': 0.09930054843425751, 'timestamp': '2025-10-02 00:11:43.817602', 'step': 673, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:11:43.876391', 'step': 673, 'epoch': 1}
{'type': 'loss', 'content': 0.062489770352840424, 'timestamp': '2025-10-02 00:11:43.882021', 'step': 674, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:43.941255', 'step': 674, 'epoch': 1}
{'type': 'loss', 'content': 0.23502099514007568, 'timestamp': '2025-10-02 00:11:43.944287', 'step': 675, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:11:44.024364', 'step': 675, 'epoch': 1}
{'type': 'loss', 'content': 0.010909347794950008, 'timestamp': '2025-10-02 00:11:44.038332', 'step': 676, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:44.093465', 'step': 676, 'epoch': 1}
{'type': 'loss', 'content': 0.22279784083366394, 'timestamp': '2025-10-02 00:11:44.096031', 'step': 677, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:44.154666', 'step': 677, 'epoch': 1}
{'type': 'loss', 'content': 0.2112465649843216, 'timestamp': '2025-10-02 00:11:44.157147', 'step': 678, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:44.230519', 'step': 678, 'epoch': 1}
{'type': 'loss', 'content': 0.13119126856327057, 'timestamp': '2025-10-02 00:11:44.232995', 'step': 679, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:44.289152', 'step': 679, 'epoch': 1}
{'type': 'loss', 'content': 0.10012675076723099, 'timestamp': '2025-10-02 00:11:44.294950', 'step': 680, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:44.349774', 'step': 680, 'epoch': 1}
{'type': 'loss', 'content': 0.12884226441383362, 'timestamp': '2025-10-02 00:11:44.352819', 'step': 681, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:44.409947', 'step': 681, 'epoch': 1}
{'type': 'loss', 'content': 0.09243209660053253, 'timestamp': '2025-10-02 00:11:44.413177', 'step': 682, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:44.485424', 'step': 682, 'epoch': 1}
{'type': 'loss', 'content': 0.09544263780117035, 'timestamp': '2025-10-02 00:11:44.495051', 'step': 683, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:11:44.557882', 'step': 683, 'epoch': 1}
{'type': 'loss', 'content': 0.14099301397800446, 'timestamp': '2025-10-02 00:11:44.573188', 'step': 684, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:44.650548', 'step': 684, 'epoch': 1}
{'type': 'loss', 'content': 0.05826186388731003, 'timestamp': '2025-10-02 00:11:44.658088', 'step': 685, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:11:44.714944', 'step': 685, 'epoch': 1}
{'type': 'loss', 'content': 0.16603399813175201, 'timestamp': '2025-10-02 00:11:44.718757', 'step': 686, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:11:44.791422', 'step': 686, 'epoch': 1}
{'type': 'loss', 'content': 0.03896354138851166, 'timestamp': '2025-10-02 00:11:44.803477', 'step': 687, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:11:44.865352', 'step': 687, 'epoch': 1}
{'type': 'loss', 'content': 0.1150774136185646, 'timestamp': '2025-10-02 00:11:44.875773', 'step': 688, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:11:44.942525', 'step': 688, 'epoch': 1}
{'type': 'loss', 'content': 0.15910910069942474, 'timestamp': '2025-10-02 00:11:44.945502', 'step': 689, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:11:45.020884', 'step': 689, 'epoch': 1}
{'type': 'loss', 'content': 0.12873896956443787, 'timestamp': '2025-10-02 00:11:45.023606', 'step': 690, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:11:45.093278', 'step': 690, 'epoch': 1}
{'type': 'loss', 'content': 0.11334296315908432, 'timestamp': '2025-10-02 00:11:45.096792', 'step': 691, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:45.157324', 'step': 691, 'epoch': 1}
{'type': 'loss', 'content': 0.055833783000707626, 'timestamp': '2025-10-02 00:11:45.168191', 'step': 692, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:11:45.229206', 'step': 692, 'epoch': 1}
{'type': 'loss', 'content': 0.033265773206949234, 'timestamp': '2025-10-02 00:11:45.239254', 'step': 693, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:11:45.313038', 'step': 693, 'epoch': 1}
{'type': 'loss', 'content': 0.06514056026935577, 'timestamp': '2025-10-02 00:11:45.319781', 'step': 694, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:11:45.389655', 'step': 694, 'epoch': 1}
{'type': 'loss', 'content': 0.042694609612226486, 'timestamp': '2025-10-02 00:11:45.399800', 'step': 695, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:11:45.464966', 'step': 695, 'epoch': 1}
{'type': 'loss', 'content': 0.05018635094165802, 'timestamp': '2025-10-02 00:11:45.476171', 'step': 696, 'epoch': 1}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:12:15.261000', 'step': 696, 'epoch': 1}
{'type': 'pplx', 'content': 86.53616801966776, 'timestamp': '2025-10-02 00:12:15.269886', 'step': 696, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:15.327640', 'step': 696, 'epoch': 1}
{'type': 'loss', 'content': 0.09781474620103836, 'timestamp': '2025-10-02 00:12:15.330972', 'step': 697, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:15.393175', 'step': 697, 'epoch': 1}
{'type': 'loss', 'content': 0.0366421677172184, 'timestamp': '2025-10-02 00:12:15.403397', 'step': 698, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:15.460263', 'step': 698, 'epoch': 1}
{'type': 'loss', 'content': 0.277670681476593, 'timestamp': '2025-10-02 00:12:15.463376', 'step': 699, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:15.518843', 'step': 699, 'epoch': 1}
{'type': 'loss', 'content': 0.17389638721942902, 'timestamp': '2025-10-02 00:12:15.528117', 'step': 700, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:15.590988', 'step': 700, 'epoch': 1}
{'type': 'loss', 'content': 0.12128690630197525, 'timestamp': '2025-10-02 00:12:15.600490', 'step': 701, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:12:15.679893', 'step': 701, 'epoch': 1}
{'type': 'loss', 'content': 0.05141567811369896, 'timestamp': '2025-10-02 00:12:15.690786', 'step': 702, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:15.748812', 'step': 702, 'epoch': 1}
{'type': 'loss', 'content': 0.22622476518154144, 'timestamp': '2025-10-02 00:12:15.754788', 'step': 703, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:15.820778', 'step': 703, 'epoch': 1}
{'type': 'loss', 'content': 0.07010379433631897, 'timestamp': '2025-10-02 00:12:15.828946', 'step': 704, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:15.893602', 'step': 704, 'epoch': 1}
{'type': 'loss', 'content': 0.14135710895061493, 'timestamp': '2025-10-02 00:12:15.895728', 'step': 705, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:15.950721', 'step': 705, 'epoch': 1}
{'type': 'loss', 'content': 0.046746402978897095, 'timestamp': '2025-10-02 00:12:15.956537', 'step': 706, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:12:16.028473', 'step': 706, 'epoch': 1}
{'type': 'loss', 'content': 0.06869406998157501, 'timestamp': '2025-10-02 00:12:16.038944', 'step': 707, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:16.108384', 'step': 707, 'epoch': 1}
{'type': 'loss', 'content': 0.14790746569633484, 'timestamp': '2025-10-02 00:12:16.118223', 'step': 708, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:16.188285', 'step': 708, 'epoch': 1}
{'type': 'loss', 'content': 0.0738481879234314, 'timestamp': '2025-10-02 00:12:16.191622', 'step': 709, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:16.251327', 'step': 709, 'epoch': 1}
{'type': 'loss', 'content': 0.049959566444158554, 'timestamp': '2025-10-02 00:12:16.256230', 'step': 710, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:16.314255', 'step': 710, 'epoch': 1}
{'type': 'loss', 'content': 0.08970271050930023, 'timestamp': '2025-10-02 00:12:16.317349', 'step': 711, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:12:16.383030', 'step': 711, 'epoch': 1}
{'type': 'loss', 'content': 0.021042581647634506, 'timestamp': '2025-10-02 00:12:16.394421', 'step': 712, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:16.451013', 'step': 712, 'epoch': 1}
{'type': 'loss', 'content': 0.2130015790462494, 'timestamp': '2025-10-02 00:12:16.457795', 'step': 713, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:16.513118', 'step': 713, 'epoch': 1}
{'type': 'loss', 'content': 0.10553880780935287, 'timestamp': '2025-10-02 00:12:16.517898', 'step': 714, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:16.580540', 'step': 714, 'epoch': 1}
{'type': 'loss', 'content': 0.16934631764888763, 'timestamp': '2025-10-02 00:12:16.585126', 'step': 715, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:12:16.648754', 'step': 715, 'epoch': 1}
{'type': 'loss', 'content': 0.16568434238433838, 'timestamp': '2025-10-02 00:12:16.662784', 'step': 716, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:16.733601', 'step': 716, 'epoch': 1}
{'type': 'loss', 'content': 0.2534528374671936, 'timestamp': '2025-10-02 00:12:16.742244', 'step': 717, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:16.805695', 'step': 717, 'epoch': 1}
{'type': 'loss', 'content': 0.23275908827781677, 'timestamp': '2025-10-02 00:12:16.808937', 'step': 718, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:16.885382', 'step': 718, 'epoch': 1}
{'type': 'loss', 'content': 0.14229366183280945, 'timestamp': '2025-10-02 00:12:16.892458', 'step': 719, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:12:16.971645', 'step': 719, 'epoch': 1}
{'type': 'loss', 'content': 0.0166873000562191, 'timestamp': '2025-10-02 00:12:16.984392', 'step': 720, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:17.044335', 'step': 720, 'epoch': 1}
{'type': 'loss', 'content': 0.10835476964712143, 'timestamp': '2025-10-02 00:12:17.047829', 'step': 721, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:17.106808', 'step': 721, 'epoch': 1}
{'type': 'loss', 'content': 0.12388791143894196, 'timestamp': '2025-10-02 00:12:17.109335', 'step': 722, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:17.164779', 'step': 722, 'epoch': 1}
{'type': 'loss', 'content': 0.22369614243507385, 'timestamp': '2025-10-02 00:12:17.167676', 'step': 723, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:17.234319', 'step': 723, 'epoch': 1}
{'type': 'loss', 'content': 0.13052795827388763, 'timestamp': '2025-10-02 00:12:17.241091', 'step': 724, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:17.300196', 'step': 724, 'epoch': 1}
{'type': 'loss', 'content': 0.05227818712592125, 'timestamp': '2025-10-02 00:12:17.304884', 'step': 725, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:17.363610', 'step': 725, 'epoch': 1}
{'type': 'loss', 'content': 0.13672934472560883, 'timestamp': '2025-10-02 00:12:17.367722', 'step': 726, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:17.427479', 'step': 726, 'epoch': 1}
{'type': 'loss', 'content': 0.04558968171477318, 'timestamp': '2025-10-02 00:12:17.436762', 'step': 727, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:17.517233', 'step': 727, 'epoch': 1}
{'type': 'loss', 'content': 0.11334975063800812, 'timestamp': '2025-10-02 00:12:17.523842', 'step': 728, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:17.593405', 'step': 728, 'epoch': 1}
{'type': 'loss', 'content': 0.07479029148817062, 'timestamp': '2025-10-02 00:12:17.599270', 'step': 729, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:17.657487', 'step': 729, 'epoch': 1}
{'type': 'loss', 'content': 0.0333620049059391, 'timestamp': '2025-10-02 00:12:17.667008', 'step': 730, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:17.729565', 'step': 730, 'epoch': 1}
{'type': 'loss', 'content': 0.2110416442155838, 'timestamp': '2025-10-02 00:12:17.739871', 'step': 731, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:17.817282', 'step': 731, 'epoch': 1}
{'type': 'loss', 'content': 0.2461136430501938, 'timestamp': '2025-10-02 00:12:17.829541', 'step': 732, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:12:17.906789', 'step': 732, 'epoch': 1}
{'type': 'loss', 'content': 0.01521347463130951, 'timestamp': '2025-10-02 00:12:17.918534', 'step': 733, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:17.999646', 'step': 733, 'epoch': 1}
{'type': 'loss', 'content': 0.11518965661525726, 'timestamp': '2025-10-02 00:12:18.007221', 'step': 734, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:18.069921', 'step': 734, 'epoch': 1}
{'type': 'loss', 'content': 0.12497598677873611, 'timestamp': '2025-10-02 00:12:18.072903', 'step': 735, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:18.130352', 'step': 735, 'epoch': 1}
{'type': 'loss', 'content': 0.027478396892547607, 'timestamp': '2025-10-02 00:12:18.140474', 'step': 736, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:18.207482', 'step': 736, 'epoch': 1}
{'type': 'loss', 'content': 0.17622287571430206, 'timestamp': '2025-10-02 00:12:18.214774', 'step': 737, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:18.281475', 'step': 737, 'epoch': 1}
{'type': 'loss', 'content': 0.09862948954105377, 'timestamp': '2025-10-02 00:12:18.287230', 'step': 738, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:18.352782', 'step': 738, 'epoch': 1}
{'type': 'loss', 'content': 0.11673926562070847, 'timestamp': '2025-10-02 00:12:18.359833', 'step': 739, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:18.432830', 'step': 739, 'epoch': 1}
{'type': 'loss', 'content': 0.0828649178147316, 'timestamp': '2025-10-02 00:12:18.439532', 'step': 740, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:18.496331', 'step': 740, 'epoch': 1}
{'type': 'loss', 'content': 0.03154575079679489, 'timestamp': '2025-10-02 00:12:18.502198', 'step': 741, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:18.572768', 'step': 741, 'epoch': 1}
{'type': 'loss', 'content': 0.08340521901845932, 'timestamp': '2025-10-02 00:12:18.584340', 'step': 742, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:18.655020', 'step': 742, 'epoch': 1}
{'type': 'loss', 'content': 0.13882748782634735, 'timestamp': '2025-10-02 00:12:18.658118', 'step': 743, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:18.732260', 'step': 743, 'epoch': 1}
{'type': 'loss', 'content': 0.20026014745235443, 'timestamp': '2025-10-02 00:12:18.739295', 'step': 744, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:18.811345', 'step': 744, 'epoch': 1}
{'type': 'loss', 'content': 0.06733954697847366, 'timestamp': '2025-10-02 00:12:18.815048', 'step': 745, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:18.880804', 'step': 745, 'epoch': 1}
{'type': 'loss', 'content': 0.15194305777549744, 'timestamp': '2025-10-02 00:12:18.890076', 'step': 746, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:18.955264', 'step': 746, 'epoch': 1}
{'type': 'loss', 'content': 0.09946879744529724, 'timestamp': '2025-10-02 00:12:18.964694', 'step': 747, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:19.041138', 'step': 747, 'epoch': 1}
{'type': 'loss', 'content': 0.06356875598430634, 'timestamp': '2025-10-02 00:12:19.052077', 'step': 748, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:19.111631', 'step': 748, 'epoch': 1}
{'type': 'loss', 'content': 0.23005925118923187, 'timestamp': '2025-10-02 00:12:19.118712', 'step': 749, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:19.176478', 'step': 749, 'epoch': 1}
{'type': 'loss', 'content': 0.049657102674245834, 'timestamp': '2025-10-02 00:12:19.183325', 'step': 750, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:19.248531', 'step': 750, 'epoch': 1}
{'type': 'loss', 'content': 0.2244214117527008, 'timestamp': '2025-10-02 00:12:19.254382', 'step': 751, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:19.325655', 'step': 751, 'epoch': 1}
{'type': 'loss', 'content': 0.11855454742908478, 'timestamp': '2025-10-02 00:12:19.333130', 'step': 752, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:12:19.405719', 'step': 752, 'epoch': 1}
{'type': 'loss', 'content': 0.06739044934511185, 'timestamp': '2025-10-02 00:12:19.418825', 'step': 753, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:19.481938', 'step': 753, 'epoch': 1}
{'type': 'loss', 'content': 0.04824138432741165, 'timestamp': '2025-10-02 00:12:19.486725', 'step': 754, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:19.554846', 'step': 754, 'epoch': 1}
{'type': 'loss', 'content': 0.2237464338541031, 'timestamp': '2025-10-02 00:12:19.561408', 'step': 755, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:19.632425', 'step': 755, 'epoch': 1}
{'type': 'loss', 'content': 0.3549615740776062, 'timestamp': '2025-10-02 00:12:19.640731', 'step': 756, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:19.711357', 'step': 756, 'epoch': 1}
{'type': 'loss', 'content': 0.08539985120296478, 'timestamp': '2025-10-02 00:12:19.716369', 'step': 757, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:12:19.786848', 'step': 757, 'epoch': 1}
{'type': 'loss', 'content': 0.029408477246761322, 'timestamp': '2025-10-02 00:12:19.798872', 'step': 758, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:19.879033', 'step': 758, 'epoch': 1}
{'type': 'loss', 'content': 0.0743568167090416, 'timestamp': '2025-10-02 00:12:19.889253', 'step': 759, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:19.966649', 'step': 759, 'epoch': 1}
{'type': 'loss', 'content': 0.06276047974824905, 'timestamp': '2025-10-02 00:12:19.976527', 'step': 760, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:20.044366', 'step': 760, 'epoch': 1}
{'type': 'loss', 'content': 0.12682460248470306, 'timestamp': '2025-10-02 00:12:20.047863', 'step': 761, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:20.112870', 'step': 761, 'epoch': 1}
{'type': 'loss', 'content': 0.13666808605194092, 'timestamp': '2025-10-02 00:12:20.116338', 'step': 762, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:20.195220', 'step': 762, 'epoch': 1}
{'type': 'loss', 'content': 0.22771458327770233, 'timestamp': '2025-10-02 00:12:20.205357', 'step': 763, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:12:20.262219', 'step': 763, 'epoch': 1}
{'type': 'loss', 'content': 0.1154981330037117, 'timestamp': '2025-10-02 00:12:20.269733', 'step': 764, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:20.338852', 'step': 764, 'epoch': 1}
{'type': 'loss', 'content': 0.026682212948799133, 'timestamp': '2025-10-02 00:12:20.342141', 'step': 765, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:20.413379', 'step': 765, 'epoch': 1}
{'type': 'loss', 'content': 0.14574210345745087, 'timestamp': '2025-10-02 00:12:20.416492', 'step': 766, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:12:20.485822', 'step': 766, 'epoch': 1}
{'type': 'loss', 'content': 0.19244126975536346, 'timestamp': '2025-10-02 00:12:20.491003', 'step': 767, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:20.561556', 'step': 767, 'epoch': 1}
{'type': 'loss', 'content': 0.14389844238758087, 'timestamp': '2025-10-02 00:12:20.571903', 'step': 768, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:20.635731', 'step': 768, 'epoch': 1}
{'type': 'loss', 'content': 0.0745764896273613, 'timestamp': '2025-10-02 00:12:20.638874', 'step': 769, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:20.707488', 'step': 769, 'epoch': 1}
{'type': 'loss', 'content': 0.07720737159252167, 'timestamp': '2025-10-02 00:12:20.717605', 'step': 770, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:20.790752', 'step': 770, 'epoch': 1}
{'type': 'loss', 'content': 0.08039547502994537, 'timestamp': '2025-10-02 00:12:20.796117', 'step': 771, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:20.859249', 'step': 771, 'epoch': 1}
{'type': 'loss', 'content': 0.15246665477752686, 'timestamp': '2025-10-02 00:12:20.868094', 'step': 772, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:20.927249', 'step': 772, 'epoch': 1}
{'type': 'loss', 'content': 0.06913596391677856, 'timestamp': '2025-10-02 00:12:20.937051', 'step': 773, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:12:20.998838', 'step': 773, 'epoch': 1}
{'type': 'loss', 'content': 0.1382821798324585, 'timestamp': '2025-10-02 00:12:21.001862', 'step': 774, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:21.058280', 'step': 774, 'epoch': 1}
{'type': 'loss', 'content': 0.1628580391407013, 'timestamp': '2025-10-02 00:12:21.062261', 'step': 775, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:21.124554', 'step': 775, 'epoch': 1}
{'type': 'loss', 'content': 0.11441235989332199, 'timestamp': '2025-10-02 00:12:21.134092', 'step': 776, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:21.196994', 'step': 776, 'epoch': 1}
{'type': 'loss', 'content': 0.11314429342746735, 'timestamp': '2025-10-02 00:12:21.206640', 'step': 777, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:21.283955', 'step': 777, 'epoch': 1}
{'type': 'loss', 'content': 0.24501365423202515, 'timestamp': '2025-10-02 00:12:21.291844', 'step': 778, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:21.367293', 'step': 778, 'epoch': 1}
{'type': 'loss', 'content': 0.0373896025121212, 'timestamp': '2025-10-02 00:12:21.370912', 'step': 779, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:12:21.430561', 'step': 779, 'epoch': 1}
{'type': 'loss', 'content': 0.08782020211219788, 'timestamp': '2025-10-02 00:12:21.438051', 'step': 780, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:21.509930', 'step': 780, 'epoch': 1}
{'type': 'loss', 'content': 0.0967240110039711, 'timestamp': '2025-10-02 00:12:21.517177', 'step': 781, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:12:21.593312', 'step': 781, 'epoch': 1}
{'type': 'loss', 'content': 0.01299062930047512, 'timestamp': '2025-10-02 00:12:21.606544', 'step': 782, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:21.666440', 'step': 782, 'epoch': 1}
{'type': 'loss', 'content': 0.028283296152949333, 'timestamp': '2025-10-02 00:12:21.673454', 'step': 783, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:21.762115', 'step': 783, 'epoch': 1}
{'type': 'loss', 'content': 0.023209895938634872, 'timestamp': '2025-10-02 00:12:21.773108', 'step': 784, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:21.829331', 'step': 784, 'epoch': 1}
{'type': 'loss', 'content': 0.2744857370853424, 'timestamp': '2025-10-02 00:12:21.839717', 'step': 785, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:12:21.900230', 'step': 785, 'epoch': 1}
{'type': 'loss', 'content': 0.08255510777235031, 'timestamp': '2025-10-02 00:12:21.903388', 'step': 786, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:21.969295', 'step': 786, 'epoch': 1}
{'type': 'loss', 'content': 0.09926855564117432, 'timestamp': '2025-10-02 00:12:21.975244', 'step': 787, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:12:22.038544', 'step': 787, 'epoch': 1}
{'type': 'loss', 'content': 0.04134475439786911, 'timestamp': '2025-10-02 00:12:22.049981', 'step': 788, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:22.111657', 'step': 788, 'epoch': 1}
{'type': 'loss', 'content': 0.04813094809651375, 'timestamp': '2025-10-02 00:12:22.114338', 'step': 789, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:22.181231', 'step': 789, 'epoch': 1}
{'type': 'loss', 'content': 0.01972077414393425, 'timestamp': '2025-10-02 00:12:22.191470', 'step': 790, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:22.258476', 'step': 790, 'epoch': 1}
{'type': 'loss', 'content': 0.038353946059942245, 'timestamp': '2025-10-02 00:12:22.267861', 'step': 791, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:22.331208', 'step': 791, 'epoch': 1}
{'type': 'loss', 'content': 0.11207344383001328, 'timestamp': '2025-10-02 00:12:22.342863', 'step': 792, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:22.414581', 'step': 792, 'epoch': 1}
{'type': 'loss', 'content': 0.08920096606016159, 'timestamp': '2025-10-02 00:12:22.418062', 'step': 793, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:12:22.478331', 'step': 793, 'epoch': 1}
{'type': 'loss', 'content': 0.1960105001926422, 'timestamp': '2025-10-02 00:12:22.480833', 'step': 794, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:22.556990', 'step': 794, 'epoch': 1}
{'type': 'loss', 'content': 0.02715197578072548, 'timestamp': '2025-10-02 00:12:22.565575', 'step': 795, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:22.630187', 'step': 795, 'epoch': 1}
{'type': 'loss', 'content': 0.03495882824063301, 'timestamp': '2025-10-02 00:12:22.640356', 'step': 796, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:22.696453', 'step': 796, 'epoch': 1}
{'type': 'loss', 'content': 0.059615444391965866, 'timestamp': '2025-10-02 00:12:22.704097', 'step': 797, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:22.781726', 'step': 797, 'epoch': 1}
{'type': 'loss', 'content': 0.04907678812742233, 'timestamp': '2025-10-02 00:12:22.791950', 'step': 798, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:22.854740', 'step': 798, 'epoch': 1}
{'type': 'loss', 'content': 0.12748141586780548, 'timestamp': '2025-10-02 00:12:22.859251', 'step': 799, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:22.922229', 'step': 799, 'epoch': 1}
{'type': 'loss', 'content': 0.1013760194182396, 'timestamp': '2025-10-02 00:12:22.928832', 'step': 800, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:22.991448', 'step': 800, 'epoch': 1}
{'type': 'loss', 'content': 0.1811145842075348, 'timestamp': '2025-10-02 00:12:22.994111', 'step': 801, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:12:23.064225', 'step': 801, 'epoch': 1}
{'type': 'loss', 'content': 0.03844572976231575, 'timestamp': '2025-10-02 00:12:23.074920', 'step': 802, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:23.131646', 'step': 802, 'epoch': 1}
{'type': 'loss', 'content': 0.1434520184993744, 'timestamp': '2025-10-02 00:12:23.134132', 'step': 803, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:12:23.190861', 'step': 803, 'epoch': 1}
{'type': 'loss', 'content': 0.12442659586668015, 'timestamp': '2025-10-02 00:12:23.200591', 'step': 804, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:12:23.260085', 'step': 804, 'epoch': 1}
{'type': 'loss', 'content': 0.12725575268268585, 'timestamp': '2025-10-02 00:12:23.264088', 'step': 805, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:23.323591', 'step': 805, 'epoch': 1}
{'type': 'loss', 'content': 0.07944830507040024, 'timestamp': '2025-10-02 00:12:23.333125', 'step': 806, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:23.403328', 'step': 806, 'epoch': 1}
{'type': 'loss', 'content': 0.04928073659539223, 'timestamp': '2025-10-02 00:12:23.412459', 'step': 807, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:23.476703', 'step': 807, 'epoch': 1}
{'type': 'loss', 'content': 0.06699314713478088, 'timestamp': '2025-10-02 00:12:23.487309', 'step': 808, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:23.554175', 'step': 808, 'epoch': 1}
{'type': 'loss', 'content': 0.10431141406297684, 'timestamp': '2025-10-02 00:12:23.564143', 'step': 809, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:23.628606', 'step': 809, 'epoch': 1}
{'type': 'loss', 'content': 0.03846897557377815, 'timestamp': '2025-10-02 00:12:23.640070', 'step': 810, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:23.711664', 'step': 810, 'epoch': 1}
{'type': 'loss', 'content': 0.11171845346689224, 'timestamp': '2025-10-02 00:12:23.721033', 'step': 811, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:23.777242', 'step': 811, 'epoch': 1}
{'type': 'loss', 'content': 0.1511550396680832, 'timestamp': '2025-10-02 00:12:23.791946', 'step': 812, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:12:23.860832', 'step': 812, 'epoch': 1}
{'type': 'loss', 'content': 0.1296699196100235, 'timestamp': '2025-10-02 00:12:23.869776', 'step': 813, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:23.929768', 'step': 813, 'epoch': 1}
{'type': 'loss', 'content': 0.13675355911254883, 'timestamp': '2025-10-02 00:12:23.937441', 'step': 814, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:23.996580', 'step': 814, 'epoch': 1}
{'type': 'loss', 'content': 0.23933029174804688, 'timestamp': '2025-10-02 00:12:23.999812', 'step': 815, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:12:24.064446', 'step': 815, 'epoch': 1}
{'type': 'loss', 'content': 0.02506805770099163, 'timestamp': '2025-10-02 00:12:24.075897', 'step': 816, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:24.138325', 'step': 816, 'epoch': 1}
{'type': 'loss', 'content': 0.06052996218204498, 'timestamp': '2025-10-02 00:12:24.140782', 'step': 817, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:24.197489', 'step': 817, 'epoch': 1}
{'type': 'loss', 'content': 0.14323784410953522, 'timestamp': '2025-10-02 00:12:24.207026', 'step': 818, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:24.268309', 'step': 818, 'epoch': 1}
{'type': 'loss', 'content': 0.0711829736828804, 'timestamp': '2025-10-02 00:12:24.274332', 'step': 819, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:12:24.330705', 'step': 819, 'epoch': 1}
{'type': 'loss', 'content': 0.2041015625, 'timestamp': '2025-10-02 00:12:24.336713', 'step': 820, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:24.394229', 'step': 820, 'epoch': 1}
{'type': 'loss', 'content': 0.2727045714855194, 'timestamp': '2025-10-02 00:12:24.396701', 'step': 821, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:24.457718', 'step': 821, 'epoch': 1}
{'type': 'loss', 'content': 0.10279510170221329, 'timestamp': '2025-10-02 00:12:24.461913', 'step': 822, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:12:24.521535', 'step': 822, 'epoch': 1}
{'type': 'loss', 'content': 0.09708564728498459, 'timestamp': '2025-10-02 00:12:24.526035', 'step': 823, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:24.586400', 'step': 823, 'epoch': 1}
{'type': 'loss', 'content': 0.028605513274669647, 'timestamp': '2025-10-02 00:12:24.594623', 'step': 824, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:12:24.656984', 'step': 824, 'epoch': 1}
{'type': 'loss', 'content': 0.24998131394386292, 'timestamp': '2025-10-02 00:12:24.667894', 'step': 825, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:24.734589', 'step': 825, 'epoch': 1}
{'type': 'loss', 'content': 0.1382567286491394, 'timestamp': '2025-10-02 00:12:24.740347', 'step': 826, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:12:24.807752', 'step': 826, 'epoch': 1}
{'type': 'loss', 'content': 0.22956807911396027, 'timestamp': '2025-10-02 00:12:24.812165', 'step': 827, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:24.878163', 'step': 827, 'epoch': 1}
{'type': 'loss', 'content': 0.14385709166526794, 'timestamp': '2025-10-02 00:12:24.885152', 'step': 828, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:24.941566', 'step': 828, 'epoch': 1}
{'type': 'loss', 'content': 0.04147951677441597, 'timestamp': '2025-10-02 00:12:24.951143', 'step': 829, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:25.011356', 'step': 829, 'epoch': 1}
{'type': 'loss', 'content': 0.08102274686098099, 'timestamp': '2025-10-02 00:12:25.020913', 'step': 830, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:25.083060', 'step': 830, 'epoch': 1}
{'type': 'loss', 'content': 0.14774033427238464, 'timestamp': '2025-10-02 00:12:25.093736', 'step': 831, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:25.167036', 'step': 831, 'epoch': 1}
{'type': 'loss', 'content': 0.1821885108947754, 'timestamp': '2025-10-02 00:12:25.173311', 'step': 832, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:25.239802', 'step': 832, 'epoch': 1}
{'type': 'loss', 'content': 0.13127009570598602, 'timestamp': '2025-10-02 00:12:25.243624', 'step': 833, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:25.301230', 'step': 833, 'epoch': 1}
{'type': 'loss', 'content': 0.15695993602275848, 'timestamp': '2025-10-02 00:12:25.303879', 'step': 834, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:25.362438', 'step': 834, 'epoch': 1}
{'type': 'loss', 'content': 0.061339396983385086, 'timestamp': '2025-10-02 00:12:25.364866', 'step': 835, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:25.420948', 'step': 835, 'epoch': 1}
{'type': 'loss', 'content': 0.1677016317844391, 'timestamp': '2025-10-02 00:12:25.426655', 'step': 836, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:25.480174', 'step': 836, 'epoch': 1}
{'type': 'loss', 'content': 0.09708891063928604, 'timestamp': '2025-10-02 00:12:25.487950', 'step': 837, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:25.542056', 'step': 837, 'epoch': 1}
{'type': 'loss', 'content': 0.06290717422962189, 'timestamp': '2025-10-02 00:12:25.548078', 'step': 838, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:25.609276', 'step': 838, 'epoch': 1}
{'type': 'loss', 'content': 0.07098450511693954, 'timestamp': '2025-10-02 00:12:25.616602', 'step': 839, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:25.676770', 'step': 839, 'epoch': 1}
{'type': 'loss', 'content': 0.09052867442369461, 'timestamp': '2025-10-02 00:12:25.684911', 'step': 840, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:25.738644', 'step': 840, 'epoch': 1}
{'type': 'loss', 'content': 0.18324050307273865, 'timestamp': '2025-10-02 00:12:25.742039', 'step': 841, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:25.801311', 'step': 841, 'epoch': 1}
{'type': 'loss', 'content': 0.06501784920692444, 'timestamp': '2025-10-02 00:12:25.810691', 'step': 842, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:25.867999', 'step': 842, 'epoch': 1}
{'type': 'loss', 'content': 0.25073012709617615, 'timestamp': '2025-10-02 00:12:25.873748', 'step': 843, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:25.929061', 'step': 843, 'epoch': 1}
{'type': 'loss', 'content': 0.06296172738075256, 'timestamp': '2025-10-02 00:12:25.937383', 'step': 844, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:25.992760', 'step': 844, 'epoch': 1}
{'type': 'loss', 'content': 0.017711812630295753, 'timestamp': '2025-10-02 00:12:25.998896', 'step': 845, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:12:26.061986', 'step': 845, 'epoch': 1}
{'type': 'loss', 'content': 0.050647035241127014, 'timestamp': '2025-10-02 00:12:26.072540', 'step': 846, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:26.141292', 'step': 846, 'epoch': 1}
{'type': 'loss', 'content': 0.055374037474393845, 'timestamp': '2025-10-02 00:12:26.148877', 'step': 847, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:26.206581', 'step': 847, 'epoch': 1}
{'type': 'loss', 'content': 0.22243548929691315, 'timestamp': '2025-10-02 00:12:26.214287', 'step': 848, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:26.284875', 'step': 848, 'epoch': 1}
{'type': 'loss', 'content': 0.10505408048629761, 'timestamp': '2025-10-02 00:12:26.293329', 'step': 849, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:26.363039', 'step': 849, 'epoch': 1}
{'type': 'loss', 'content': 0.08897357434034348, 'timestamp': '2025-10-02 00:12:26.372938', 'step': 850, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:26.444089', 'step': 850, 'epoch': 1}
{'type': 'loss', 'content': 0.10243653506040573, 'timestamp': '2025-10-02 00:12:26.446489', 'step': 851, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:12:26.511098', 'step': 851, 'epoch': 1}
{'type': 'loss', 'content': 0.14998364448547363, 'timestamp': '2025-10-02 00:12:26.517850', 'step': 852, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:26.572572', 'step': 852, 'epoch': 1}
{'type': 'loss', 'content': 0.10419193655252457, 'timestamp': '2025-10-02 00:12:26.578318', 'step': 853, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:26.638225', 'step': 853, 'epoch': 1}
{'type': 'loss', 'content': 0.11907869577407837, 'timestamp': '2025-10-02 00:12:26.641870', 'step': 854, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:26.710933', 'step': 854, 'epoch': 1}
{'type': 'loss', 'content': 0.06575936824083328, 'timestamp': '2025-10-02 00:12:26.715431', 'step': 855, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:26.776946', 'step': 855, 'epoch': 1}
{'type': 'loss', 'content': 0.22937586903572083, 'timestamp': '2025-10-02 00:12:26.783472', 'step': 856, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:26.838721', 'step': 856, 'epoch': 1}
{'type': 'loss', 'content': 0.05849626660346985, 'timestamp': '2025-10-02 00:12:26.844629', 'step': 857, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:26.907816', 'step': 857, 'epoch': 1}
{'type': 'loss', 'content': 0.11951285600662231, 'timestamp': '2025-10-02 00:12:26.910777', 'step': 858, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:26.966817', 'step': 858, 'epoch': 1}
{'type': 'loss', 'content': 0.04791993275284767, 'timestamp': '2025-10-02 00:12:26.969806', 'step': 859, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:27.037653', 'step': 859, 'epoch': 1}
{'type': 'loss', 'content': 0.04165647551417351, 'timestamp': '2025-10-02 00:12:27.044853', 'step': 860, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:12:27.112359', 'step': 860, 'epoch': 1}
{'type': 'loss', 'content': 0.25892728567123413, 'timestamp': '2025-10-02 00:12:27.120878', 'step': 861, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:27.185450', 'step': 861, 'epoch': 1}
{'type': 'loss', 'content': 0.06409042328596115, 'timestamp': '2025-10-02 00:12:27.194236', 'step': 862, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:27.271923', 'step': 862, 'epoch': 1}
{'type': 'loss', 'content': 0.042692169547080994, 'timestamp': '2025-10-02 00:12:27.280487', 'step': 863, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:27.356477', 'step': 863, 'epoch': 1}
{'type': 'loss', 'content': 0.13144086301326752, 'timestamp': '2025-10-02 00:12:27.368576', 'step': 864, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:27.432491', 'step': 864, 'epoch': 1}
{'type': 'loss', 'content': 0.16398383677005768, 'timestamp': '2025-10-02 00:12:27.435332', 'step': 865, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:27.498583', 'step': 865, 'epoch': 1}
{'type': 'loss', 'content': 0.04355151206254959, 'timestamp': '2025-10-02 00:12:27.506165', 'step': 866, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:12:27.576687', 'step': 866, 'epoch': 1}
{'type': 'loss', 'content': 0.2361249476671219, 'timestamp': '2025-10-02 00:12:27.584037', 'step': 867, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:12:27.658196', 'step': 867, 'epoch': 1}
{'type': 'loss', 'content': 0.030002126470208168, 'timestamp': '2025-10-02 00:12:27.669441', 'step': 868, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:12:27.733921', 'step': 868, 'epoch': 1}
{'type': 'loss', 'content': 0.1240885928273201, 'timestamp': '2025-10-02 00:12:27.737556', 'step': 869, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:27.795926', 'step': 869, 'epoch': 1}
{'type': 'loss', 'content': 0.21755971014499664, 'timestamp': '2025-10-02 00:12:27.798951', 'step': 870, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:27.858335', 'step': 870, 'epoch': 1}
{'type': 'loss', 'content': 0.09390908479690552, 'timestamp': '2025-10-02 00:12:27.867850', 'step': 871, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:27.938660', 'step': 871, 'epoch': 1}
{'type': 'loss', 'content': 0.1032605990767479, 'timestamp': '2025-10-02 00:12:27.949601', 'step': 872, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:28.010447', 'step': 872, 'epoch': 1}
{'type': 'loss', 'content': 0.06874869018793106, 'timestamp': '2025-10-02 00:12:28.016309', 'step': 873, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:28.077264', 'step': 873, 'epoch': 1}
{'type': 'loss', 'content': 0.12821683287620544, 'timestamp': '2025-10-02 00:12:28.080230', 'step': 874, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:28.143230', 'step': 874, 'epoch': 1}
{'type': 'loss', 'content': 0.09087321162223816, 'timestamp': '2025-10-02 00:12:28.149087', 'step': 875, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:28.208565', 'step': 875, 'epoch': 1}
{'type': 'loss', 'content': 0.07019586116075516, 'timestamp': '2025-10-02 00:12:28.216237', 'step': 876, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:28.273653', 'step': 876, 'epoch': 1}
{'type': 'loss', 'content': 0.17037823796272278, 'timestamp': '2025-10-02 00:12:28.281389', 'step': 877, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:28.343119', 'step': 877, 'epoch': 1}
{'type': 'loss', 'content': 0.0688369944691658, 'timestamp': '2025-10-02 00:12:28.352486', 'step': 878, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:28.415116', 'step': 878, 'epoch': 1}
{'type': 'loss', 'content': 0.2386600822210312, 'timestamp': '2025-10-02 00:12:28.418861', 'step': 879, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:28.474757', 'step': 879, 'epoch': 1}
{'type': 'loss', 'content': 0.13248376548290253, 'timestamp': '2025-10-02 00:12:28.481159', 'step': 880, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:28.536650', 'step': 880, 'epoch': 1}
{'type': 'loss', 'content': 0.07216126471757889, 'timestamp': '2025-10-02 00:12:28.542589', 'step': 881, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:12:28.597647', 'step': 881, 'epoch': 1}
{'type': 'loss', 'content': 0.09915295243263245, 'timestamp': '2025-10-02 00:12:28.604107', 'step': 882, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:28.660594', 'step': 882, 'epoch': 1}
{'type': 'loss', 'content': 0.10250035673379898, 'timestamp': '2025-10-02 00:12:28.663969', 'step': 883, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:28.729509', 'step': 883, 'epoch': 1}
{'type': 'loss', 'content': 0.1043047234416008, 'timestamp': '2025-10-02 00:12:28.736560', 'step': 884, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:28.800028', 'step': 884, 'epoch': 1}
{'type': 'loss', 'content': 0.06425026804208755, 'timestamp': '2025-10-02 00:12:28.803769', 'step': 885, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:12:28.862244', 'step': 885, 'epoch': 1}
{'type': 'loss', 'content': 0.10420985519886017, 'timestamp': '2025-10-02 00:12:28.865479', 'step': 886, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:12:28.923991', 'step': 886, 'epoch': 1}
{'type': 'loss', 'content': 0.17733211815357208, 'timestamp': '2025-10-02 00:12:28.927727', 'step': 887, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:28.984896', 'step': 887, 'epoch': 1}
{'type': 'loss', 'content': 0.150129035115242, 'timestamp': '2025-10-02 00:12:28.992945', 'step': 888, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:29.053433', 'step': 888, 'epoch': 1}
{'type': 'loss', 'content': 0.16740339994430542, 'timestamp': '2025-10-02 00:12:29.056881', 'step': 889, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:29.114124', 'step': 889, 'epoch': 1}
{'type': 'loss', 'content': 0.0920652374625206, 'timestamp': '2025-10-02 00:12:29.117044', 'step': 890, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:12:29.187485', 'step': 890, 'epoch': 1}
{'type': 'loss', 'content': 0.14927580952644348, 'timestamp': '2025-10-02 00:12:29.194470', 'step': 891, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:12:29.262873', 'step': 891, 'epoch': 1}
{'type': 'loss', 'content': 0.03360224887728691, 'timestamp': '2025-10-02 00:12:29.274047', 'step': 892, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:29.336946', 'step': 892, 'epoch': 1}
{'type': 'loss', 'content': 0.09633053839206696, 'timestamp': '2025-10-02 00:12:29.340998', 'step': 893, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:29.399939', 'step': 893, 'epoch': 1}
{'type': 'loss', 'content': 0.04054170101881027, 'timestamp': '2025-10-02 00:12:29.409336', 'step': 894, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:29.472212', 'step': 894, 'epoch': 1}
{'type': 'loss', 'content': 0.07776736468076706, 'timestamp': '2025-10-02 00:12:29.481756', 'step': 895, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:29.537765', 'step': 895, 'epoch': 1}
{'type': 'loss', 'content': 0.05624210461974144, 'timestamp': '2025-10-02 00:12:29.544324', 'step': 896, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:29.600561', 'step': 896, 'epoch': 1}
{'type': 'loss', 'content': 0.05561885982751846, 'timestamp': '2025-10-02 00:12:29.610305', 'step': 897, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:29.665220', 'step': 897, 'epoch': 1}
{'type': 'loss', 'content': 0.08741828799247742, 'timestamp': '2025-10-02 00:12:29.668223', 'step': 898, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:29.723169', 'step': 898, 'epoch': 1}
{'type': 'loss', 'content': 0.05896736681461334, 'timestamp': '2025-10-02 00:12:29.726729', 'step': 899, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:29.786754', 'step': 899, 'epoch': 1}
{'type': 'loss', 'content': 0.09449686855077744, 'timestamp': '2025-10-02 00:12:29.794224', 'step': 900, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:29.855671', 'step': 900, 'epoch': 1}
{'type': 'loss', 'content': 0.2394474744796753, 'timestamp': '2025-10-02 00:12:29.858332', 'step': 901, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:29.913737', 'step': 901, 'epoch': 1}
{'type': 'loss', 'content': 0.1044061928987503, 'timestamp': '2025-10-02 00:12:29.923122', 'step': 902, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:29.978828', 'step': 902, 'epoch': 1}
{'type': 'loss', 'content': 0.045519888401031494, 'timestamp': '2025-10-02 00:12:29.981119', 'step': 903, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:30.036109', 'step': 903, 'epoch': 1}
{'type': 'loss', 'content': 0.05653504282236099, 'timestamp': '2025-10-02 00:12:30.041665', 'step': 904, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:30.095965', 'step': 904, 'epoch': 1}
{'type': 'loss', 'content': 0.19129642844200134, 'timestamp': '2025-10-02 00:12:30.098165', 'step': 905, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:30.154127', 'step': 905, 'epoch': 1}
{'type': 'loss', 'content': 0.05495869368314743, 'timestamp': '2025-10-02 00:12:30.161608', 'step': 906, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:30.215790', 'step': 906, 'epoch': 1}
{'type': 'loss', 'content': 0.12521716952323914, 'timestamp': '2025-10-02 00:12:30.218557', 'step': 907, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:30.275515', 'step': 907, 'epoch': 1}
{'type': 'loss', 'content': 0.08792457729578018, 'timestamp': '2025-10-02 00:12:30.281355', 'step': 908, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:30.335480', 'step': 908, 'epoch': 1}
{'type': 'loss', 'content': 0.07669613510370255, 'timestamp': '2025-10-02 00:12:30.337752', 'step': 909, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:30.392977', 'step': 909, 'epoch': 1}
{'type': 'loss', 'content': 0.05418825149536133, 'timestamp': '2025-10-02 00:12:30.400404', 'step': 910, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:30.467665', 'step': 910, 'epoch': 1}
{'type': 'loss', 'content': 0.06266538798809052, 'timestamp': '2025-10-02 00:12:30.470221', 'step': 911, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:30.534072', 'step': 911, 'epoch': 1}
{'type': 'loss', 'content': 0.08228221535682678, 'timestamp': '2025-10-02 00:12:30.541375', 'step': 912, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:30.607244', 'step': 912, 'epoch': 1}
{'type': 'loss', 'content': 0.12199357151985168, 'timestamp': '2025-10-02 00:12:30.614257', 'step': 913, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:30.673173', 'step': 913, 'epoch': 1}
{'type': 'loss', 'content': 0.08855020254850388, 'timestamp': '2025-10-02 00:12:30.677782', 'step': 914, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:30.737732', 'step': 914, 'epoch': 1}
{'type': 'loss', 'content': 0.0322769470512867, 'timestamp': '2025-10-02 00:12:30.747080', 'step': 915, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:30.804036', 'step': 915, 'epoch': 1}
{'type': 'loss', 'content': 0.057426728308200836, 'timestamp': '2025-10-02 00:12:30.814383', 'step': 916, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:30.876210', 'step': 916, 'epoch': 1}
{'type': 'loss', 'content': 0.015615579672157764, 'timestamp': '2025-10-02 00:12:30.883848', 'step': 917, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:30.942348', 'step': 917, 'epoch': 1}
{'type': 'loss', 'content': 0.053606294095516205, 'timestamp': '2025-10-02 00:12:30.948291', 'step': 918, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:31.009643', 'step': 918, 'epoch': 1}
{'type': 'loss', 'content': 0.05389627814292908, 'timestamp': '2025-10-02 00:12:31.019881', 'step': 919, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:31.075211', 'step': 919, 'epoch': 1}
{'type': 'loss', 'content': 0.06416338682174683, 'timestamp': '2025-10-02 00:12:31.082939', 'step': 920, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:12:31.156446', 'step': 920, 'epoch': 1}
{'type': 'loss', 'content': 0.01829216443002224, 'timestamp': '2025-10-02 00:12:31.169873', 'step': 921, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:31.226644', 'step': 921, 'epoch': 1}
{'type': 'loss', 'content': 0.13527874648571014, 'timestamp': '2025-10-02 00:12:31.229610', 'step': 922, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:31.286772', 'step': 922, 'epoch': 1}
{'type': 'loss', 'content': 0.16609807312488556, 'timestamp': '2025-10-02 00:12:31.290634', 'step': 923, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:31.346758', 'step': 923, 'epoch': 1}
{'type': 'loss', 'content': 0.032862238585948944, 'timestamp': '2025-10-02 00:12:31.353364', 'step': 924, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:12:31.418138', 'step': 924, 'epoch': 1}
{'type': 'loss', 'content': 0.029565483331680298, 'timestamp': '2025-10-02 00:12:31.429889', 'step': 925, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:31.486415', 'step': 925, 'epoch': 1}
{'type': 'loss', 'content': 0.07382716983556747, 'timestamp': '2025-10-02 00:12:31.489230', 'step': 926, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:31.545683', 'step': 926, 'epoch': 1}
{'type': 'loss', 'content': 0.29078158736228943, 'timestamp': '2025-10-02 00:12:31.549144', 'step': 927, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:31.605717', 'step': 927, 'epoch': 1}
{'type': 'loss', 'content': 0.20046864449977875, 'timestamp': '2025-10-02 00:12:31.612054', 'step': 928, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:31.668222', 'step': 928, 'epoch': 1}
{'type': 'loss', 'content': 0.09654372930526733, 'timestamp': '2025-10-02 00:12:31.677605', 'step': 929, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:31.740896', 'step': 929, 'epoch': 1}
{'type': 'loss', 'content': 0.09843925386667252, 'timestamp': '2025-10-02 00:12:31.743653', 'step': 930, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:12:31.807940', 'step': 930, 'epoch': 1}
{'type': 'loss', 'content': 0.0631837546825409, 'timestamp': '2025-10-02 00:12:31.818802', 'step': 931, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:31.879010', 'step': 931, 'epoch': 1}
{'type': 'loss', 'content': 0.1216505691409111, 'timestamp': '2025-10-02 00:12:31.885222', 'step': 932, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:31.953945', 'step': 932, 'epoch': 1}
{'type': 'loss', 'content': 0.1631343513727188, 'timestamp': '2025-10-02 00:12:31.959620', 'step': 933, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:32.025000', 'step': 933, 'epoch': 1}
{'type': 'loss', 'content': 0.035552773624658585, 'timestamp': '2025-10-02 00:12:32.027533', 'step': 934, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:32.087353', 'step': 934, 'epoch': 1}
{'type': 'loss', 'content': 0.11835838854312897, 'timestamp': '2025-10-02 00:12:32.090763', 'step': 935, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:32.149281', 'step': 935, 'epoch': 1}
{'type': 'loss', 'content': 0.05808024853467941, 'timestamp': '2025-10-02 00:12:32.156217', 'step': 936, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-10-02 00:12:32.247326', 'step': 936, 'epoch': 1}
{'type': 'loss', 'content': 0.0547298826277256, 'timestamp': '2025-10-02 00:12:32.263612', 'step': 937, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:32.318639', 'step': 937, 'epoch': 1}
{'type': 'loss', 'content': 0.1483301967382431, 'timestamp': '2025-10-02 00:12:32.321017', 'step': 938, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:12:32.377838', 'step': 938, 'epoch': 1}
{'type': 'loss', 'content': 0.1088884174823761, 'timestamp': '2025-10-02 00:12:32.380346', 'step': 939, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:32.439156', 'step': 939, 'epoch': 1}
{'type': 'loss', 'content': 0.10484002530574799, 'timestamp': '2025-10-02 00:12:32.450118', 'step': 940, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:32.505822', 'step': 940, 'epoch': 1}
{'type': 'loss', 'content': 0.09341847151517868, 'timestamp': '2025-10-02 00:12:32.508803', 'step': 941, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:32.563664', 'step': 941, 'epoch': 1}
{'type': 'loss', 'content': 0.15478770434856415, 'timestamp': '2025-10-02 00:12:32.566695', 'step': 942, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:32.625043', 'step': 942, 'epoch': 1}
{'type': 'loss', 'content': 0.03643562272191048, 'timestamp': '2025-10-02 00:12:32.627676', 'step': 943, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:32.686067', 'step': 943, 'epoch': 1}
{'type': 'loss', 'content': 0.0953947901725769, 'timestamp': '2025-10-02 00:12:32.696518', 'step': 944, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:12:32.764343', 'step': 944, 'epoch': 1}
{'type': 'loss', 'content': 0.206705704331398, 'timestamp': '2025-10-02 00:12:32.769474', 'step': 945, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:32.831001', 'step': 945, 'epoch': 1}
{'type': 'loss', 'content': 0.17270629107952118, 'timestamp': '2025-10-02 00:12:32.838693', 'step': 946, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:32.894895', 'step': 946, 'epoch': 1}
{'type': 'loss', 'content': 0.07575096189975739, 'timestamp': '2025-10-02 00:12:32.902433', 'step': 947, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:32.956783', 'step': 947, 'epoch': 1}
{'type': 'loss', 'content': 0.060978177934885025, 'timestamp': '2025-10-02 00:12:32.965267', 'step': 948, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:33.021350', 'step': 948, 'epoch': 1}
{'type': 'loss', 'content': 0.06238032132387161, 'timestamp': '2025-10-02 00:12:33.030852', 'step': 949, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:33.088702', 'step': 949, 'epoch': 1}
{'type': 'loss', 'content': 0.02103213593363762, 'timestamp': '2025-10-02 00:12:33.091739', 'step': 950, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:12:33.159407', 'step': 950, 'epoch': 1}
{'type': 'loss', 'content': 0.037687480449676514, 'timestamp': '2025-10-02 00:12:33.170084', 'step': 951, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:33.240653', 'step': 951, 'epoch': 1}
{'type': 'loss', 'content': 0.0508003793656826, 'timestamp': '2025-10-02 00:12:33.251557', 'step': 952, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:33.314708', 'step': 952, 'epoch': 1}
{'type': 'loss', 'content': 0.0725318044424057, 'timestamp': '2025-10-02 00:12:33.318213', 'step': 953, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:33.379805', 'step': 953, 'epoch': 1}
{'type': 'loss', 'content': 0.06989750266075134, 'timestamp': '2025-10-02 00:12:33.389938', 'step': 954, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:12:33.462085', 'step': 954, 'epoch': 1}
{'type': 'loss', 'content': 0.023398779332637787, 'timestamp': '2025-10-02 00:12:33.474765', 'step': 955, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:33.530795', 'step': 955, 'epoch': 1}
{'type': 'loss', 'content': 0.13240985572338104, 'timestamp': '2025-10-02 00:12:33.539856', 'step': 956, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:12:33.602205', 'step': 956, 'epoch': 1}
{'type': 'loss', 'content': 0.03749210387468338, 'timestamp': '2025-10-02 00:12:33.613740', 'step': 957, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:33.671794', 'step': 957, 'epoch': 1}
{'type': 'loss', 'content': 0.07617191225290298, 'timestamp': '2025-10-02 00:12:33.677430', 'step': 958, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:33.733398', 'step': 958, 'epoch': 1}
{'type': 'loss', 'content': 0.11399252712726593, 'timestamp': '2025-10-02 00:12:33.742736', 'step': 959, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:33.804647', 'step': 959, 'epoch': 1}
{'type': 'loss', 'content': 0.07962721586227417, 'timestamp': '2025-10-02 00:12:33.812025', 'step': 960, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:33.868656', 'step': 960, 'epoch': 1}
{'type': 'loss', 'content': 0.06258106231689453, 'timestamp': '2025-10-02 00:12:33.874848', 'step': 961, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:33.932811', 'step': 961, 'epoch': 1}
{'type': 'loss', 'content': 0.13605822622776031, 'timestamp': '2025-10-02 00:12:33.935701', 'step': 962, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:33.993326', 'step': 962, 'epoch': 1}
{'type': 'loss', 'content': 0.06084099039435387, 'timestamp': '2025-10-02 00:12:33.995494', 'step': 963, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:34.053473', 'step': 963, 'epoch': 1}
{'type': 'loss', 'content': 0.14636963605880737, 'timestamp': '2025-10-02 00:12:34.063619', 'step': 964, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:34.119331', 'step': 964, 'epoch': 1}
{'type': 'loss', 'content': 0.05051610246300697, 'timestamp': '2025-10-02 00:12:34.122411', 'step': 965, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:34.176425', 'step': 965, 'epoch': 1}
{'type': 'loss', 'content': 0.12367735058069229, 'timestamp': '2025-10-02 00:12:34.182289', 'step': 966, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:34.242117', 'step': 966, 'epoch': 1}
{'type': 'loss', 'content': 0.2581261098384857, 'timestamp': '2025-10-02 00:12:34.246644', 'step': 967, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:34.313195', 'step': 967, 'epoch': 1}
{'type': 'loss', 'content': 0.052449781447649, 'timestamp': '2025-10-02 00:12:34.324128', 'step': 968, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:34.388328', 'step': 968, 'epoch': 1}
{'type': 'loss', 'content': 0.09147195518016815, 'timestamp': '2025-10-02 00:12:34.399273', 'step': 969, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:34.456335', 'step': 969, 'epoch': 1}
{'type': 'loss', 'content': 0.07163772732019424, 'timestamp': '2025-10-02 00:12:34.458825', 'step': 970, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:34.521683', 'step': 970, 'epoch': 1}
{'type': 'loss', 'content': 0.3130907118320465, 'timestamp': '2025-10-02 00:12:34.524083', 'step': 971, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:34.583383', 'step': 971, 'epoch': 1}
{'type': 'loss', 'content': 0.023373989388346672, 'timestamp': '2025-10-02 00:12:34.590049', 'step': 972, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:34.645412', 'step': 972, 'epoch': 1}
{'type': 'loss', 'content': 0.07678451389074326, 'timestamp': '2025-10-02 00:12:34.647477', 'step': 973, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:34.701892', 'step': 973, 'epoch': 1}
{'type': 'loss', 'content': 0.12816931307315826, 'timestamp': '2025-10-02 00:12:34.709360', 'step': 974, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:34.764607', 'step': 974, 'epoch': 1}
{'type': 'loss', 'content': 0.10337087512016296, 'timestamp': '2025-10-02 00:12:34.768048', 'step': 975, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:34.824964', 'step': 975, 'epoch': 1}
{'type': 'loss', 'content': 0.2077898234128952, 'timestamp': '2025-10-02 00:12:34.833425', 'step': 976, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:34.892596', 'step': 976, 'epoch': 1}
{'type': 'loss', 'content': 0.12944571673870087, 'timestamp': '2025-10-02 00:12:34.894823', 'step': 977, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:34.954651', 'step': 977, 'epoch': 1}
{'type': 'loss', 'content': 0.035347770899534225, 'timestamp': '2025-10-02 00:12:34.962109', 'step': 978, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:35.017128', 'step': 978, 'epoch': 1}
{'type': 'loss', 'content': 0.042838048189878464, 'timestamp': '2025-10-02 00:12:35.026645', 'step': 979, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:35.083555', 'step': 979, 'epoch': 1}
{'type': 'loss', 'content': 0.05654262751340866, 'timestamp': '2025-10-02 00:12:35.089547', 'step': 980, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:35.142823', 'step': 980, 'epoch': 1}
{'type': 'loss', 'content': 0.1814553588628769, 'timestamp': '2025-10-02 00:12:35.144880', 'step': 981, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:12:35.205459', 'step': 981, 'epoch': 1}
{'type': 'loss', 'content': 0.021864889189600945, 'timestamp': '2025-10-02 00:12:35.215896', 'step': 982, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:35.270562', 'step': 982, 'epoch': 1}
{'type': 'loss', 'content': 0.13370569050312042, 'timestamp': '2025-10-02 00:12:35.272707', 'step': 983, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:35.326417', 'step': 983, 'epoch': 1}
{'type': 'loss', 'content': 0.10297641158103943, 'timestamp': '2025-10-02 00:12:35.332037', 'step': 984, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:35.385397', 'step': 984, 'epoch': 1}
{'type': 'loss', 'content': 0.0662555992603302, 'timestamp': '2025-10-02 00:12:35.387755', 'step': 985, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:12:35.448482', 'step': 985, 'epoch': 1}
{'type': 'loss', 'content': 0.021635696291923523, 'timestamp': '2025-10-02 00:12:35.458990', 'step': 986, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:35.514482', 'step': 986, 'epoch': 1}
{'type': 'loss', 'content': 0.17731907963752747, 'timestamp': '2025-10-02 00:12:35.516683', 'step': 987, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:35.570701', 'step': 987, 'epoch': 1}
{'type': 'loss', 'content': 0.10849960148334503, 'timestamp': '2025-10-02 00:12:35.576690', 'step': 988, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:35.630643', 'step': 988, 'epoch': 1}
{'type': 'loss', 'content': 0.028471099212765694, 'timestamp': '2025-10-02 00:12:35.640079', 'step': 989, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:35.696023', 'step': 989, 'epoch': 1}
{'type': 'loss', 'content': 0.06051371246576309, 'timestamp': '2025-10-02 00:12:35.698000', 'step': 990, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:35.750943', 'step': 990, 'epoch': 1}
{'type': 'loss', 'content': 0.20010459423065186, 'timestamp': '2025-10-02 00:12:35.753606', 'step': 991, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:35.808228', 'step': 991, 'epoch': 1}
{'type': 'loss', 'content': 0.09903808683156967, 'timestamp': '2025-10-02 00:12:35.818589', 'step': 992, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:35.872330', 'step': 992, 'epoch': 1}
{'type': 'loss', 'content': 0.06899985671043396, 'timestamp': '2025-10-02 00:12:35.875734', 'step': 993, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:35.933638', 'step': 993, 'epoch': 1}
{'type': 'loss', 'content': 0.0978962779045105, 'timestamp': '2025-10-02 00:12:35.935626', 'step': 994, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:35.990369', 'step': 994, 'epoch': 1}
{'type': 'loss', 'content': 0.0908067375421524, 'timestamp': '2025-10-02 00:12:35.999941', 'step': 995, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:36.054058', 'step': 995, 'epoch': 1}
{'type': 'loss', 'content': 0.21498261392116547, 'timestamp': '2025-10-02 00:12:36.059862', 'step': 996, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:36.113864', 'step': 996, 'epoch': 1}
{'type': 'loss', 'content': 0.031267449259757996, 'timestamp': '2025-10-02 00:12:36.123425', 'step': 997, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:36.178227', 'step': 997, 'epoch': 1}
{'type': 'loss', 'content': 0.02449251525104046, 'timestamp': '2025-10-02 00:12:36.187615', 'step': 998, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:12:36.254486', 'step': 998, 'epoch': 1}
{'type': 'loss', 'content': 0.09374435991048813, 'timestamp': '2025-10-02 00:12:36.266461', 'step': 999, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:36.320890', 'step': 999, 'epoch': 1}
{'type': 'loss', 'content': 0.06903714686632156, 'timestamp': '2025-10-02 00:12:36.326764', 'step': 1000, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 1000', 'timestamp': '2025-10-02 00:12:36.793166', 'step': 1000, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:36.850171', 'step': 1000, 'epoch': 1}
{'type': 'loss', 'content': 0.03845436871051788, 'timestamp': '2025-10-02 00:12:36.853205', 'step': 1001, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:12:36.909877', 'step': 1001, 'epoch': 1}
{'type': 'loss', 'content': 0.16403797268867493, 'timestamp': '2025-10-02 00:12:36.912684', 'step': 1002, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:36.966404', 'step': 1002, 'epoch': 1}
{'type': 'loss', 'content': 0.11953415721654892, 'timestamp': '2025-10-02 00:12:36.973548', 'step': 1003, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:37.028473', 'step': 1003, 'epoch': 1}
{'type': 'loss', 'content': 0.15899790823459625, 'timestamp': '2025-10-02 00:12:37.034479', 'step': 1004, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:37.088464', 'step': 1004, 'epoch': 1}
{'type': 'loss', 'content': 0.1015637069940567, 'timestamp': '2025-10-02 00:12:37.098691', 'step': 1005, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:37.153128', 'step': 1005, 'epoch': 1}
{'type': 'loss', 'content': 0.08417283743619919, 'timestamp': '2025-10-02 00:12:37.158906', 'step': 1006, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:37.213337', 'step': 1006, 'epoch': 1}
{'type': 'loss', 'content': 0.06446228176355362, 'timestamp': '2025-10-02 00:12:37.219184', 'step': 1007, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:37.277502', 'step': 1007, 'epoch': 1}
{'type': 'loss', 'content': 0.12169712781906128, 'timestamp': '2025-10-02 00:12:37.288465', 'step': 1008, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:37.342378', 'step': 1008, 'epoch': 1}
{'type': 'loss', 'content': 0.0909411832690239, 'timestamp': '2025-10-02 00:12:37.352535', 'step': 1009, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:37.406476', 'step': 1009, 'epoch': 1}
{'type': 'loss', 'content': 0.07997462153434753, 'timestamp': '2025-10-02 00:12:37.413976', 'step': 1010, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:37.468270', 'step': 1010, 'epoch': 1}
{'type': 'loss', 'content': 0.11987603455781937, 'timestamp': '2025-10-02 00:12:37.470518', 'step': 1011, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:37.524372', 'step': 1011, 'epoch': 1}
{'type': 'loss', 'content': 0.16459570825099945, 'timestamp': '2025-10-02 00:12:37.530634', 'step': 1012, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:37.583861', 'step': 1012, 'epoch': 1}
{'type': 'loss', 'content': 0.11507415026426315, 'timestamp': '2025-10-02 00:12:37.593506', 'step': 1013, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:37.647587', 'step': 1013, 'epoch': 1}
{'type': 'loss', 'content': 0.09116880595684052, 'timestamp': '2025-10-02 00:12:37.649794', 'step': 1014, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:12:37.703760', 'step': 1014, 'epoch': 1}
{'type': 'loss', 'content': 0.09494858235120773, 'timestamp': '2025-10-02 00:12:37.706643', 'step': 1015, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:37.759959', 'step': 1015, 'epoch': 1}
{'type': 'loss', 'content': 0.17297376692295074, 'timestamp': '2025-10-02 00:12:37.766562', 'step': 1016, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:37.820848', 'step': 1016, 'epoch': 1}
{'type': 'loss', 'content': 0.04523157328367233, 'timestamp': '2025-10-02 00:12:37.831120', 'step': 1017, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:37.885147', 'step': 1017, 'epoch': 1}
{'type': 'loss', 'content': 0.11304184049367905, 'timestamp': '2025-10-02 00:12:37.888592', 'step': 1018, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:37.946382', 'step': 1018, 'epoch': 1}
{'type': 'loss', 'content': 0.1214086040854454, 'timestamp': '2025-10-02 00:12:37.948750', 'step': 1019, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:12:38.019733', 'step': 1019, 'epoch': 1}
{'type': 'loss', 'content': 0.055723194032907486, 'timestamp': '2025-10-02 00:12:38.033172', 'step': 1020, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:38.086464', 'step': 1020, 'epoch': 1}
{'type': 'loss', 'content': 0.07542096823453903, 'timestamp': '2025-10-02 00:12:38.088839', 'step': 1021, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:38.143465', 'step': 1021, 'epoch': 1}
{'type': 'loss', 'content': 0.05539580062031746, 'timestamp': '2025-10-02 00:12:38.153024', 'step': 1022, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:38.207494', 'step': 1022, 'epoch': 1}
{'type': 'loss', 'content': 0.1567392647266388, 'timestamp': '2025-10-02 00:12:38.209571', 'step': 1023, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:38.264292', 'step': 1023, 'epoch': 1}
{'type': 'loss', 'content': 0.0585525780916214, 'timestamp': '2025-10-02 00:12:38.270020', 'step': 1024, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:12:38.330634', 'step': 1024, 'epoch': 1}
{'type': 'loss', 'content': 0.0956869125366211, 'timestamp': '2025-10-02 00:12:38.342161', 'step': 1025, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:38.396043', 'step': 1025, 'epoch': 1}
{'type': 'loss', 'content': 0.1656893640756607, 'timestamp': '2025-10-02 00:12:38.403542', 'step': 1026, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:12:38.459322', 'step': 1026, 'epoch': 1}
{'type': 'loss', 'content': 0.1050058975815773, 'timestamp': '2025-10-02 00:12:38.461431', 'step': 1027, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:38.515834', 'step': 1027, 'epoch': 1}
{'type': 'loss', 'content': 0.16538682579994202, 'timestamp': '2025-10-02 00:12:38.522445', 'step': 1028, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:38.575869', 'step': 1028, 'epoch': 1}
{'type': 'loss', 'content': 0.1947387456893921, 'timestamp': '2025-10-02 00:12:38.578435', 'step': 1029, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:38.632649', 'step': 1029, 'epoch': 1}
{'type': 'loss', 'content': 0.11264628916978836, 'timestamp': '2025-10-02 00:12:38.640154', 'step': 1030, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:38.693373', 'step': 1030, 'epoch': 1}
{'type': 'loss', 'content': 0.24591799080371857, 'timestamp': '2025-10-02 00:12:38.695530', 'step': 1031, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:38.750031', 'step': 1031, 'epoch': 1}
{'type': 'loss', 'content': 0.057497840374708176, 'timestamp': '2025-10-02 00:12:38.760359', 'step': 1032, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:38.813975', 'step': 1032, 'epoch': 1}
{'type': 'loss', 'content': 0.1019648090004921, 'timestamp': '2025-10-02 00:12:38.816062', 'step': 1033, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:38.871888', 'step': 1033, 'epoch': 1}
{'type': 'loss', 'content': 0.0333288200199604, 'timestamp': '2025-10-02 00:12:38.874390', 'step': 1034, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:12:38.927958', 'step': 1034, 'epoch': 1}
{'type': 'loss', 'content': 0.13512451946735382, 'timestamp': '2025-10-02 00:12:38.930270', 'step': 1035, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:38.984193', 'step': 1035, 'epoch': 1}
{'type': 'loss', 'content': 0.11475028842687607, 'timestamp': '2025-10-02 00:12:38.994324', 'step': 1036, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:12:39.055640', 'step': 1036, 'epoch': 1}
{'type': 'loss', 'content': 0.01415946613997221, 'timestamp': '2025-10-02 00:12:39.067176', 'step': 1037, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:39.125851', 'step': 1037, 'epoch': 1}
{'type': 'loss', 'content': 0.28427359461784363, 'timestamp': '2025-10-02 00:12:39.127963', 'step': 1038, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:39.181486', 'step': 1038, 'epoch': 1}
{'type': 'loss', 'content': 0.07475222647190094, 'timestamp': '2025-10-02 00:12:39.190844', 'step': 1039, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:39.245005', 'step': 1039, 'epoch': 1}
{'type': 'loss', 'content': 0.11340086907148361, 'timestamp': '2025-10-02 00:12:39.253328', 'step': 1040, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:39.310649', 'step': 1040, 'epoch': 1}
{'type': 'loss', 'content': 0.11763283610343933, 'timestamp': '2025-10-02 00:12:39.312966', 'step': 1041, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:39.366865', 'step': 1041, 'epoch': 1}
{'type': 'loss', 'content': 0.10895262658596039, 'timestamp': '2025-10-02 00:12:39.371455', 'step': 1042, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:39.425495', 'step': 1042, 'epoch': 1}
{'type': 'loss', 'content': 0.17200739681720734, 'timestamp': '2025-10-02 00:12:39.427517', 'step': 1043, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:39.482127', 'step': 1043, 'epoch': 1}
{'type': 'loss', 'content': 0.15935276448726654, 'timestamp': '2025-10-02 00:12:39.492465', 'step': 1044, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:39.550106', 'step': 1044, 'epoch': 1}
{'type': 'loss', 'content': 0.2089594602584839, 'timestamp': '2025-10-02 00:12:39.556054', 'step': 1045, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:39.614360', 'step': 1045, 'epoch': 1}
{'type': 'loss', 'content': 0.08832994103431702, 'timestamp': '2025-10-02 00:12:39.616576', 'step': 1046, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:39.677365', 'step': 1046, 'epoch': 1}
{'type': 'loss', 'content': 0.08087664097547531, 'timestamp': '2025-10-02 00:12:39.682240', 'step': 1047, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:39.742371', 'step': 1047, 'epoch': 1}
{'type': 'loss', 'content': 0.10374030470848083, 'timestamp': '2025-10-02 00:12:39.749537', 'step': 1048, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:39.807962', 'step': 1048, 'epoch': 1}
{'type': 'loss', 'content': 0.15413253009319305, 'timestamp': '2025-10-02 00:12:39.815321', 'step': 1049, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:39.877819', 'step': 1049, 'epoch': 1}
{'type': 'loss', 'content': 0.07613886892795563, 'timestamp': '2025-10-02 00:12:39.879988', 'step': 1050, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:39.939900', 'step': 1050, 'epoch': 1}
{'type': 'loss', 'content': 0.07750935852527618, 'timestamp': '2025-10-02 00:12:39.944373', 'step': 1051, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:40.004886', 'step': 1051, 'epoch': 1}
{'type': 'loss', 'content': 0.08283770084381104, 'timestamp': '2025-10-02 00:12:40.012145', 'step': 1052, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:12:40.070715', 'step': 1052, 'epoch': 1}
{'type': 'loss', 'content': 0.20050278306007385, 'timestamp': '2025-10-02 00:12:40.072824', 'step': 1053, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:40.140854', 'step': 1053, 'epoch': 1}
{'type': 'loss', 'content': 0.17824453115463257, 'timestamp': '2025-10-02 00:12:40.142616', 'step': 1054, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:40.197196', 'step': 1054, 'epoch': 1}
{'type': 'loss', 'content': 0.24362775683403015, 'timestamp': '2025-10-02 00:12:40.202091', 'step': 1055, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:40.272292', 'step': 1055, 'epoch': 1}
{'type': 'loss', 'content': 0.05967070907354355, 'timestamp': '2025-10-02 00:12:40.283248', 'step': 1056, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:40.344191', 'step': 1056, 'epoch': 1}
{'type': 'loss', 'content': 0.06667263060808182, 'timestamp': '2025-10-02 00:12:40.355185', 'step': 1057, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:40.408643', 'step': 1057, 'epoch': 1}
{'type': 'loss', 'content': 0.07985546439886093, 'timestamp': '2025-10-02 00:12:40.411559', 'step': 1058, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:40.465648', 'step': 1058, 'epoch': 1}
{'type': 'loss', 'content': 0.07613354921340942, 'timestamp': '2025-10-02 00:12:40.467740', 'step': 1059, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:40.521421', 'step': 1059, 'epoch': 1}
{'type': 'loss', 'content': 0.13290658593177795, 'timestamp': '2025-10-02 00:12:40.527624', 'step': 1060, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:40.580644', 'step': 1060, 'epoch': 1}
{'type': 'loss', 'content': 0.10225431621074677, 'timestamp': '2025-10-02 00:12:40.590199', 'step': 1061, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:40.644683', 'step': 1061, 'epoch': 1}
{'type': 'loss', 'content': 0.11672902852296829, 'timestamp': '2025-10-02 00:12:40.646827', 'step': 1062, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:40.701365', 'step': 1062, 'epoch': 1}
{'type': 'loss', 'content': 0.06513053923845291, 'timestamp': '2025-10-02 00:12:40.703537', 'step': 1063, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:40.757813', 'step': 1063, 'epoch': 1}
{'type': 'loss', 'content': 0.033563971519470215, 'timestamp': '2025-10-02 00:12:40.763841', 'step': 1064, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:12:40.817413', 'step': 1064, 'epoch': 1}
{'type': 'loss', 'content': 0.09642753005027771, 'timestamp': '2025-10-02 00:12:40.823120', 'step': 1065, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:40.880366', 'step': 1065, 'epoch': 1}
{'type': 'loss', 'content': 0.052572786808013916, 'timestamp': '2025-10-02 00:12:40.882489', 'step': 1066, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:40.936665', 'step': 1066, 'epoch': 1}
{'type': 'loss', 'content': 0.12862612307071686, 'timestamp': '2025-10-02 00:12:40.938705', 'step': 1067, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:40.991882', 'step': 1067, 'epoch': 1}
{'type': 'loss', 'content': 0.13877469301223755, 'timestamp': '2025-10-02 00:12:40.998771', 'step': 1068, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:12:41.060742', 'step': 1068, 'epoch': 1}
{'type': 'loss', 'content': 0.0740421861410141, 'timestamp': '2025-10-02 00:12:41.072142', 'step': 1069, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:41.131294', 'step': 1069, 'epoch': 1}
{'type': 'loss', 'content': 0.1075209304690361, 'timestamp': '2025-10-02 00:12:41.135304', 'step': 1070, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:41.193758', 'step': 1070, 'epoch': 1}
{'type': 'loss', 'content': 0.0985269844532013, 'timestamp': '2025-10-02 00:12:41.196971', 'step': 1071, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:41.254210', 'step': 1071, 'epoch': 1}
{'type': 'loss', 'content': 0.18126928806304932, 'timestamp': '2025-10-02 00:12:41.261147', 'step': 1072, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:41.317661', 'step': 1072, 'epoch': 1}
{'type': 'loss', 'content': 0.12507854402065277, 'timestamp': '2025-10-02 00:12:41.320401', 'step': 1073, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:41.376707', 'step': 1073, 'epoch': 1}
{'type': 'loss', 'content': 0.024224841967225075, 'timestamp': '2025-10-02 00:12:41.383385', 'step': 1074, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:41.438788', 'step': 1074, 'epoch': 1}
{'type': 'loss', 'content': 0.08986075222492218, 'timestamp': '2025-10-02 00:12:41.441182', 'step': 1075, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:41.496639', 'step': 1075, 'epoch': 1}
{'type': 'loss', 'content': 0.031734295189380646, 'timestamp': '2025-10-02 00:12:41.503861', 'step': 1076, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:41.559695', 'step': 1076, 'epoch': 1}
{'type': 'loss', 'content': 0.25357937812805176, 'timestamp': '2025-10-02 00:12:41.563161', 'step': 1077, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:12:41.626498', 'step': 1077, 'epoch': 1}
{'type': 'loss', 'content': 0.0647306889295578, 'timestamp': '2025-10-02 00:12:41.636884', 'step': 1078, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:12:41.693144', 'step': 1078, 'epoch': 1}
{'type': 'loss', 'content': 0.13441820442676544, 'timestamp': '2025-10-02 00:12:41.695518', 'step': 1079, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:12:41.756303', 'step': 1079, 'epoch': 1}
{'type': 'loss', 'content': 0.08379556238651276, 'timestamp': '2025-10-02 00:12:41.767515', 'step': 1080, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:41.824355', 'step': 1080, 'epoch': 1}
{'type': 'loss', 'content': 0.05273735150694847, 'timestamp': '2025-10-02 00:12:41.832414', 'step': 1081, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:41.888900', 'step': 1081, 'epoch': 1}
{'type': 'loss', 'content': 0.04192623868584633, 'timestamp': '2025-10-02 00:12:41.895914', 'step': 1082, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:12:41.962113', 'step': 1082, 'epoch': 1}
{'type': 'loss', 'content': 0.06014329567551613, 'timestamp': '2025-10-02 00:12:41.972711', 'step': 1083, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:12:42.028342', 'step': 1083, 'epoch': 1}
{'type': 'loss', 'content': 0.2018153965473175, 'timestamp': '2025-10-02 00:12:42.035190', 'step': 1084, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:42.089573', 'step': 1084, 'epoch': 1}
{'type': 'loss', 'content': 0.15048258006572723, 'timestamp': '2025-10-02 00:12:42.092380', 'step': 1085, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:42.148049', 'step': 1085, 'epoch': 1}
{'type': 'loss', 'content': 0.1487770825624466, 'timestamp': '2025-10-02 00:12:42.150910', 'step': 1086, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:42.207181', 'step': 1086, 'epoch': 1}
{'type': 'loss', 'content': 0.020182432606816292, 'timestamp': '2025-10-02 00:12:42.216243', 'step': 1087, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:42.273207', 'step': 1087, 'epoch': 1}
{'type': 'loss', 'content': 0.046759359538555145, 'timestamp': '2025-10-02 00:12:42.281119', 'step': 1088, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:42.335914', 'step': 1088, 'epoch': 1}
{'type': 'loss', 'content': 0.11589797586202621, 'timestamp': '2025-10-02 00:12:42.338868', 'step': 1089, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:42.394256', 'step': 1089, 'epoch': 1}
{'type': 'loss', 'content': 0.18101581931114197, 'timestamp': '2025-10-02 00:12:42.397118', 'step': 1090, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:12:42.460905', 'step': 1090, 'epoch': 1}
{'type': 'loss', 'content': 0.008092351257801056, 'timestamp': '2025-10-02 00:12:42.471548', 'step': 1091, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:12:42.526978', 'step': 1091, 'epoch': 1}
{'type': 'loss', 'content': 0.15979836881160736, 'timestamp': '2025-10-02 00:12:42.533659', 'step': 1092, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:12:42.589531', 'step': 1092, 'epoch': 1}
{'type': 'loss', 'content': 0.10474216192960739, 'timestamp': '2025-10-02 00:12:42.592391', 'step': 1093, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:42.647561', 'step': 1093, 'epoch': 1}
{'type': 'loss', 'content': 0.08774765580892563, 'timestamp': '2025-10-02 00:12:42.656557', 'step': 1094, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:42.713033', 'step': 1094, 'epoch': 1}
{'type': 'loss', 'content': 0.2245122194290161, 'timestamp': '2025-10-02 00:12:42.715716', 'step': 1095, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:42.770388', 'step': 1095, 'epoch': 1}
{'type': 'loss', 'content': 0.14546231925487518, 'timestamp': '2025-10-02 00:12:42.777648', 'step': 1096, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:42.832229', 'step': 1096, 'epoch': 1}
{'type': 'loss', 'content': 0.2836681008338928, 'timestamp': '2025-10-02 00:12:42.834953', 'step': 1097, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:42.891043', 'step': 1097, 'epoch': 1}
{'type': 'loss', 'content': 0.0575113482773304, 'timestamp': '2025-10-02 00:12:42.894021', 'step': 1098, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:42.949912', 'step': 1098, 'epoch': 1}
{'type': 'loss', 'content': 0.21496276557445526, 'timestamp': '2025-10-02 00:12:42.951927', 'step': 1099, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:43.007894', 'step': 1099, 'epoch': 1}
{'type': 'loss', 'content': 0.071290023624897, 'timestamp': '2025-10-02 00:12:43.014545', 'step': 1100, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:43.067465', 'step': 1100, 'epoch': 1}
{'type': 'loss', 'content': 0.14063425362110138, 'timestamp': '2025-10-02 00:12:43.069865', 'step': 1101, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:43.124591', 'step': 1101, 'epoch': 1}
{'type': 'loss', 'content': 0.15612980723381042, 'timestamp': '2025-10-02 00:12:43.126807', 'step': 1102, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:43.180384', 'step': 1102, 'epoch': 1}
{'type': 'loss', 'content': 0.10530688613653183, 'timestamp': '2025-10-02 00:12:43.182679', 'step': 1103, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:43.238811', 'step': 1103, 'epoch': 1}
{'type': 'loss', 'content': 0.08593184500932693, 'timestamp': '2025-10-02 00:12:43.244863', 'step': 1104, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:43.298289', 'step': 1104, 'epoch': 1}
{'type': 'loss', 'content': 0.11515951156616211, 'timestamp': '2025-10-02 00:12:43.304307', 'step': 1105, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:43.357555', 'step': 1105, 'epoch': 1}
{'type': 'loss', 'content': 0.17852875590324402, 'timestamp': '2025-10-02 00:12:43.359801', 'step': 1106, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:12:43.436484', 'step': 1106, 'epoch': 1}
{'type': 'loss', 'content': 0.01129309181123972, 'timestamp': '2025-10-02 00:12:43.450308', 'step': 1107, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:43.504337', 'step': 1107, 'epoch': 1}
{'type': 'loss', 'content': 0.07615511864423752, 'timestamp': '2025-10-02 00:12:43.510436', 'step': 1108, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:43.563251', 'step': 1108, 'epoch': 1}
{'type': 'loss', 'content': 0.10849736630916595, 'timestamp': '2025-10-02 00:12:43.570761', 'step': 1109, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:43.624479', 'step': 1109, 'epoch': 1}
{'type': 'loss', 'content': 0.05315650999546051, 'timestamp': '2025-10-02 00:12:43.631865', 'step': 1110, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:12:43.699351', 'step': 1110, 'epoch': 1}
{'type': 'loss', 'content': 0.08825815469026566, 'timestamp': '2025-10-02 00:12:43.711256', 'step': 1111, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:43.766133', 'step': 1111, 'epoch': 1}
{'type': 'loss', 'content': 0.10634847730398178, 'timestamp': '2025-10-02 00:12:43.772833', 'step': 1112, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:43.826709', 'step': 1112, 'epoch': 1}
{'type': 'loss', 'content': 0.1534326672554016, 'timestamp': '2025-10-02 00:12:43.834299', 'step': 1113, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:43.888436', 'step': 1113, 'epoch': 1}
{'type': 'loss', 'content': 0.0785663053393364, 'timestamp': '2025-10-02 00:12:43.890430', 'step': 1114, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:43.943775', 'step': 1114, 'epoch': 1}
{'type': 'loss', 'content': 0.21069495379924774, 'timestamp': '2025-10-02 00:12:43.946045', 'step': 1115, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:43.999474', 'step': 1115, 'epoch': 1}
{'type': 'loss', 'content': 0.1276475042104721, 'timestamp': '2025-10-02 00:12:44.005223', 'step': 1116, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:44.058071', 'step': 1116, 'epoch': 1}
{'type': 'loss', 'content': 0.1027795746922493, 'timestamp': '2025-10-02 00:12:44.060103', 'step': 1117, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:44.113981', 'step': 1117, 'epoch': 1}
{'type': 'loss', 'content': 0.033060330897569656, 'timestamp': '2025-10-02 00:12:44.116378', 'step': 1118, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:44.171010', 'step': 1118, 'epoch': 1}
{'type': 'loss', 'content': 0.07699170708656311, 'timestamp': '2025-10-02 00:12:44.180584', 'step': 1119, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:44.234158', 'step': 1119, 'epoch': 1}
{'type': 'loss', 'content': 0.0496843084692955, 'timestamp': '2025-10-02 00:12:44.240009', 'step': 1120, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:12:44.293070', 'step': 1120, 'epoch': 1}
{'type': 'loss', 'content': 0.09099988639354706, 'timestamp': '2025-10-02 00:12:44.295290', 'step': 1121, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:44.350317', 'step': 1121, 'epoch': 1}
{'type': 'loss', 'content': 0.07187862694263458, 'timestamp': '2025-10-02 00:12:44.352781', 'step': 1122, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:44.408487', 'step': 1122, 'epoch': 1}
{'type': 'loss', 'content': 0.029502954334020615, 'timestamp': '2025-10-02 00:12:44.418093', 'step': 1123, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:44.471808', 'step': 1123, 'epoch': 1}
{'type': 'loss', 'content': 0.1763055920600891, 'timestamp': '2025-10-02 00:12:44.477415', 'step': 1124, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:12:44.542984', 'step': 1124, 'epoch': 1}
{'type': 'loss', 'content': 0.03943663835525513, 'timestamp': '2025-10-02 00:12:44.555977', 'step': 1125, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:12:44.617340', 'step': 1125, 'epoch': 1}
{'type': 'loss', 'content': 0.03492545709013939, 'timestamp': '2025-10-02 00:12:44.627778', 'step': 1126, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:44.683645', 'step': 1126, 'epoch': 1}
{'type': 'loss', 'content': 0.04478246346116066, 'timestamp': '2025-10-02 00:12:44.693206', 'step': 1127, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:12:44.747895', 'step': 1127, 'epoch': 1}
{'type': 'loss', 'content': 0.07036517560482025, 'timestamp': '2025-10-02 00:12:44.753847', 'step': 1128, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:12:44.821533', 'step': 1128, 'epoch': 1}
{'type': 'loss', 'content': 0.05875089764595032, 'timestamp': '2025-10-02 00:12:44.835083', 'step': 1129, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:44.889482', 'step': 1129, 'epoch': 1}
{'type': 'loss', 'content': 0.12673938274383545, 'timestamp': '2025-10-02 00:12:44.891581', 'step': 1130, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:44.949873', 'step': 1130, 'epoch': 1}
{'type': 'loss', 'content': 0.034848522394895554, 'timestamp': '2025-10-02 00:12:44.960080', 'step': 1131, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:45.013469', 'step': 1131, 'epoch': 1}
{'type': 'loss', 'content': 0.1710984706878662, 'timestamp': '2025-10-02 00:12:45.019179', 'step': 1132, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:12:45.079334', 'step': 1132, 'epoch': 1}
{'type': 'loss', 'content': 0.05903981998562813, 'timestamp': '2025-10-02 00:12:45.090673', 'step': 1133, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:12:45.159828', 'step': 1133, 'epoch': 1}
{'type': 'loss', 'content': 0.029889022931456566, 'timestamp': '2025-10-02 00:12:45.172279', 'step': 1134, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:45.231186', 'step': 1134, 'epoch': 1}
{'type': 'loss', 'content': 0.06610292196273804, 'timestamp': '2025-10-02 00:12:45.241399', 'step': 1135, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:12:45.294897', 'step': 1135, 'epoch': 1}
{'type': 'loss', 'content': 0.14520864188671112, 'timestamp': '2025-10-02 00:12:45.300911', 'step': 1136, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:45.353627', 'step': 1136, 'epoch': 1}
{'type': 'loss', 'content': 0.2457292228937149, 'timestamp': '2025-10-02 00:12:45.356098', 'step': 1137, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:45.410315', 'step': 1137, 'epoch': 1}
{'type': 'loss', 'content': 0.21058914065361023, 'timestamp': '2025-10-02 00:12:45.412701', 'step': 1138, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:45.466590', 'step': 1138, 'epoch': 1}
{'type': 'loss', 'content': 0.1670418679714203, 'timestamp': '2025-10-02 00:12:45.468898', 'step': 1139, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:45.523180', 'step': 1139, 'epoch': 1}
{'type': 'loss', 'content': 0.08142665028572083, 'timestamp': '2025-10-02 00:12:45.531421', 'step': 1140, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:45.584399', 'step': 1140, 'epoch': 1}
{'type': 'loss', 'content': 0.22268874943256378, 'timestamp': '2025-10-02 00:12:45.586868', 'step': 1141, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:45.640642', 'step': 1141, 'epoch': 1}
{'type': 'loss', 'content': 0.06128501892089844, 'timestamp': '2025-10-02 00:12:45.648130', 'step': 1142, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:45.702404', 'step': 1142, 'epoch': 1}
{'type': 'loss', 'content': 0.09613997489213943, 'timestamp': '2025-10-02 00:12:45.704592', 'step': 1143, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:45.758382', 'step': 1143, 'epoch': 1}
{'type': 'loss', 'content': 0.1387592852115631, 'timestamp': '2025-10-02 00:12:45.764240', 'step': 1144, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:45.817361', 'step': 1144, 'epoch': 1}
{'type': 'loss', 'content': 0.224443256855011, 'timestamp': '2025-10-02 00:12:45.819537', 'step': 1145, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:12:45.893670', 'step': 1145, 'epoch': 1}
{'type': 'loss', 'content': 0.07012514770030975, 'timestamp': '2025-10-02 00:12:45.907094', 'step': 1146, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:45.960607', 'step': 1146, 'epoch': 1}
{'type': 'loss', 'content': 0.09462785720825195, 'timestamp': '2025-10-02 00:12:45.962806', 'step': 1147, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:46.016899', 'step': 1147, 'epoch': 1}
{'type': 'loss', 'content': 0.12401498854160309, 'timestamp': '2025-10-02 00:12:46.033734', 'step': 1148, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:46.118200', 'step': 1148, 'epoch': 1}
{'type': 'loss', 'content': 0.0701775997877121, 'timestamp': '2025-10-02 00:12:46.134859', 'step': 1149, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:12:46.235957', 'step': 1149, 'epoch': 1}
{'type': 'loss', 'content': 0.04860080033540726, 'timestamp': '2025-10-02 00:12:46.251268', 'step': 1150, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:46.322091', 'step': 1150, 'epoch': 1}
{'type': 'loss', 'content': 0.1335160881280899, 'timestamp': '2025-10-02 00:12:46.330074', 'step': 1151, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:46.396781', 'step': 1151, 'epoch': 1}
{'type': 'loss', 'content': 0.04573279619216919, 'timestamp': '2025-10-02 00:12:46.406857', 'step': 1152, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:46.473884', 'step': 1152, 'epoch': 1}
{'type': 'loss', 'content': 0.0678521916270256, 'timestamp': '2025-10-02 00:12:46.483532', 'step': 1153, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:46.552102', 'step': 1153, 'epoch': 1}
{'type': 'loss', 'content': 0.029797228053212166, 'timestamp': '2025-10-02 00:12:46.559241', 'step': 1154, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:46.624229', 'step': 1154, 'epoch': 1}
{'type': 'loss', 'content': 0.03300945833325386, 'timestamp': '2025-10-02 00:12:46.631310', 'step': 1155, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:46.699490', 'step': 1155, 'epoch': 1}
{'type': 'loss', 'content': 0.10910475254058838, 'timestamp': '2025-10-02 00:12:46.709117', 'step': 1156, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:46.771908', 'step': 1156, 'epoch': 1}
{'type': 'loss', 'content': 0.06506524980068207, 'timestamp': '2025-10-02 00:12:46.781544', 'step': 1157, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:46.847948', 'step': 1157, 'epoch': 1}
{'type': 'loss', 'content': 0.09692925214767456, 'timestamp': '2025-10-02 00:12:46.852428', 'step': 1158, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:12:46.920120', 'step': 1158, 'epoch': 1}
{'type': 'loss', 'content': 0.052043553441762924, 'timestamp': '2025-10-02 00:12:46.930752', 'step': 1159, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:12:46.996007', 'step': 1159, 'epoch': 1}
{'type': 'loss', 'content': 0.1349157840013504, 'timestamp': '2025-10-02 00:12:47.002534', 'step': 1160, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:47.063370', 'step': 1160, 'epoch': 1}
{'type': 'loss', 'content': 0.2101031243801117, 'timestamp': '2025-10-02 00:12:47.068892', 'step': 1161, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:47.125923', 'step': 1161, 'epoch': 1}
{'type': 'loss', 'content': 0.09456448256969452, 'timestamp': '2025-10-02 00:12:47.133121', 'step': 1162, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:47.198671', 'step': 1162, 'epoch': 1}
{'type': 'loss', 'content': 0.13566912710666656, 'timestamp': '2025-10-02 00:12:47.206229', 'step': 1163, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:12:47.267984', 'step': 1163, 'epoch': 1}
{'type': 'loss', 'content': 0.21090935170650482, 'timestamp': '2025-10-02 00:12:47.277497', 'step': 1164, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:47.343560', 'step': 1164, 'epoch': 1}
{'type': 'loss', 'content': 0.09476393461227417, 'timestamp': '2025-10-02 00:12:47.347642', 'step': 1165, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:47.408919', 'step': 1165, 'epoch': 1}
{'type': 'loss', 'content': 0.16479326784610748, 'timestamp': '2025-10-02 00:12:47.416552', 'step': 1166, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:12:47.481172', 'step': 1166, 'epoch': 1}
{'type': 'loss', 'content': 0.1640762984752655, 'timestamp': '2025-10-02 00:12:47.486321', 'step': 1167, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:47.555405', 'step': 1167, 'epoch': 1}
{'type': 'loss', 'content': 0.012541520409286022, 'timestamp': '2025-10-02 00:12:47.567000', 'step': 1168, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:47.631677', 'step': 1168, 'epoch': 1}
{'type': 'loss', 'content': 0.06597854197025299, 'timestamp': '2025-10-02 00:12:47.641076', 'step': 1169, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:12:47.705367', 'step': 1169, 'epoch': 1}
{'type': 'loss', 'content': 0.17394563555717468, 'timestamp': '2025-10-02 00:12:47.712390', 'step': 1170, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:12:47.794801', 'step': 1170, 'epoch': 1}
{'type': 'loss', 'content': 0.065348319709301, 'timestamp': '2025-10-02 00:12:47.806782', 'step': 1171, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:12:47.886581', 'step': 1171, 'epoch': 1}
{'type': 'loss', 'content': 0.05895426869392395, 'timestamp': '2025-10-02 00:12:47.899310', 'step': 1172, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:47.962719', 'step': 1172, 'epoch': 1}
{'type': 'loss', 'content': 0.20340070128440857, 'timestamp': '2025-10-02 00:12:47.965663', 'step': 1173, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:48.033435', 'step': 1173, 'epoch': 1}
{'type': 'loss', 'content': 0.06332054734230042, 'timestamp': '2025-10-02 00:12:48.041105', 'step': 1174, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:48.102375', 'step': 1174, 'epoch': 1}
{'type': 'loss', 'content': 0.03802470862865448, 'timestamp': '2025-10-02 00:12:48.106294', 'step': 1175, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:48.173767', 'step': 1175, 'epoch': 1}
{'type': 'loss', 'content': 0.083838552236557, 'timestamp': '2025-10-02 00:12:48.179821', 'step': 1176, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:12:48.249068', 'step': 1176, 'epoch': 1}
{'type': 'loss', 'content': 0.025371627882122993, 'timestamp': '2025-10-02 00:12:48.262478', 'step': 1177, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:48.323635', 'step': 1177, 'epoch': 1}
{'type': 'loss', 'content': 0.06272603571414948, 'timestamp': '2025-10-02 00:12:48.331014', 'step': 1178, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:12:48.411124', 'step': 1178, 'epoch': 1}
{'type': 'loss', 'content': 0.00958127249032259, 'timestamp': '2025-10-02 00:12:48.423592', 'step': 1179, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:48.484419', 'step': 1179, 'epoch': 1}
{'type': 'loss', 'content': 0.1394953727722168, 'timestamp': '2025-10-02 00:12:48.493017', 'step': 1180, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:48.556847', 'step': 1180, 'epoch': 1}
{'type': 'loss', 'content': 0.10306911915540695, 'timestamp': '2025-10-02 00:12:48.563605', 'step': 1181, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:48.630418', 'step': 1181, 'epoch': 1}
{'type': 'loss', 'content': 0.1884746104478836, 'timestamp': '2025-10-02 00:12:48.637568', 'step': 1182, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:48.710284', 'step': 1182, 'epoch': 1}
{'type': 'loss', 'content': 0.0716434046626091, 'timestamp': '2025-10-02 00:12:48.720611', 'step': 1183, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:12:48.792915', 'step': 1183, 'epoch': 1}
{'type': 'loss', 'content': 0.02598145417869091, 'timestamp': '2025-10-02 00:12:48.804185', 'step': 1184, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:48.866364', 'step': 1184, 'epoch': 1}
{'type': 'loss', 'content': 0.07852452993392944, 'timestamp': '2025-10-02 00:12:48.869652', 'step': 1185, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:48.924161', 'step': 1185, 'epoch': 1}
{'type': 'loss', 'content': 0.1258038431406021, 'timestamp': '2025-10-02 00:12:48.930652', 'step': 1186, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:48.996776', 'step': 1186, 'epoch': 1}
{'type': 'loss', 'content': 0.04993564635515213, 'timestamp': '2025-10-02 00:12:49.005316', 'step': 1187, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:12:49.080268', 'step': 1187, 'epoch': 1}
{'type': 'loss', 'content': 0.030004702508449554, 'timestamp': '2025-10-02 00:12:49.091563', 'step': 1188, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:49.149679', 'step': 1188, 'epoch': 1}
{'type': 'loss', 'content': 0.04503384977579117, 'timestamp': '2025-10-02 00:12:49.159312', 'step': 1189, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:49.221398', 'step': 1189, 'epoch': 1}
{'type': 'loss', 'content': 0.33097758889198303, 'timestamp': '2025-10-02 00:12:49.227939', 'step': 1190, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:49.292385', 'step': 1190, 'epoch': 1}
{'type': 'loss', 'content': 0.02361280843615532, 'timestamp': '2025-10-02 00:12:49.301794', 'step': 1191, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:49.369482', 'step': 1191, 'epoch': 1}
{'type': 'loss', 'content': 0.18047621846199036, 'timestamp': '2025-10-02 00:12:49.378598', 'step': 1192, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:49.439457', 'step': 1192, 'epoch': 1}
{'type': 'loss', 'content': 0.12138912826776505, 'timestamp': '2025-10-02 00:12:49.447394', 'step': 1193, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:49.512809', 'step': 1193, 'epoch': 1}
{'type': 'loss', 'content': 0.18112920224666595, 'timestamp': '2025-10-02 00:12:49.518659', 'step': 1194, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:49.584663', 'step': 1194, 'epoch': 1}
{'type': 'loss', 'content': 0.052223268896341324, 'timestamp': '2025-10-02 00:12:49.591761', 'step': 1195, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:49.654981', 'step': 1195, 'epoch': 1}
{'type': 'loss', 'content': 0.13769443333148956, 'timestamp': '2025-10-02 00:12:49.668123', 'step': 1196, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:49.734464', 'step': 1196, 'epoch': 1}
{'type': 'loss', 'content': 0.20015515387058258, 'timestamp': '2025-10-02 00:12:49.738716', 'step': 1197, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:49.804336', 'step': 1197, 'epoch': 1}
{'type': 'loss', 'content': 0.0513661652803421, 'timestamp': '2025-10-02 00:12:49.807947', 'step': 1198, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:49.874757', 'step': 1198, 'epoch': 1}
{'type': 'loss', 'content': 0.06481419503688812, 'timestamp': '2025-10-02 00:12:49.882485', 'step': 1199, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:49.954986', 'step': 1199, 'epoch': 1}
{'type': 'loss', 'content': 0.0708324983716011, 'timestamp': '2025-10-02 00:12:49.965324', 'step': 1200, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:12:50.036966', 'step': 1200, 'epoch': 1}
{'type': 'loss', 'content': 0.10576619952917099, 'timestamp': '2025-10-02 00:12:50.048384', 'step': 1201, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:50.111647', 'step': 1201, 'epoch': 1}
{'type': 'loss', 'content': 0.044498514384031296, 'timestamp': '2025-10-02 00:12:50.117558', 'step': 1202, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:50.173556', 'step': 1202, 'epoch': 1}
{'type': 'loss', 'content': 0.2576943337917328, 'timestamp': '2025-10-02 00:12:50.175902', 'step': 1203, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:50.230714', 'step': 1203, 'epoch': 1}
{'type': 'loss', 'content': 0.051884159445762634, 'timestamp': '2025-10-02 00:12:50.240869', 'step': 1204, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:50.298107', 'step': 1204, 'epoch': 1}
{'type': 'loss', 'content': 0.15553250908851624, 'timestamp': '2025-10-02 00:12:50.301020', 'step': 1205, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:50.364319', 'step': 1205, 'epoch': 1}
{'type': 'loss', 'content': 0.08035735785961151, 'timestamp': '2025-10-02 00:12:50.366701', 'step': 1206, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:50.427382', 'step': 1206, 'epoch': 1}
{'type': 'loss', 'content': 0.07609538733959198, 'timestamp': '2025-10-02 00:12:50.430110', 'step': 1207, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:50.510707', 'step': 1207, 'epoch': 1}
{'type': 'loss', 'content': 0.12903326749801636, 'timestamp': '2025-10-02 00:12:50.520099', 'step': 1208, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:50.588899', 'step': 1208, 'epoch': 1}
{'type': 'loss', 'content': 0.05326998978853226, 'timestamp': '2025-10-02 00:12:50.600324', 'step': 1209, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:50.678127', 'step': 1209, 'epoch': 1}
{'type': 'loss', 'content': 0.05153537541627884, 'timestamp': '2025-10-02 00:12:50.681269', 'step': 1210, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:12:50.766948', 'step': 1210, 'epoch': 1}
{'type': 'loss', 'content': 0.04074159264564514, 'timestamp': '2025-10-02 00:12:50.777428', 'step': 1211, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:50.841828', 'step': 1211, 'epoch': 1}
{'type': 'loss', 'content': 0.10096230357885361, 'timestamp': '2025-10-02 00:12:50.854090', 'step': 1212, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:50.929085', 'step': 1212, 'epoch': 1}
{'type': 'loss', 'content': 0.24528057873249054, 'timestamp': '2025-10-02 00:12:50.938974', 'step': 1213, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:51.020761', 'step': 1213, 'epoch': 1}
{'type': 'loss', 'content': 0.06767766922712326, 'timestamp': '2025-10-02 00:12:51.028628', 'step': 1214, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:51.099777', 'step': 1214, 'epoch': 1}
{'type': 'loss', 'content': 0.21626779437065125, 'timestamp': '2025-10-02 00:12:51.101979', 'step': 1215, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:51.171477', 'step': 1215, 'epoch': 1}
{'type': 'loss', 'content': 0.15062731504440308, 'timestamp': '2025-10-02 00:12:51.179287', 'step': 1216, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:51.261697', 'step': 1216, 'epoch': 1}
{'type': 'loss', 'content': 0.2034878134727478, 'timestamp': '2025-10-02 00:12:51.267850', 'step': 1217, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:51.327465', 'step': 1217, 'epoch': 1}
{'type': 'loss', 'content': 0.09365566819906235, 'timestamp': '2025-10-02 00:12:51.345509', 'step': 1218, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:51.420484', 'step': 1218, 'epoch': 1}
{'type': 'loss', 'content': 0.09196453541517258, 'timestamp': '2025-10-02 00:12:51.429643', 'step': 1219, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:51.512069', 'step': 1219, 'epoch': 1}
{'type': 'loss', 'content': 0.05829472467303276, 'timestamp': '2025-10-02 00:12:51.528998', 'step': 1220, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:51.606211', 'step': 1220, 'epoch': 1}
{'type': 'loss', 'content': 0.1100357249379158, 'timestamp': '2025-10-02 00:12:51.614411', 'step': 1221, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:51.695326', 'step': 1221, 'epoch': 1}
{'type': 'loss', 'content': 0.12640316784381866, 'timestamp': '2025-10-02 00:12:51.704076', 'step': 1222, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:51.771846', 'step': 1222, 'epoch': 1}
{'type': 'loss', 'content': 0.2833201587200165, 'timestamp': '2025-10-02 00:12:51.781691', 'step': 1223, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:51.867850', 'step': 1223, 'epoch': 1}
{'type': 'loss', 'content': 0.17852874100208282, 'timestamp': '2025-10-02 00:12:51.875726', 'step': 1224, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:51.949115', 'step': 1224, 'epoch': 1}
{'type': 'loss', 'content': 0.09477584064006805, 'timestamp': '2025-10-02 00:12:51.959369', 'step': 1225, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:52.025775', 'step': 1225, 'epoch': 1}
{'type': 'loss', 'content': 0.2798117697238922, 'timestamp': '2025-10-02 00:12:52.033382', 'step': 1226, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:52.106338', 'step': 1226, 'epoch': 1}
{'type': 'loss', 'content': 0.21289761364459991, 'timestamp': '2025-10-02 00:12:52.108845', 'step': 1227, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:52.185453', 'step': 1227, 'epoch': 1}
{'type': 'loss', 'content': 0.1695394217967987, 'timestamp': '2025-10-02 00:12:52.196652', 'step': 1228, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:52.265611', 'step': 1228, 'epoch': 1}
{'type': 'loss', 'content': 0.06910993903875351, 'timestamp': '2025-10-02 00:12:52.275304', 'step': 1229, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:52.345316', 'step': 1229, 'epoch': 1}
{'type': 'loss', 'content': 0.04876897111535072, 'timestamp': '2025-10-02 00:12:52.352261', 'step': 1230, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:52.418246', 'step': 1230, 'epoch': 1}
{'type': 'loss', 'content': 0.10799961537122726, 'timestamp': '2025-10-02 00:12:52.426112', 'step': 1231, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:52.495148', 'step': 1231, 'epoch': 1}
{'type': 'loss', 'content': 0.13074548542499542, 'timestamp': '2025-10-02 00:12:52.505362', 'step': 1232, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:52.573573', 'step': 1232, 'epoch': 1}
{'type': 'loss', 'content': 0.05573410913348198, 'timestamp': '2025-10-02 00:12:52.579111', 'step': 1233, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:52.640305', 'step': 1233, 'epoch': 1}
{'type': 'loss', 'content': 0.030705243349075317, 'timestamp': '2025-10-02 00:12:52.650240', 'step': 1234, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:52.722754', 'step': 1234, 'epoch': 1}
{'type': 'loss', 'content': 0.1300901472568512, 'timestamp': '2025-10-02 00:12:52.731931', 'step': 1235, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:52.803693', 'step': 1235, 'epoch': 1}
{'type': 'loss', 'content': 0.13646149635314941, 'timestamp': '2025-10-02 00:12:52.814361', 'step': 1236, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:52.885086', 'step': 1236, 'epoch': 1}
{'type': 'loss', 'content': 0.1900417059659958, 'timestamp': '2025-10-02 00:12:52.888219', 'step': 1237, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:12:52.962218', 'step': 1237, 'epoch': 1}
{'type': 'loss', 'content': 0.297742635011673, 'timestamp': '2025-10-02 00:12:52.970134', 'step': 1238, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:53.042836', 'step': 1238, 'epoch': 1}
{'type': 'loss', 'content': 0.04161279648542404, 'timestamp': '2025-10-02 00:12:53.050704', 'step': 1239, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:53.118372', 'step': 1239, 'epoch': 1}
{'type': 'loss', 'content': 0.10948693752288818, 'timestamp': '2025-10-02 00:12:53.130208', 'step': 1240, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:53.205626', 'step': 1240, 'epoch': 1}
{'type': 'loss', 'content': 0.0754624605178833, 'timestamp': '2025-10-02 00:12:53.213178', 'step': 1241, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:12:53.285600', 'step': 1241, 'epoch': 1}
{'type': 'loss', 'content': 0.0669313594698906, 'timestamp': '2025-10-02 00:12:53.288314', 'step': 1242, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:53.360929', 'step': 1242, 'epoch': 1}
{'type': 'loss', 'content': 0.05477604642510414, 'timestamp': '2025-10-02 00:12:53.370540', 'step': 1243, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:53.441706', 'step': 1243, 'epoch': 1}
{'type': 'loss', 'content': 0.11780957132577896, 'timestamp': '2025-10-02 00:12:53.453994', 'step': 1244, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:53.519780', 'step': 1244, 'epoch': 1}
{'type': 'loss', 'content': 0.1361408829689026, 'timestamp': '2025-10-02 00:12:53.527538', 'step': 1245, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:53.603665', 'step': 1245, 'epoch': 1}
{'type': 'loss', 'content': 0.0534016415476799, 'timestamp': '2025-10-02 00:12:53.613827', 'step': 1246, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:12:53.680308', 'step': 1246, 'epoch': 1}
{'type': 'loss', 'content': 0.17337018251419067, 'timestamp': '2025-10-02 00:12:53.683922', 'step': 1247, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:53.745561', 'step': 1247, 'epoch': 1}
{'type': 'loss', 'content': 0.020753391087055206, 'timestamp': '2025-10-02 00:12:53.756544', 'step': 1248, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:53.827627', 'step': 1248, 'epoch': 1}
{'type': 'loss', 'content': 0.24207116663455963, 'timestamp': '2025-10-02 00:12:53.835680', 'step': 1249, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:53.907269', 'step': 1249, 'epoch': 1}
{'type': 'loss', 'content': 0.11143999546766281, 'timestamp': '2025-10-02 00:12:53.915239', 'step': 1250, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:53.985784', 'step': 1250, 'epoch': 1}
{'type': 'loss', 'content': 0.1714390516281128, 'timestamp': '2025-10-02 00:12:53.989119', 'step': 1251, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:54.069103', 'step': 1251, 'epoch': 1}
{'type': 'loss', 'content': 0.08291114121675491, 'timestamp': '2025-10-02 00:12:54.079977', 'step': 1252, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:54.140417', 'step': 1252, 'epoch': 1}
{'type': 'loss', 'content': 0.03278744965791702, 'timestamp': '2025-10-02 00:12:54.149866', 'step': 1253, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:54.220289', 'step': 1253, 'epoch': 1}
{'type': 'loss', 'content': 0.18698672950267792, 'timestamp': '2025-10-02 00:12:54.228615', 'step': 1254, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:54.297201', 'step': 1254, 'epoch': 1}
{'type': 'loss', 'content': 0.05419261381030083, 'timestamp': '2025-10-02 00:12:54.301435', 'step': 1255, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:54.369575', 'step': 1255, 'epoch': 1}
{'type': 'loss', 'content': 0.027910733595490456, 'timestamp': '2025-10-02 00:12:54.379739', 'step': 1256, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:54.448506', 'step': 1256, 'epoch': 1}
{'type': 'loss', 'content': 0.08901217579841614, 'timestamp': '2025-10-02 00:12:54.458813', 'step': 1257, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:54.515805', 'step': 1257, 'epoch': 1}
{'type': 'loss', 'content': 0.10259778797626495, 'timestamp': '2025-10-02 00:12:54.519631', 'step': 1258, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:54.575297', 'step': 1258, 'epoch': 1}
{'type': 'loss', 'content': 0.2817842960357666, 'timestamp': '2025-10-02 00:12:54.578064', 'step': 1259, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:54.633005', 'step': 1259, 'epoch': 1}
{'type': 'loss', 'content': 0.23394906520843506, 'timestamp': '2025-10-02 00:12:54.639306', 'step': 1260, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:54.695038', 'step': 1260, 'epoch': 1}
{'type': 'loss', 'content': 0.13612310588359833, 'timestamp': '2025-10-02 00:12:54.702716', 'step': 1261, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:54.758911', 'step': 1261, 'epoch': 1}
{'type': 'loss', 'content': 0.04993657022714615, 'timestamp': '2025-10-02 00:12:54.764937', 'step': 1262, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:54.821323', 'step': 1262, 'epoch': 1}
{'type': 'loss', 'content': 0.11306694149971008, 'timestamp': '2025-10-02 00:12:54.824286', 'step': 1263, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:54.879026', 'step': 1263, 'epoch': 1}
{'type': 'loss', 'content': 0.10700435936450958, 'timestamp': '2025-10-02 00:12:54.884524', 'step': 1264, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:54.937383', 'step': 1264, 'epoch': 1}
{'type': 'loss', 'content': 0.09993986040353775, 'timestamp': '2025-10-02 00:12:54.940134', 'step': 1265, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:54.995995', 'step': 1265, 'epoch': 1}
{'type': 'loss', 'content': 0.0622497983276844, 'timestamp': '2025-10-02 00:12:55.005535', 'step': 1266, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:55.059791', 'step': 1266, 'epoch': 1}
{'type': 'loss', 'content': 0.09005200117826462, 'timestamp': '2025-10-02 00:12:55.061930', 'step': 1267, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:55.117179', 'step': 1267, 'epoch': 1}
{'type': 'loss', 'content': 0.05623297393321991, 'timestamp': '2025-10-02 00:12:55.127541', 'step': 1268, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:55.181098', 'step': 1268, 'epoch': 1}
{'type': 'loss', 'content': 0.11087067425251007, 'timestamp': '2025-10-02 00:12:55.188667', 'step': 1269, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:55.243866', 'step': 1269, 'epoch': 1}
{'type': 'loss', 'content': 0.030605340376496315, 'timestamp': '2025-10-02 00:12:55.253349', 'step': 1270, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:55.308279', 'step': 1270, 'epoch': 1}
{'type': 'loss', 'content': 0.09170380234718323, 'timestamp': '2025-10-02 00:12:55.310472', 'step': 1271, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:55.365746', 'step': 1271, 'epoch': 1}
{'type': 'loss', 'content': 0.06375568360090256, 'timestamp': '2025-10-02 00:12:55.375934', 'step': 1272, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:12:55.429761', 'step': 1272, 'epoch': 1}
{'type': 'loss', 'content': 0.11826156079769135, 'timestamp': '2025-10-02 00:12:55.432000', 'step': 1273, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:55.486485', 'step': 1273, 'epoch': 1}
{'type': 'loss', 'content': 0.02524213306605816, 'timestamp': '2025-10-02 00:12:55.495879', 'step': 1274, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:55.558227', 'step': 1274, 'epoch': 1}
{'type': 'loss', 'content': 0.1368863582611084, 'timestamp': '2025-10-02 00:12:55.560514', 'step': 1275, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:12:55.627806', 'step': 1275, 'epoch': 1}
{'type': 'loss', 'content': 0.03616669401526451, 'timestamp': '2025-10-02 00:12:55.640558', 'step': 1276, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:55.694345', 'step': 1276, 'epoch': 1}
{'type': 'loss', 'content': 0.06082822009921074, 'timestamp': '2025-10-02 00:12:55.696289', 'step': 1277, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:55.749373', 'step': 1277, 'epoch': 1}
{'type': 'loss', 'content': 0.23324204981327057, 'timestamp': '2025-10-02 00:12:55.751617', 'step': 1278, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:55.805321', 'step': 1278, 'epoch': 1}
{'type': 'loss', 'content': 0.07625787705183029, 'timestamp': '2025-10-02 00:12:55.807942', 'step': 1279, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:55.863380', 'step': 1279, 'epoch': 1}
{'type': 'loss', 'content': 0.07195957750082016, 'timestamp': '2025-10-02 00:12:55.869181', 'step': 1280, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:55.923235', 'step': 1280, 'epoch': 1}
{'type': 'loss', 'content': 0.10164425522089005, 'timestamp': '2025-10-02 00:12:55.925522', 'step': 1281, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:55.981097', 'step': 1281, 'epoch': 1}
{'type': 'loss', 'content': 0.012507413513958454, 'timestamp': '2025-10-02 00:12:55.990688', 'step': 1282, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:56.044677', 'step': 1282, 'epoch': 1}
{'type': 'loss', 'content': 0.059027668088674545, 'timestamp': '2025-10-02 00:12:56.052162', 'step': 1283, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:56.106881', 'step': 1283, 'epoch': 1}
{'type': 'loss', 'content': 0.08770822733640671, 'timestamp': '2025-10-02 00:12:56.117248', 'step': 1284, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:56.170474', 'step': 1284, 'epoch': 1}
{'type': 'loss', 'content': 0.20024892687797546, 'timestamp': '2025-10-02 00:12:56.173005', 'step': 1285, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:56.227079', 'step': 1285, 'epoch': 1}
{'type': 'loss', 'content': 0.15297730267047882, 'timestamp': '2025-10-02 00:12:56.229395', 'step': 1286, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:56.284497', 'step': 1286, 'epoch': 1}
{'type': 'loss', 'content': 0.13110873103141785, 'timestamp': '2025-10-02 00:12:56.294083', 'step': 1287, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:12:56.347485', 'step': 1287, 'epoch': 1}
{'type': 'loss', 'content': 0.1305556744337082, 'timestamp': '2025-10-02 00:12:56.361513', 'step': 1288, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:56.418390', 'step': 1288, 'epoch': 1}
{'type': 'loss', 'content': 0.12524425983428955, 'timestamp': '2025-10-02 00:12:56.421155', 'step': 1289, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:56.475861', 'step': 1289, 'epoch': 1}
{'type': 'loss', 'content': 0.07728651165962219, 'timestamp': '2025-10-02 00:12:56.478353', 'step': 1290, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:56.531850', 'step': 1290, 'epoch': 1}
{'type': 'loss', 'content': 0.11193278431892395, 'timestamp': '2025-10-02 00:12:56.534155', 'step': 1291, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:56.587650', 'step': 1291, 'epoch': 1}
{'type': 'loss', 'content': 0.08308152854442596, 'timestamp': '2025-10-02 00:12:56.593449', 'step': 1292, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:56.646125', 'step': 1292, 'epoch': 1}
{'type': 'loss', 'content': 0.15154939889907837, 'timestamp': '2025-10-02 00:12:56.648403', 'step': 1293, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:56.707392', 'step': 1293, 'epoch': 1}
{'type': 'loss', 'content': 0.03972209617495537, 'timestamp': '2025-10-02 00:12:56.717598', 'step': 1294, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:56.771580', 'step': 1294, 'epoch': 1}
{'type': 'loss', 'content': 0.042398061603307724, 'timestamp': '2025-10-02 00:12:56.773802', 'step': 1295, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:56.827274', 'step': 1295, 'epoch': 1}
{'type': 'loss', 'content': 0.22648486495018005, 'timestamp': '2025-10-02 00:12:56.832754', 'step': 1296, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:12:56.885864', 'step': 1296, 'epoch': 1}
{'type': 'loss', 'content': 0.14328114688396454, 'timestamp': '2025-10-02 00:12:56.887832', 'step': 1297, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:56.941441', 'step': 1297, 'epoch': 1}
{'type': 'loss', 'content': 0.10373664647340775, 'timestamp': '2025-10-02 00:12:56.943837', 'step': 1298, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:56.999246', 'step': 1298, 'epoch': 1}
{'type': 'loss', 'content': 0.08681747317314148, 'timestamp': '2025-10-02 00:12:57.001280', 'step': 1299, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:57.055628', 'step': 1299, 'epoch': 1}
{'type': 'loss', 'content': 0.06109336391091347, 'timestamp': '2025-10-02 00:12:57.061294', 'step': 1300, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:57.114482', 'step': 1300, 'epoch': 1}
{'type': 'loss', 'content': 0.14583644270896912, 'timestamp': '2025-10-02 00:12:57.116826', 'step': 1301, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:57.171381', 'step': 1301, 'epoch': 1}
{'type': 'loss', 'content': 0.03809763118624687, 'timestamp': '2025-10-02 00:12:57.173697', 'step': 1302, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:57.227771', 'step': 1302, 'epoch': 1}
{'type': 'loss', 'content': 0.16339260339736938, 'timestamp': '2025-10-02 00:12:57.229974', 'step': 1303, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:57.284081', 'step': 1303, 'epoch': 1}
{'type': 'loss', 'content': 0.1334872543811798, 'timestamp': '2025-10-02 00:12:57.289712', 'step': 1304, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:12:57.343217', 'step': 1304, 'epoch': 1}
{'type': 'loss', 'content': 0.13229137659072876, 'timestamp': '2025-10-02 00:12:57.345386', 'step': 1305, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:57.399298', 'step': 1305, 'epoch': 1}
{'type': 'loss', 'content': 0.1036330834031105, 'timestamp': '2025-10-02 00:12:57.401501', 'step': 1306, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:12:57.455949', 'step': 1306, 'epoch': 1}
{'type': 'loss', 'content': 0.16931571066379547, 'timestamp': '2025-10-02 00:12:57.458391', 'step': 1307, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:57.512662', 'step': 1307, 'epoch': 1}
{'type': 'loss', 'content': 0.02094743773341179, 'timestamp': '2025-10-02 00:12:57.519337', 'step': 1308, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:12:57.576393', 'step': 1308, 'epoch': 1}
{'type': 'loss', 'content': 0.05823984369635582, 'timestamp': '2025-10-02 00:12:57.587382', 'step': 1309, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:12:57.640711', 'step': 1309, 'epoch': 1}
{'type': 'loss', 'content': 0.07723850011825562, 'timestamp': '2025-10-02 00:12:57.643021', 'step': 1310, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:57.696347', 'step': 1310, 'epoch': 1}
{'type': 'loss', 'content': 0.06630945950746536, 'timestamp': '2025-10-02 00:12:57.698401', 'step': 1311, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:57.752047', 'step': 1311, 'epoch': 1}
{'type': 'loss', 'content': 0.15213245153427124, 'timestamp': '2025-10-02 00:12:57.757937', 'step': 1312, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:57.811273', 'step': 1312, 'epoch': 1}
{'type': 'loss', 'content': 0.12798626720905304, 'timestamp': '2025-10-02 00:12:57.813776', 'step': 1313, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:57.868908', 'step': 1313, 'epoch': 1}
{'type': 'loss', 'content': 0.025859519839286804, 'timestamp': '2025-10-02 00:12:57.878458', 'step': 1314, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:57.932254', 'step': 1314, 'epoch': 1}
{'type': 'loss', 'content': 0.053449682891368866, 'timestamp': '2025-10-02 00:12:57.934497', 'step': 1315, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:57.987549', 'step': 1315, 'epoch': 1}
{'type': 'loss', 'content': 0.23678287863731384, 'timestamp': '2025-10-02 00:12:57.993395', 'step': 1316, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:12:58.047323', 'step': 1316, 'epoch': 1}
{'type': 'loss', 'content': 0.07514144480228424, 'timestamp': '2025-10-02 00:12:58.057590', 'step': 1317, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:58.112129', 'step': 1317, 'epoch': 1}
{'type': 'loss', 'content': 0.03153664991259575, 'timestamp': '2025-10-02 00:12:58.121490', 'step': 1318, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:58.175236', 'step': 1318, 'epoch': 1}
{'type': 'loss', 'content': 0.1332838386297226, 'timestamp': '2025-10-02 00:12:58.178535', 'step': 1319, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:12:58.246702', 'step': 1319, 'epoch': 1}
{'type': 'loss', 'content': 0.07326895743608475, 'timestamp': '2025-10-02 00:12:58.259833', 'step': 1320, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:12:58.312463', 'step': 1320, 'epoch': 1}
{'type': 'loss', 'content': 0.09757847338914871, 'timestamp': '2025-10-02 00:12:58.314808', 'step': 1321, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:58.368238', 'step': 1321, 'epoch': 1}
{'type': 'loss', 'content': 0.19544661045074463, 'timestamp': '2025-10-02 00:12:58.370381', 'step': 1322, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:58.423710', 'step': 1322, 'epoch': 1}
{'type': 'loss', 'content': 0.21949774026870728, 'timestamp': '2025-10-02 00:12:58.425873', 'step': 1323, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:58.479905', 'step': 1323, 'epoch': 1}
{'type': 'loss', 'content': 0.11958570778369904, 'timestamp': '2025-10-02 00:12:58.490054', 'step': 1324, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:58.542725', 'step': 1324, 'epoch': 1}
{'type': 'loss', 'content': 0.15684957802295685, 'timestamp': '2025-10-02 00:12:58.545511', 'step': 1325, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:58.598724', 'step': 1325, 'epoch': 1}
{'type': 'loss', 'content': 0.11615308374166489, 'timestamp': '2025-10-02 00:12:58.600844', 'step': 1326, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:58.655413', 'step': 1326, 'epoch': 1}
{'type': 'loss', 'content': 0.0625496506690979, 'timestamp': '2025-10-02 00:12:58.662963', 'step': 1327, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:58.717287', 'step': 1327, 'epoch': 1}
{'type': 'loss', 'content': 0.058191195130348206, 'timestamp': '2025-10-02 00:12:58.722851', 'step': 1328, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:58.776322', 'step': 1328, 'epoch': 1}
{'type': 'loss', 'content': 0.11146873980760574, 'timestamp': '2025-10-02 00:12:58.778510', 'step': 1329, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:58.832896', 'step': 1329, 'epoch': 1}
{'type': 'loss', 'content': 0.18747739493846893, 'timestamp': '2025-10-02 00:12:58.835024', 'step': 1330, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:58.889418', 'step': 1330, 'epoch': 1}
{'type': 'loss', 'content': 0.10081891715526581, 'timestamp': '2025-10-02 00:12:58.891748', 'step': 1331, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:58.945633', 'step': 1331, 'epoch': 1}
{'type': 'loss', 'content': 0.09908895939588547, 'timestamp': '2025-10-02 00:12:58.951454', 'step': 1332, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:59.004713', 'step': 1332, 'epoch': 1}
{'type': 'loss', 'content': 0.16954348981380463, 'timestamp': '2025-10-02 00:12:59.006803', 'step': 1333, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:12:59.060038', 'step': 1333, 'epoch': 1}
{'type': 'loss', 'content': 0.09905054420232773, 'timestamp': '2025-10-02 00:12:59.067798', 'step': 1334, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:12:59.122226', 'step': 1334, 'epoch': 1}
{'type': 'loss', 'content': 0.04731200635433197, 'timestamp': '2025-10-02 00:12:59.124412', 'step': 1335, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:59.178077', 'step': 1335, 'epoch': 1}
{'type': 'loss', 'content': 0.14414440095424652, 'timestamp': '2025-10-02 00:12:59.183910', 'step': 1336, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:12:59.236920', 'step': 1336, 'epoch': 1}
{'type': 'loss', 'content': 0.027068201452493668, 'timestamp': '2025-10-02 00:12:59.246555', 'step': 1337, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:12:59.308000', 'step': 1337, 'epoch': 1}
{'type': 'loss', 'content': 0.015994064509868622, 'timestamp': '2025-10-02 00:12:59.318507', 'step': 1338, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-10-02 00:12:59.393951', 'step': 1338, 'epoch': 1}
{'type': 'loss', 'content': 0.015370313078165054, 'timestamp': '2025-10-02 00:12:59.407693', 'step': 1339, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:59.460882', 'step': 1339, 'epoch': 1}
{'type': 'loss', 'content': 0.219970703125, 'timestamp': '2025-10-02 00:12:59.466835', 'step': 1340, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:12:59.519729', 'step': 1340, 'epoch': 1}
{'type': 'loss', 'content': 0.17447714507579803, 'timestamp': '2025-10-02 00:12:59.521754', 'step': 1341, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:12:59.575438', 'step': 1341, 'epoch': 1}
{'type': 'loss', 'content': 0.05643597990274429, 'timestamp': '2025-10-02 00:12:59.581383', 'step': 1342, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:12:59.635155', 'step': 1342, 'epoch': 1}
{'type': 'loss', 'content': 0.1279110461473465, 'timestamp': '2025-10-02 00:12:59.637348', 'step': 1343, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:59.691163', 'step': 1343, 'epoch': 1}
{'type': 'loss', 'content': 0.10702932626008987, 'timestamp': '2025-10-02 00:12:59.696732', 'step': 1344, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:59.749588', 'step': 1344, 'epoch': 1}
{'type': 'loss', 'content': 0.08199033886194229, 'timestamp': '2025-10-02 00:12:59.751999', 'step': 1345, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:12:59.805654', 'step': 1345, 'epoch': 1}
{'type': 'loss', 'content': 0.11354529857635498, 'timestamp': '2025-10-02 00:12:59.808051', 'step': 1346, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:12:59.869589', 'step': 1346, 'epoch': 1}
{'type': 'loss', 'content': 0.14524972438812256, 'timestamp': '2025-10-02 00:12:59.872216', 'step': 1347, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:12:59.925444', 'step': 1347, 'epoch': 1}
{'type': 'loss', 'content': 0.0872388631105423, 'timestamp': '2025-10-02 00:12:59.931404', 'step': 1348, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:12:59.986545', 'step': 1348, 'epoch': 1}
{'type': 'loss', 'content': 0.23646752536296844, 'timestamp': '2025-10-02 00:12:59.988818', 'step': 1349, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:13:00.047992', 'step': 1349, 'epoch': 1}
{'type': 'loss', 'content': 0.0910271406173706, 'timestamp': '2025-10-02 00:13:00.050482', 'step': 1350, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:00.112978', 'step': 1350, 'epoch': 1}
{'type': 'loss', 'content': 0.09422151744365692, 'timestamp': '2025-10-02 00:13:00.115410', 'step': 1351, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:13:00.174146', 'step': 1351, 'epoch': 1}
{'type': 'loss', 'content': 0.1525631546974182, 'timestamp': '2025-10-02 00:13:00.181057', 'step': 1352, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:13:00.239999', 'step': 1352, 'epoch': 1}
{'type': 'loss', 'content': 0.08753667026758194, 'timestamp': '2025-10-02 00:13:00.242195', 'step': 1353, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:00.301312', 'step': 1353, 'epoch': 1}
{'type': 'loss', 'content': 0.10344567149877548, 'timestamp': '2025-10-02 00:13:00.303799', 'step': 1354, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:00.364094', 'step': 1354, 'epoch': 1}
{'type': 'loss', 'content': 0.1022922694683075, 'timestamp': '2025-10-02 00:13:00.366476', 'step': 1355, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:00.427736', 'step': 1355, 'epoch': 1}
{'type': 'loss', 'content': 0.12392991781234741, 'timestamp': '2025-10-02 00:13:00.435005', 'step': 1356, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:00.490227', 'step': 1356, 'epoch': 1}
{'type': 'loss', 'content': 0.19251985847949982, 'timestamp': '2025-10-02 00:13:00.492670', 'step': 1357, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:00.547698', 'step': 1357, 'epoch': 1}
{'type': 'loss', 'content': 0.08221734315156937, 'timestamp': '2025-10-02 00:13:00.550073', 'step': 1358, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:13:00.604085', 'step': 1358, 'epoch': 1}
{'type': 'loss', 'content': 0.1575375646352768, 'timestamp': '2025-10-02 00:13:00.606381', 'step': 1359, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:00.661945', 'step': 1359, 'epoch': 1}
{'type': 'loss', 'content': 0.04346805438399315, 'timestamp': '2025-10-02 00:13:00.668176', 'step': 1360, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:00.721855', 'step': 1360, 'epoch': 1}
{'type': 'loss', 'content': 0.06114169582724571, 'timestamp': '2025-10-02 00:13:00.729342', 'step': 1361, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:13:00.783217', 'step': 1361, 'epoch': 1}
{'type': 'loss', 'content': 0.23399074375629425, 'timestamp': '2025-10-02 00:13:00.785632', 'step': 1362, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:13:00.844920', 'step': 1362, 'epoch': 1}
{'type': 'loss', 'content': 0.03409332036972046, 'timestamp': '2025-10-02 00:13:00.855142', 'step': 1363, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:00.909550', 'step': 1363, 'epoch': 1}
{'type': 'loss', 'content': 0.08484721183776855, 'timestamp': '2025-10-02 00:13:00.915582', 'step': 1364, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:00.969630', 'step': 1364, 'epoch': 1}
{'type': 'loss', 'content': 0.11680451780557632, 'timestamp': '2025-10-02 00:13:00.976969', 'step': 1365, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:01.030824', 'step': 1365, 'epoch': 1}
{'type': 'loss', 'content': 0.29299196600914, 'timestamp': '2025-10-02 00:13:01.033121', 'step': 1366, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:01.087946', 'step': 1366, 'epoch': 1}
{'type': 'loss', 'content': 0.03800249099731445, 'timestamp': '2025-10-02 00:13:01.095241', 'step': 1367, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:13:01.149015', 'step': 1367, 'epoch': 1}
{'type': 'loss', 'content': 0.14288443326950073, 'timestamp': '2025-10-02 00:13:01.154839', 'step': 1368, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:13:01.208486', 'step': 1368, 'epoch': 1}
{'type': 'loss', 'content': 0.15856364369392395, 'timestamp': '2025-10-02 00:13:01.210636', 'step': 1369, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:01.264421', 'step': 1369, 'epoch': 1}
{'type': 'loss', 'content': 0.07063023000955582, 'timestamp': '2025-10-02 00:13:01.270149', 'step': 1370, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:13:01.341377', 'step': 1370, 'epoch': 1}
{'type': 'loss', 'content': 0.07978110015392303, 'timestamp': '2025-10-02 00:13:01.353821', 'step': 1371, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:01.410283', 'step': 1371, 'epoch': 1}
{'type': 'loss', 'content': 0.15529824793338776, 'timestamp': '2025-10-02 00:13:01.416838', 'step': 1372, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:13:01.486757', 'step': 1372, 'epoch': 1}
{'type': 'loss', 'content': 0.02446620911359787, 'timestamp': '2025-10-02 00:13:01.500165', 'step': 1373, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:01.557895', 'step': 1373, 'epoch': 1}
{'type': 'loss', 'content': 0.047407809644937515, 'timestamp': '2025-10-02 00:13:01.560618', 'step': 1374, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:01.616236', 'step': 1374, 'epoch': 1}
{'type': 'loss', 'content': 0.08710356056690216, 'timestamp': '2025-10-02 00:13:01.619650', 'step': 1375, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:13:01.676816', 'step': 1375, 'epoch': 1}
{'type': 'loss', 'content': 0.14004696905612946, 'timestamp': '2025-10-02 00:13:01.687175', 'step': 1376, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:01.742754', 'step': 1376, 'epoch': 1}
{'type': 'loss', 'content': 0.055711787194013596, 'timestamp': '2025-10-02 00:13:01.751849', 'step': 1377, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:01.807531', 'step': 1377, 'epoch': 1}
{'type': 'loss', 'content': 0.06253396719694138, 'timestamp': '2025-10-02 00:13:01.810092', 'step': 1378, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:01.865301', 'step': 1378, 'epoch': 1}
{'type': 'loss', 'content': 0.1478075385093689, 'timestamp': '2025-10-02 00:13:01.870927', 'step': 1379, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:13:01.926944', 'step': 1379, 'epoch': 1}
{'type': 'loss', 'content': 0.18165643513202667, 'timestamp': '2025-10-02 00:13:01.934610', 'step': 1380, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:13:01.997569', 'step': 1380, 'epoch': 1}
{'type': 'loss', 'content': 0.04083540663123131, 'timestamp': '2025-10-02 00:13:02.008549', 'step': 1381, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:13:02.070521', 'step': 1381, 'epoch': 1}
{'type': 'loss', 'content': 0.0980907753109932, 'timestamp': '2025-10-02 00:13:02.080754', 'step': 1382, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:02.137722', 'step': 1382, 'epoch': 1}
{'type': 'loss', 'content': 0.03537651151418686, 'timestamp': '2025-10-02 00:13:02.146217', 'step': 1383, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:02.203116', 'step': 1383, 'epoch': 1}
{'type': 'loss', 'content': 0.1384328305721283, 'timestamp': '2025-10-02 00:13:02.209519', 'step': 1384, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:02.265807', 'step': 1384, 'epoch': 1}
{'type': 'loss', 'content': 0.08049127459526062, 'timestamp': '2025-10-02 00:13:02.268219', 'step': 1385, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:02.323582', 'step': 1385, 'epoch': 1}
{'type': 'loss', 'content': 0.023326875641942024, 'timestamp': '2025-10-02 00:13:02.332909', 'step': 1386, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:02.391892', 'step': 1386, 'epoch': 1}
{'type': 'loss', 'content': 0.09153090417385101, 'timestamp': '2025-10-02 00:13:02.394944', 'step': 1387, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:02.450445', 'step': 1387, 'epoch': 1}
{'type': 'loss', 'content': 0.2532808184623718, 'timestamp': '2025-10-02 00:13:02.457564', 'step': 1388, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:02.513262', 'step': 1388, 'epoch': 1}
{'type': 'loss', 'content': 0.07198852300643921, 'timestamp': '2025-10-02 00:13:02.515843', 'step': 1389, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:13:02.573632', 'step': 1389, 'epoch': 1}
{'type': 'loss', 'content': 0.045556534081697464, 'timestamp': '2025-10-02 00:13:02.583172', 'step': 1390, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:13:02.639648', 'step': 1390, 'epoch': 1}
{'type': 'loss', 'content': 0.20287327468395233, 'timestamp': '2025-10-02 00:13:02.642497', 'step': 1391, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:02.698472', 'step': 1391, 'epoch': 1}
{'type': 'loss', 'content': 0.09476695209741592, 'timestamp': '2025-10-02 00:13:02.705412', 'step': 1392, 'epoch': 1}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:13:29.369180', 'step': 1392, 'epoch': 1}
{'type': 'pplx', 'content': 81.1445249185832, 'timestamp': '2025-10-02 00:13:29.381454', 'step': 1392, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:13:29.448302', 'step': 1392, 'epoch': 1}
{'type': 'loss', 'content': 0.06991507858037949, 'timestamp': '2025-10-02 00:13:29.455913', 'step': 1393, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:29.522493', 'step': 1393, 'epoch': 1}
{'type': 'loss', 'content': 0.2505311369895935, 'timestamp': '2025-10-02 00:13:29.528632', 'step': 1394, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:13:29.596984', 'step': 1394, 'epoch': 1}
{'type': 'loss', 'content': 0.18473245203495026, 'timestamp': '2025-10-02 00:13:29.603645', 'step': 1395, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:29.669240', 'step': 1395, 'epoch': 1}
{'type': 'loss', 'content': 0.11435583978891373, 'timestamp': '2025-10-02 00:13:29.677195', 'step': 1396, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:13:29.744455', 'step': 1396, 'epoch': 1}
{'type': 'loss', 'content': 0.06470776349306107, 'timestamp': '2025-10-02 00:13:29.755492', 'step': 1397, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:29.817048', 'step': 1397, 'epoch': 1}
{'type': 'loss', 'content': 0.11501224339008331, 'timestamp': '2025-10-02 00:13:29.824357', 'step': 1398, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:29.889600', 'step': 1398, 'epoch': 1}
{'type': 'loss', 'content': 0.08504026383161545, 'timestamp': '2025-10-02 00:13:29.898765', 'step': 1399, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:29.966488', 'step': 1399, 'epoch': 1}
{'type': 'loss', 'content': 0.22953006625175476, 'timestamp': '2025-10-02 00:13:29.972625', 'step': 1400, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:30.028530', 'step': 1400, 'epoch': 1}
{'type': 'loss', 'content': 0.12238267064094543, 'timestamp': '2025-10-02 00:13:30.031235', 'step': 1401, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:30.097189', 'step': 1401, 'epoch': 1}
{'type': 'loss', 'content': 0.014339955523610115, 'timestamp': '2025-10-02 00:13:30.103015', 'step': 1402, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:30.170115', 'step': 1402, 'epoch': 1}
{'type': 'loss', 'content': 0.11028186976909637, 'timestamp': '2025-10-02 00:13:30.177705', 'step': 1403, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:30.245331', 'step': 1403, 'epoch': 1}
{'type': 'loss', 'content': 0.14959757030010223, 'timestamp': '2025-10-02 00:13:30.251785', 'step': 1404, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:13:30.309871', 'step': 1404, 'epoch': 1}
{'type': 'loss', 'content': 0.15730619430541992, 'timestamp': '2025-10-02 00:13:30.312373', 'step': 1405, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:30.375201', 'step': 1405, 'epoch': 1}
{'type': 'loss', 'content': 0.15726307034492493, 'timestamp': '2025-10-02 00:13:30.383949', 'step': 1406, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:30.445125', 'step': 1406, 'epoch': 1}
{'type': 'loss', 'content': 0.037335120141506195, 'timestamp': '2025-10-02 00:13:30.449247', 'step': 1407, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:30.517550', 'step': 1407, 'epoch': 1}
{'type': 'loss', 'content': 0.06845661997795105, 'timestamp': '2025-10-02 00:13:30.529248', 'step': 1408, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:13:30.591192', 'step': 1408, 'epoch': 1}
{'type': 'loss', 'content': 0.07221727073192596, 'timestamp': '2025-10-02 00:13:30.602125', 'step': 1409, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:30.666580', 'step': 1409, 'epoch': 1}
{'type': 'loss', 'content': 0.13354164361953735, 'timestamp': '2025-10-02 00:13:30.673129', 'step': 1410, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:30.737210', 'step': 1410, 'epoch': 1}
{'type': 'loss', 'content': 0.05306513234972954, 'timestamp': '2025-10-02 00:13:30.744591', 'step': 1411, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:30.805139', 'step': 1411, 'epoch': 1}
{'type': 'loss', 'content': 0.09416399896144867, 'timestamp': '2025-10-02 00:13:30.812146', 'step': 1412, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:13:30.879203', 'step': 1412, 'epoch': 1}
{'type': 'loss', 'content': 0.1351277232170105, 'timestamp': '2025-10-02 00:13:30.885229', 'step': 1413, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:13:30.952679', 'step': 1413, 'epoch': 1}
{'type': 'loss', 'content': 0.07708578556776047, 'timestamp': '2025-10-02 00:13:30.962784', 'step': 1414, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:13:31.027037', 'step': 1414, 'epoch': 1}
{'type': 'loss', 'content': 0.09176299721002579, 'timestamp': '2025-10-02 00:13:31.036578', 'step': 1415, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:31.097902', 'step': 1415, 'epoch': 1}
{'type': 'loss', 'content': 0.09637966752052307, 'timestamp': '2025-10-02 00:13:31.107161', 'step': 1416, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:31.165817', 'step': 1416, 'epoch': 1}
{'type': 'loss', 'content': 0.03049170970916748, 'timestamp': '2025-10-02 00:13:31.170956', 'step': 1417, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:13:31.236483', 'step': 1417, 'epoch': 1}
{'type': 'loss', 'content': 0.24220533668994904, 'timestamp': '2025-10-02 00:13:31.242830', 'step': 1418, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:31.306135', 'step': 1418, 'epoch': 1}
{'type': 'loss', 'content': 0.02471490204334259, 'timestamp': '2025-10-02 00:13:31.315530', 'step': 1419, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:31.375653', 'step': 1419, 'epoch': 1}
{'type': 'loss', 'content': 0.1877172440290451, 'timestamp': '2025-10-02 00:13:31.383349', 'step': 1420, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:13:31.446097', 'step': 1420, 'epoch': 1}
{'type': 'loss', 'content': 0.02329040877521038, 'timestamp': '2025-10-02 00:13:31.457041', 'step': 1421, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:31.524932', 'step': 1421, 'epoch': 1}
{'type': 'loss', 'content': 0.0793432891368866, 'timestamp': '2025-10-02 00:13:31.533520', 'step': 1422, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:31.590570', 'step': 1422, 'epoch': 1}
{'type': 'loss', 'content': 0.07774505764245987, 'timestamp': '2025-10-02 00:13:31.594234', 'step': 1423, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:31.655811', 'step': 1423, 'epoch': 1}
{'type': 'loss', 'content': 0.2376401573419571, 'timestamp': '2025-10-02 00:13:31.662205', 'step': 1424, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:13:31.717907', 'step': 1424, 'epoch': 1}
{'type': 'loss', 'content': 0.20349809527397156, 'timestamp': '2025-10-02 00:13:31.721171', 'step': 1425, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:31.779946', 'step': 1425, 'epoch': 1}
{'type': 'loss', 'content': 0.04716816544532776, 'timestamp': '2025-10-02 00:13:31.793881', 'step': 1426, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:31.873492', 'step': 1426, 'epoch': 1}
{'type': 'loss', 'content': 0.08593415468931198, 'timestamp': '2025-10-02 00:13:31.876261', 'step': 1427, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:31.934658', 'step': 1427, 'epoch': 1}
{'type': 'loss', 'content': 0.14547394216060638, 'timestamp': '2025-10-02 00:13:31.949455', 'step': 1428, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:32.008151', 'step': 1428, 'epoch': 1}
{'type': 'loss', 'content': 0.05518458038568497, 'timestamp': '2025-10-02 00:13:32.017439', 'step': 1429, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:32.073906', 'step': 1429, 'epoch': 1}
{'type': 'loss', 'content': 0.22654138505458832, 'timestamp': '2025-10-02 00:13:32.084233', 'step': 1430, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:32.162742', 'step': 1430, 'epoch': 1}
{'type': 'loss', 'content': 0.10449957102537155, 'timestamp': '2025-10-02 00:13:32.166017', 'step': 1431, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:13:32.248134', 'step': 1431, 'epoch': 1}
{'type': 'loss', 'content': 0.058817680925130844, 'timestamp': '2025-10-02 00:13:32.259543', 'step': 1432, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:32.336696', 'step': 1432, 'epoch': 1}
{'type': 'loss', 'content': 0.019346101209521294, 'timestamp': '2025-10-02 00:13:32.345856', 'step': 1433, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:13:32.419049', 'step': 1433, 'epoch': 1}
{'type': 'loss', 'content': 0.08371435105800629, 'timestamp': '2025-10-02 00:13:32.426597', 'step': 1434, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:13:32.501833', 'step': 1434, 'epoch': 1}
{'type': 'loss', 'content': 0.05810242146253586, 'timestamp': '2025-10-02 00:13:32.512024', 'step': 1435, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:32.584742', 'step': 1435, 'epoch': 1}
{'type': 'loss', 'content': 0.058219488710165024, 'timestamp': '2025-10-02 00:13:32.591306', 'step': 1436, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:32.648762', 'step': 1436, 'epoch': 1}
{'type': 'loss', 'content': 0.023943321779370308, 'timestamp': '2025-10-02 00:13:32.653299', 'step': 1437, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:32.723057', 'step': 1437, 'epoch': 1}
{'type': 'loss', 'content': 0.1001729965209961, 'timestamp': '2025-10-02 00:13:32.726792', 'step': 1438, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:32.801580', 'step': 1438, 'epoch': 1}
{'type': 'loss', 'content': 0.07465109974145889, 'timestamp': '2025-10-02 00:13:32.807345', 'step': 1439, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:32.906894', 'step': 1439, 'epoch': 1}
{'type': 'loss', 'content': 0.09820351004600525, 'timestamp': '2025-10-02 00:13:32.915677', 'step': 1440, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:32.987093', 'step': 1440, 'epoch': 1}
{'type': 'loss', 'content': 0.10602729767560959, 'timestamp': '2025-10-02 00:13:32.992867', 'step': 1441, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:33.057603', 'step': 1441, 'epoch': 1}
{'type': 'loss', 'content': 0.03889542073011398, 'timestamp': '2025-10-02 00:13:33.060464', 'step': 1442, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:13:33.130367', 'step': 1442, 'epoch': 1}
{'type': 'loss', 'content': 0.08606799691915512, 'timestamp': '2025-10-02 00:13:33.143982', 'step': 1443, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:13:33.220068', 'step': 1443, 'epoch': 1}
{'type': 'loss', 'content': 0.013559186831116676, 'timestamp': '2025-10-02 00:13:33.234320', 'step': 1444, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:13:33.291078', 'step': 1444, 'epoch': 1}
{'type': 'loss', 'content': 0.21406838297843933, 'timestamp': '2025-10-02 00:13:33.294536', 'step': 1445, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:13:33.350939', 'step': 1445, 'epoch': 1}
{'type': 'loss', 'content': 0.07120339572429657, 'timestamp': '2025-10-02 00:13:33.354261', 'step': 1446, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:13:33.411314', 'step': 1446, 'epoch': 1}
{'type': 'loss', 'content': 0.04393751174211502, 'timestamp': '2025-10-02 00:13:33.420869', 'step': 1447, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:13:33.497355', 'step': 1447, 'epoch': 1}
{'type': 'loss', 'content': 0.04068903997540474, 'timestamp': '2025-10-02 00:13:33.508677', 'step': 1448, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:13:33.589984', 'step': 1448, 'epoch': 1}
{'type': 'loss', 'content': 0.05575801059603691, 'timestamp': '2025-10-02 00:13:33.601775', 'step': 1449, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:13:33.659279', 'step': 1449, 'epoch': 1}
{'type': 'loss', 'content': 0.07282519340515137, 'timestamp': '2025-10-02 00:13:33.662503', 'step': 1450, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:13:33.732114', 'step': 1450, 'epoch': 1}
{'type': 'loss', 'content': 0.006838741712272167, 'timestamp': '2025-10-02 00:13:33.742779', 'step': 1451, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:33.808626', 'step': 1451, 'epoch': 1}
{'type': 'loss', 'content': 0.10448000580072403, 'timestamp': '2025-10-02 00:13:33.815463', 'step': 1452, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:13:33.872465', 'step': 1452, 'epoch': 1}
{'type': 'loss', 'content': 0.08556917309761047, 'timestamp': '2025-10-02 00:13:33.885287', 'step': 1453, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:33.971784', 'step': 1453, 'epoch': 1}
{'type': 'loss', 'content': 0.07317084819078445, 'timestamp': '2025-10-02 00:13:33.981157', 'step': 1454, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:34.049187', 'step': 1454, 'epoch': 1}
{'type': 'loss', 'content': 0.11712539941072464, 'timestamp': '2025-10-02 00:13:34.056599', 'step': 1455, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:34.128882', 'step': 1455, 'epoch': 1}
{'type': 'loss', 'content': 0.08057377487421036, 'timestamp': '2025-10-02 00:13:34.138359', 'step': 1456, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:13:34.210000', 'step': 1456, 'epoch': 1}
{'type': 'loss', 'content': 0.04976917803287506, 'timestamp': '2025-10-02 00:13:34.221540', 'step': 1457, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:34.287179', 'step': 1457, 'epoch': 1}
{'type': 'loss', 'content': 0.1114547923207283, 'timestamp': '2025-10-02 00:13:34.290122', 'step': 1458, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:34.353637', 'step': 1458, 'epoch': 1}
{'type': 'loss', 'content': 0.1320769488811493, 'timestamp': '2025-10-02 00:13:34.356817', 'step': 1459, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:13:34.426197', 'step': 1459, 'epoch': 1}
{'type': 'loss', 'content': 0.01409800536930561, 'timestamp': '2025-10-02 00:13:34.438851', 'step': 1460, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:13:34.499562', 'step': 1460, 'epoch': 1}
{'type': 'loss', 'content': 0.029398681595921516, 'timestamp': '2025-10-02 00:13:34.510948', 'step': 1461, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:13:34.590506', 'step': 1461, 'epoch': 1}
{'type': 'loss', 'content': 0.07000009715557098, 'timestamp': '2025-10-02 00:13:34.600753', 'step': 1462, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:34.671011', 'step': 1462, 'epoch': 1}
{'type': 'loss', 'content': 0.14849796891212463, 'timestamp': '2025-10-02 00:13:34.676821', 'step': 1463, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:34.749398', 'step': 1463, 'epoch': 1}
{'type': 'loss', 'content': 0.09568595886230469, 'timestamp': '2025-10-02 00:13:34.762403', 'step': 1464, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:34.825606', 'step': 1464, 'epoch': 1}
{'type': 'loss', 'content': 0.0426383800804615, 'timestamp': '2025-10-02 00:13:34.834077', 'step': 1465, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:34.897657', 'step': 1465, 'epoch': 1}
{'type': 'loss', 'content': 0.09647233039140701, 'timestamp': '2025-10-02 00:13:34.900251', 'step': 1466, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:13:34.957534', 'step': 1466, 'epoch': 1}
{'type': 'loss', 'content': 0.21515454351902008, 'timestamp': '2025-10-02 00:13:34.965061', 'step': 1467, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:35.032573', 'step': 1467, 'epoch': 1}
{'type': 'loss', 'content': 0.039678871631622314, 'timestamp': '2025-10-02 00:13:35.039546', 'step': 1468, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:35.099826', 'step': 1468, 'epoch': 1}
{'type': 'loss', 'content': 0.08923802524805069, 'timestamp': '2025-10-02 00:13:35.107959', 'step': 1469, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:35.180310', 'step': 1469, 'epoch': 1}
{'type': 'loss', 'content': 0.04886584356427193, 'timestamp': '2025-10-02 00:13:35.189000', 'step': 1470, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:35.253188', 'step': 1470, 'epoch': 1}
{'type': 'loss', 'content': 0.046704649925231934, 'timestamp': '2025-10-02 00:13:35.262440', 'step': 1471, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:13:35.325681', 'step': 1471, 'epoch': 1}
{'type': 'loss', 'content': 0.10273806005716324, 'timestamp': '2025-10-02 00:13:35.336973', 'step': 1472, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:13:35.409777', 'step': 1472, 'epoch': 1}
{'type': 'loss', 'content': 0.24376153945922852, 'timestamp': '2025-10-02 00:13:35.420392', 'step': 1473, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:13:35.491271', 'step': 1473, 'epoch': 1}
{'type': 'loss', 'content': 0.042579881846904755, 'timestamp': '2025-10-02 00:13:35.501454', 'step': 1474, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:35.572711', 'step': 1474, 'epoch': 1}
{'type': 'loss', 'content': 0.0924958735704422, 'timestamp': '2025-10-02 00:13:35.582014', 'step': 1475, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:35.650639', 'step': 1475, 'epoch': 1}
{'type': 'loss', 'content': 0.06550834327936172, 'timestamp': '2025-10-02 00:13:35.662743', 'step': 1476, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:35.723469', 'step': 1476, 'epoch': 1}
{'type': 'loss', 'content': 0.18235351145267487, 'timestamp': '2025-10-02 00:13:35.731875', 'step': 1477, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:35.800074', 'step': 1477, 'epoch': 1}
{'type': 'loss', 'content': 0.0546739362180233, 'timestamp': '2025-10-02 00:13:35.803393', 'step': 1478, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:35.872952', 'step': 1478, 'epoch': 1}
{'type': 'loss', 'content': 0.15416264533996582, 'timestamp': '2025-10-02 00:13:35.876286', 'step': 1479, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:35.942537', 'step': 1479, 'epoch': 1}
{'type': 'loss', 'content': 0.050263889133930206, 'timestamp': '2025-10-02 00:13:35.953813', 'step': 1480, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:36.010499', 'step': 1480, 'epoch': 1}
{'type': 'loss', 'content': 0.14970752596855164, 'timestamp': '2025-10-02 00:13:36.017207', 'step': 1481, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:36.075289', 'step': 1481, 'epoch': 1}
{'type': 'loss', 'content': 0.04447191581130028, 'timestamp': '2025-10-02 00:13:36.082390', 'step': 1482, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:36.140908', 'step': 1482, 'epoch': 1}
{'type': 'loss', 'content': 0.057604074478149414, 'timestamp': '2025-10-02 00:13:36.146846', 'step': 1483, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:36.207634', 'step': 1483, 'epoch': 1}
{'type': 'loss', 'content': 0.014229495078325272, 'timestamp': '2025-10-02 00:13:36.220072', 'step': 1484, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:13:36.289041', 'step': 1484, 'epoch': 1}
{'type': 'loss', 'content': 0.09236519783735275, 'timestamp': '2025-10-02 00:13:36.293314', 'step': 1485, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:36.349201', 'step': 1485, 'epoch': 1}
{'type': 'loss', 'content': 0.3146783411502838, 'timestamp': '2025-10-02 00:13:36.351818', 'step': 1486, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:36.417470', 'step': 1486, 'epoch': 1}
{'type': 'loss', 'content': 0.022361457347869873, 'timestamp': '2025-10-02 00:13:36.420343', 'step': 1487, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:13:36.476358', 'step': 1487, 'epoch': 1}
{'type': 'loss', 'content': 0.16198159754276276, 'timestamp': '2025-10-02 00:13:36.483116', 'step': 1488, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:13:36.538057', 'step': 1488, 'epoch': 1}
{'type': 'loss', 'content': 0.17443224787712097, 'timestamp': '2025-10-02 00:13:36.540547', 'step': 1489, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:36.594571', 'step': 1489, 'epoch': 1}
{'type': 'loss', 'content': 0.29282134771347046, 'timestamp': '2025-10-02 00:13:36.596782', 'step': 1490, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:13:36.650422', 'step': 1490, 'epoch': 1}
{'type': 'loss', 'content': 0.09181664139032364, 'timestamp': '2025-10-02 00:13:36.653198', 'step': 1491, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:36.707147', 'step': 1491, 'epoch': 1}
{'type': 'loss', 'content': 0.13028018176555634, 'timestamp': '2025-10-02 00:13:36.715481', 'step': 1492, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:36.768735', 'step': 1492, 'epoch': 1}
{'type': 'loss', 'content': 0.1320185363292694, 'timestamp': '2025-10-02 00:13:36.771371', 'step': 1493, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:36.825180', 'step': 1493, 'epoch': 1}
{'type': 'loss', 'content': 0.058140262961387634, 'timestamp': '2025-10-02 00:13:36.827843', 'step': 1494, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:36.882687', 'step': 1494, 'epoch': 1}
{'type': 'loss', 'content': 0.1810777634382248, 'timestamp': '2025-10-02 00:13:36.884878', 'step': 1495, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:36.938722', 'step': 1495, 'epoch': 1}
{'type': 'loss', 'content': 0.14898504316806793, 'timestamp': '2025-10-02 00:13:36.944861', 'step': 1496, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:36.999081', 'step': 1496, 'epoch': 1}
{'type': 'loss', 'content': 0.050837159156799316, 'timestamp': '2025-10-02 00:13:37.008596', 'step': 1497, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:13:37.069741', 'step': 1497, 'epoch': 1}
{'type': 'loss', 'content': 0.0521506741642952, 'timestamp': '2025-10-02 00:13:37.080405', 'step': 1498, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:37.134833', 'step': 1498, 'epoch': 1}
{'type': 'loss', 'content': 0.05064144730567932, 'timestamp': '2025-10-02 00:13:37.136829', 'step': 1499, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:37.189996', 'step': 1499, 'epoch': 1}
{'type': 'loss', 'content': 0.13983730971813202, 'timestamp': '2025-10-02 00:13:37.195875', 'step': 1500, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 1500', 'timestamp': '2025-10-02 00:13:37.720111', 'step': 1500, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:13:37.778599', 'step': 1500, 'epoch': 1}
{'type': 'loss', 'content': 0.04263236001133919, 'timestamp': '2025-10-02 00:13:37.787162', 'step': 1501, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:37.840818', 'step': 1501, 'epoch': 1}
{'type': 'loss', 'content': 0.06797756254673004, 'timestamp': '2025-10-02 00:13:37.843016', 'step': 1502, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:37.898441', 'step': 1502, 'epoch': 1}
{'type': 'loss', 'content': 0.09123693406581879, 'timestamp': '2025-10-02 00:13:37.903689', 'step': 1503, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:13:37.957304', 'step': 1503, 'epoch': 1}
{'type': 'loss', 'content': 0.10196022689342499, 'timestamp': '2025-10-02 00:13:37.963337', 'step': 1504, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:38.017576', 'step': 1504, 'epoch': 1}
{'type': 'loss', 'content': 0.10029854625463486, 'timestamp': '2025-10-02 00:13:38.022973', 'step': 1505, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:38.077102', 'step': 1505, 'epoch': 1}
{'type': 'loss', 'content': 0.2532627284526825, 'timestamp': '2025-10-02 00:13:38.080354', 'step': 1506, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:13:38.143047', 'step': 1506, 'epoch': 1}
{'type': 'loss', 'content': 0.025833923369646072, 'timestamp': '2025-10-02 00:13:38.153177', 'step': 1507, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:38.207394', 'step': 1507, 'epoch': 1}
{'type': 'loss', 'content': 0.08334238827228546, 'timestamp': '2025-10-02 00:13:38.213197', 'step': 1508, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:38.266721', 'step': 1508, 'epoch': 1}
{'type': 'loss', 'content': 0.08631261438131332, 'timestamp': '2025-10-02 00:13:38.268622', 'step': 1509, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:13:38.321711', 'step': 1509, 'epoch': 1}
{'type': 'loss', 'content': 0.21310898661613464, 'timestamp': '2025-10-02 00:13:38.323728', 'step': 1510, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:38.377668', 'step': 1510, 'epoch': 1}
{'type': 'loss', 'content': 0.1691320240497589, 'timestamp': '2025-10-02 00:13:38.386488', 'step': 1511, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:38.444108', 'step': 1511, 'epoch': 1}
{'type': 'loss', 'content': 0.1266842484474182, 'timestamp': '2025-10-02 00:13:38.451385', 'step': 1512, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:38.504123', 'step': 1512, 'epoch': 1}
{'type': 'loss', 'content': 0.1025143414735794, 'timestamp': '2025-10-02 00:13:38.506334', 'step': 1513, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:38.560044', 'step': 1513, 'epoch': 1}
{'type': 'loss', 'content': 0.0997937023639679, 'timestamp': '2025-10-02 00:13:38.561945', 'step': 1514, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:38.616136', 'step': 1514, 'epoch': 1}
{'type': 'loss', 'content': 0.12613609433174133, 'timestamp': '2025-10-02 00:13:38.625002', 'step': 1515, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:13:38.697916', 'step': 1515, 'epoch': 1}
{'type': 'loss', 'content': 0.03477941453456879, 'timestamp': '2025-10-02 00:13:38.710551', 'step': 1516, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:13:38.767304', 'step': 1516, 'epoch': 1}
{'type': 'loss', 'content': 0.03274574875831604, 'timestamp': '2025-10-02 00:13:38.778193', 'step': 1517, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:13:38.832088', 'step': 1517, 'epoch': 1}
{'type': 'loss', 'content': 0.0927000492811203, 'timestamp': '2025-10-02 00:13:38.834709', 'step': 1518, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:13:38.895094', 'step': 1518, 'epoch': 1}
{'type': 'loss', 'content': 0.057262834161520004, 'timestamp': '2025-10-02 00:13:38.905514', 'step': 1519, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:13:38.958568', 'step': 1519, 'epoch': 1}
{'type': 'loss', 'content': 0.3287079632282257, 'timestamp': '2025-10-02 00:13:38.964358', 'step': 1520, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:13:39.016642', 'step': 1520, 'epoch': 1}
{'type': 'loss', 'content': 0.12716208398342133, 'timestamp': '2025-10-02 00:13:39.018904', 'step': 1521, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:39.073487', 'step': 1521, 'epoch': 1}
{'type': 'loss', 'content': 0.05612208694219589, 'timestamp': '2025-10-02 00:13:39.075418', 'step': 1522, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:39.128259', 'step': 1522, 'epoch': 1}
{'type': 'loss', 'content': 0.1667911410331726, 'timestamp': '2025-10-02 00:13:39.130406', 'step': 1523, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:39.183653', 'step': 1523, 'epoch': 1}
{'type': 'loss', 'content': 0.02821292355656624, 'timestamp': '2025-10-02 00:13:39.189247', 'step': 1524, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:39.243115', 'step': 1524, 'epoch': 1}
{'type': 'loss', 'content': 0.15211866796016693, 'timestamp': '2025-10-02 00:13:39.245617', 'step': 1525, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:39.298871', 'step': 1525, 'epoch': 1}
{'type': 'loss', 'content': 0.09045631438493729, 'timestamp': '2025-10-02 00:13:39.301226', 'step': 1526, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:39.356347', 'step': 1526, 'epoch': 1}
{'type': 'loss', 'content': 0.12368249893188477, 'timestamp': '2025-10-02 00:13:39.358379', 'step': 1527, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:39.411072', 'step': 1527, 'epoch': 1}
{'type': 'loss', 'content': 0.12324733287096024, 'timestamp': '2025-10-02 00:13:39.417775', 'step': 1528, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:39.470309', 'step': 1528, 'epoch': 1}
{'type': 'loss', 'content': 0.2167099267244339, 'timestamp': '2025-10-02 00:13:39.472791', 'step': 1529, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:13:39.526281', 'step': 1529, 'epoch': 1}
{'type': 'loss', 'content': 0.2206561416387558, 'timestamp': '2025-10-02 00:13:39.528492', 'step': 1530, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:39.581352', 'step': 1530, 'epoch': 1}
{'type': 'loss', 'content': 0.1143004447221756, 'timestamp': '2025-10-02 00:13:39.583359', 'step': 1531, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:39.637463', 'step': 1531, 'epoch': 1}
{'type': 'loss', 'content': 0.0679512545466423, 'timestamp': '2025-10-02 00:13:39.645635', 'step': 1532, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:39.698029', 'step': 1532, 'epoch': 1}
{'type': 'loss', 'content': 0.12633828818798065, 'timestamp': '2025-10-02 00:13:39.700809', 'step': 1533, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:39.755187', 'step': 1533, 'epoch': 1}
{'type': 'loss', 'content': 0.0396469421684742, 'timestamp': '2025-10-02 00:13:39.764523', 'step': 1534, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:39.819240', 'step': 1534, 'epoch': 1}
{'type': 'loss', 'content': 0.08698026090860367, 'timestamp': '2025-10-02 00:13:39.826434', 'step': 1535, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:39.880391', 'step': 1535, 'epoch': 1}
{'type': 'loss', 'content': 0.13038702309131622, 'timestamp': '2025-10-02 00:13:39.886980', 'step': 1536, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:39.940042', 'step': 1536, 'epoch': 1}
{'type': 'loss', 'content': 0.11513539403676987, 'timestamp': '2025-10-02 00:13:39.942441', 'step': 1537, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:39.995930', 'step': 1537, 'epoch': 1}
{'type': 'loss', 'content': 0.08450762927532196, 'timestamp': '2025-10-02 00:13:39.998338', 'step': 1538, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:40.051611', 'step': 1538, 'epoch': 1}
{'type': 'loss', 'content': 0.19191664457321167, 'timestamp': '2025-10-02 00:13:40.054213', 'step': 1539, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:40.108294', 'step': 1539, 'epoch': 1}
{'type': 'loss', 'content': 0.13363949954509735, 'timestamp': '2025-10-02 00:13:40.115058', 'step': 1540, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:40.168137', 'step': 1540, 'epoch': 1}
{'type': 'loss', 'content': 0.06675820052623749, 'timestamp': '2025-10-02 00:13:40.170929', 'step': 1541, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:40.225063', 'step': 1541, 'epoch': 1}
{'type': 'loss', 'content': 0.07477638870477676, 'timestamp': '2025-10-02 00:13:40.227439', 'step': 1542, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:13:40.288695', 'step': 1542, 'epoch': 1}
{'type': 'loss', 'content': 0.027788840234279633, 'timestamp': '2025-10-02 00:13:40.299178', 'step': 1543, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:13:40.354706', 'step': 1543, 'epoch': 1}
{'type': 'loss', 'content': 0.05815216526389122, 'timestamp': '2025-10-02 00:13:40.365062', 'step': 1544, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:40.418195', 'step': 1544, 'epoch': 1}
{'type': 'loss', 'content': 0.0682629868388176, 'timestamp': '2025-10-02 00:13:40.424213', 'step': 1545, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:40.477976', 'step': 1545, 'epoch': 1}
{'type': 'loss', 'content': 0.0713922530412674, 'timestamp': '2025-10-02 00:13:40.480478', 'step': 1546, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:40.534524', 'step': 1546, 'epoch': 1}
{'type': 'loss', 'content': 0.2370743453502655, 'timestamp': '2025-10-02 00:13:40.536707', 'step': 1547, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:40.590601', 'step': 1547, 'epoch': 1}
{'type': 'loss', 'content': 0.13120193779468536, 'timestamp': '2025-10-02 00:13:40.596200', 'step': 1548, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:13:40.661752', 'step': 1548, 'epoch': 1}
{'type': 'loss', 'content': 0.033181723207235336, 'timestamp': '2025-10-02 00:13:40.674744', 'step': 1549, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:13:40.735962', 'step': 1549, 'epoch': 1}
{'type': 'loss', 'content': 0.030420703813433647, 'timestamp': '2025-10-02 00:13:40.746634', 'step': 1550, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:40.800819', 'step': 1550, 'epoch': 1}
{'type': 'loss', 'content': 0.05732853710651398, 'timestamp': '2025-10-02 00:13:40.803048', 'step': 1551, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:13:40.865343', 'step': 1551, 'epoch': 1}
{'type': 'loss', 'content': 0.04477686434984207, 'timestamp': '2025-10-02 00:13:40.876777', 'step': 1552, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:13:40.937895', 'step': 1552, 'epoch': 1}
{'type': 'loss', 'content': 0.1661294847726822, 'timestamp': '2025-10-02 00:13:40.940537', 'step': 1553, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:13:40.999332', 'step': 1553, 'epoch': 1}
{'type': 'loss', 'content': 0.10785199701786041, 'timestamp': '2025-10-02 00:13:41.001549', 'step': 1554, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:41.059784', 'step': 1554, 'epoch': 1}
{'type': 'loss', 'content': 0.19133684039115906, 'timestamp': '2025-10-02 00:13:41.064847', 'step': 1555, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:41.124928', 'step': 1555, 'epoch': 1}
{'type': 'loss', 'content': 0.03629430755972862, 'timestamp': '2025-10-02 00:13:41.132149', 'step': 1556, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:13:41.191144', 'step': 1556, 'epoch': 1}
{'type': 'loss', 'content': 0.09863752871751785, 'timestamp': '2025-10-02 00:13:41.194279', 'step': 1557, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:41.254338', 'step': 1557, 'epoch': 1}
{'type': 'loss', 'content': 0.0602908730506897, 'timestamp': '2025-10-02 00:13:41.257510', 'step': 1558, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:41.316638', 'step': 1558, 'epoch': 1}
{'type': 'loss', 'content': 0.04005230963230133, 'timestamp': '2025-10-02 00:13:41.319376', 'step': 1559, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:41.377743', 'step': 1559, 'epoch': 1}
{'type': 'loss', 'content': 0.1886948049068451, 'timestamp': '2025-10-02 00:13:41.384608', 'step': 1560, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:13:41.440241', 'step': 1560, 'epoch': 1}
{'type': 'loss', 'content': 0.08154699206352234, 'timestamp': '2025-10-02 00:13:41.442879', 'step': 1561, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:13:41.498622', 'step': 1561, 'epoch': 1}
{'type': 'loss', 'content': 0.039118196815252304, 'timestamp': '2025-10-02 00:13:41.508131', 'step': 1562, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:13:41.562450', 'step': 1562, 'epoch': 1}
{'type': 'loss', 'content': 0.09933780878782272, 'timestamp': '2025-10-02 00:13:41.565760', 'step': 1563, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:41.620134', 'step': 1563, 'epoch': 1}
{'type': 'loss', 'content': 0.053480956703424454, 'timestamp': '2025-10-02 00:13:41.626359', 'step': 1564, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:41.679723', 'step': 1564, 'epoch': 1}
{'type': 'loss', 'content': 0.0775437206029892, 'timestamp': '2025-10-02 00:13:41.689267', 'step': 1565, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:13:41.744714', 'step': 1565, 'epoch': 1}
{'type': 'loss', 'content': 0.02175424061715603, 'timestamp': '2025-10-02 00:13:41.754244', 'step': 1566, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:13:41.812659', 'step': 1566, 'epoch': 1}
{'type': 'loss', 'content': 0.12958382070064545, 'timestamp': '2025-10-02 00:13:41.822764', 'step': 1567, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:41.875843', 'step': 1567, 'epoch': 1}
{'type': 'loss', 'content': 0.1083144024014473, 'timestamp': '2025-10-02 00:13:41.883166', 'step': 1568, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:41.936680', 'step': 1568, 'epoch': 1}
{'type': 'loss', 'content': 0.03372178226709366, 'timestamp': '2025-10-02 00:13:41.938900', 'step': 1569, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:41.993926', 'step': 1569, 'epoch': 1}
{'type': 'loss', 'content': 0.180843785405159, 'timestamp': '2025-10-02 00:13:41.999355', 'step': 1570, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:13:42.061886', 'step': 1570, 'epoch': 1}
{'type': 'loss', 'content': 0.043004319071769714, 'timestamp': '2025-10-02 00:13:42.072346', 'step': 1571, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:42.127260', 'step': 1571, 'epoch': 1}
{'type': 'loss', 'content': 0.058263957500457764, 'timestamp': '2025-10-02 00:13:42.133558', 'step': 1572, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:42.186327', 'step': 1572, 'epoch': 1}
{'type': 'loss', 'content': 0.2624801695346832, 'timestamp': '2025-10-02 00:13:42.188450', 'step': 1573, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:42.241351', 'step': 1573, 'epoch': 1}
{'type': 'loss', 'content': 0.12926755845546722, 'timestamp': '2025-10-02 00:13:42.247197', 'step': 1574, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:13:42.302057', 'step': 1574, 'epoch': 1}
{'type': 'loss', 'content': 0.15767133235931396, 'timestamp': '2025-10-02 00:13:42.304620', 'step': 1575, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:42.357823', 'step': 1575, 'epoch': 1}
{'type': 'loss', 'content': 0.2888585329055786, 'timestamp': '2025-10-02 00:13:42.364387', 'step': 1576, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:13:42.421067', 'step': 1576, 'epoch': 1}
{'type': 'loss', 'content': 0.0530044324696064, 'timestamp': '2025-10-02 00:13:42.429874', 'step': 1577, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:13:42.484909', 'step': 1577, 'epoch': 1}
{'type': 'loss', 'content': 0.1250067949295044, 'timestamp': '2025-10-02 00:13:42.487299', 'step': 1578, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:42.540203', 'step': 1578, 'epoch': 1}
{'type': 'loss', 'content': 0.18662606179714203, 'timestamp': '2025-10-02 00:13:42.542509', 'step': 1579, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:42.595901', 'step': 1579, 'epoch': 1}
{'type': 'loss', 'content': 0.07003575563430786, 'timestamp': '2025-10-02 00:13:42.605936', 'step': 1580, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:42.660318', 'step': 1580, 'epoch': 1}
{'type': 'loss', 'content': 0.14825421571731567, 'timestamp': '2025-10-02 00:13:42.662656', 'step': 1581, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:13:42.716595', 'step': 1581, 'epoch': 1}
{'type': 'loss', 'content': 0.16511934995651245, 'timestamp': '2025-10-02 00:13:42.719269', 'step': 1582, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:42.776502', 'step': 1582, 'epoch': 1}
{'type': 'loss', 'content': 0.07590720802545547, 'timestamp': '2025-10-02 00:13:42.782002', 'step': 1583, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:42.836332', 'step': 1583, 'epoch': 1}
{'type': 'loss', 'content': 0.04481545463204384, 'timestamp': '2025-10-02 00:13:42.846077', 'step': 1584, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:42.900604', 'step': 1584, 'epoch': 1}
{'type': 'loss', 'content': 0.026689162477850914, 'timestamp': '2025-10-02 00:13:42.906334', 'step': 1585, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:42.960586', 'step': 1585, 'epoch': 1}
{'type': 'loss', 'content': 0.2426590472459793, 'timestamp': '2025-10-02 00:13:42.962993', 'step': 1586, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:13:43.016929', 'step': 1586, 'epoch': 1}
{'type': 'loss', 'content': 0.12022223323583603, 'timestamp': '2025-10-02 00:13:43.019255', 'step': 1587, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:43.073348', 'step': 1587, 'epoch': 1}
{'type': 'loss', 'content': 0.09983332455158234, 'timestamp': '2025-10-02 00:13:43.079898', 'step': 1588, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:43.132026', 'step': 1588, 'epoch': 1}
{'type': 'loss', 'content': 0.16003085672855377, 'timestamp': '2025-10-02 00:13:43.134780', 'step': 1589, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:43.189122', 'step': 1589, 'epoch': 1}
{'type': 'loss', 'content': 0.07342957705259323, 'timestamp': '2025-10-02 00:13:43.191292', 'step': 1590, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:43.251810', 'step': 1590, 'epoch': 1}
{'type': 'loss', 'content': 0.09376605600118637, 'timestamp': '2025-10-02 00:13:43.254257', 'step': 1591, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:43.308154', 'step': 1591, 'epoch': 1}
{'type': 'loss', 'content': 0.14675770699977875, 'timestamp': '2025-10-02 00:13:43.314342', 'step': 1592, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:43.367830', 'step': 1592, 'epoch': 1}
{'type': 'loss', 'content': 0.05268692597746849, 'timestamp': '2025-10-02 00:13:43.370118', 'step': 1593, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:43.424985', 'step': 1593, 'epoch': 1}
{'type': 'loss', 'content': 0.03274393454194069, 'timestamp': '2025-10-02 00:13:43.427794', 'step': 1594, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:13:43.481893', 'step': 1594, 'epoch': 1}
{'type': 'loss', 'content': 0.20802991092205048, 'timestamp': '2025-10-02 00:13:43.484973', 'step': 1595, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:43.538523', 'step': 1595, 'epoch': 1}
{'type': 'loss', 'content': 0.1527060717344284, 'timestamp': '2025-10-02 00:13:43.545296', 'step': 1596, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:43.598462', 'step': 1596, 'epoch': 1}
{'type': 'loss', 'content': 0.08086562901735306, 'timestamp': '2025-10-02 00:13:43.601666', 'step': 1597, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:43.656013', 'step': 1597, 'epoch': 1}
{'type': 'loss', 'content': 0.09147275239229202, 'timestamp': '2025-10-02 00:13:43.662032', 'step': 1598, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:43.716730', 'step': 1598, 'epoch': 1}
{'type': 'loss', 'content': 0.09624785929918289, 'timestamp': '2025-10-02 00:13:43.720128', 'step': 1599, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:13:43.774671', 'step': 1599, 'epoch': 1}
{'type': 'loss', 'content': 0.1417759209871292, 'timestamp': '2025-10-02 00:13:43.781180', 'step': 1600, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:13:43.836092', 'step': 1600, 'epoch': 1}
{'type': 'loss', 'content': 0.17355404794216156, 'timestamp': '2025-10-02 00:13:43.839480', 'step': 1601, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:43.894596', 'step': 1601, 'epoch': 1}
{'type': 'loss', 'content': 0.11427946388721466, 'timestamp': '2025-10-02 00:13:43.897513', 'step': 1602, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:43.952309', 'step': 1602, 'epoch': 1}
{'type': 'loss', 'content': 0.11478651314973831, 'timestamp': '2025-10-02 00:13:43.955369', 'step': 1603, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:44.011406', 'step': 1603, 'epoch': 1}
{'type': 'loss', 'content': 0.10575058311223984, 'timestamp': '2025-10-02 00:13:44.019573', 'step': 1604, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:44.075462', 'step': 1604, 'epoch': 1}
{'type': 'loss', 'content': 0.03751330077648163, 'timestamp': '2025-10-02 00:13:44.078440', 'step': 1605, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:44.134513', 'step': 1605, 'epoch': 1}
{'type': 'loss', 'content': 0.04940302297472954, 'timestamp': '2025-10-02 00:13:44.140427', 'step': 1606, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:44.199629', 'step': 1606, 'epoch': 1}
{'type': 'loss', 'content': 0.21795713901519775, 'timestamp': '2025-10-02 00:13:44.202530', 'step': 1607, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:44.258594', 'step': 1607, 'epoch': 1}
{'type': 'loss', 'content': 0.08681734651327133, 'timestamp': '2025-10-02 00:13:44.265392', 'step': 1608, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:13:44.321837', 'step': 1608, 'epoch': 1}
{'type': 'loss', 'content': 0.0641876682639122, 'timestamp': '2025-10-02 00:13:44.324922', 'step': 1609, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:44.380841', 'step': 1609, 'epoch': 1}
{'type': 'loss', 'content': 0.12092114984989166, 'timestamp': '2025-10-02 00:13:44.383830', 'step': 1610, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:13:44.440858', 'step': 1610, 'epoch': 1}
{'type': 'loss', 'content': 0.053719066083431244, 'timestamp': '2025-10-02 00:13:44.450410', 'step': 1611, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:44.506036', 'step': 1611, 'epoch': 1}
{'type': 'loss', 'content': 0.26176512241363525, 'timestamp': '2025-10-02 00:13:44.512514', 'step': 1612, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:44.567625', 'step': 1612, 'epoch': 1}
{'type': 'loss', 'content': 0.03709130734205246, 'timestamp': '2025-10-02 00:13:44.570051', 'step': 1613, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:44.626285', 'step': 1613, 'epoch': 1}
{'type': 'loss', 'content': 0.05989037826657295, 'timestamp': '2025-10-02 00:13:44.633882', 'step': 1614, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:44.688303', 'step': 1614, 'epoch': 1}
{'type': 'loss', 'content': 0.12930138409137726, 'timestamp': '2025-10-02 00:13:44.691804', 'step': 1615, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:44.748127', 'step': 1615, 'epoch': 1}
{'type': 'loss', 'content': 0.13454139232635498, 'timestamp': '2025-10-02 00:13:44.754563', 'step': 1616, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:13:44.808263', 'step': 1616, 'epoch': 1}
{'type': 'loss', 'content': 0.3184809684753418, 'timestamp': '2025-10-02 00:13:44.811560', 'step': 1617, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:13:44.867302', 'step': 1617, 'epoch': 1}
{'type': 'loss', 'content': 0.19579099118709564, 'timestamp': '2025-10-02 00:13:44.870399', 'step': 1618, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:13:44.933242', 'step': 1618, 'epoch': 1}
{'type': 'loss', 'content': 0.06096704676747322, 'timestamp': '2025-10-02 00:13:44.943890', 'step': 1619, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:44.999772', 'step': 1619, 'epoch': 1}
{'type': 'loss', 'content': 0.023328732699155807, 'timestamp': '2025-10-02 00:13:45.009918', 'step': 1620, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:45.064354', 'step': 1620, 'epoch': 1}
{'type': 'loss', 'content': 0.17120017111301422, 'timestamp': '2025-10-02 00:13:45.067553', 'step': 1621, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:45.121894', 'step': 1621, 'epoch': 1}
{'type': 'loss', 'content': 0.11237530410289764, 'timestamp': '2025-10-02 00:13:45.127826', 'step': 1622, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:45.183721', 'step': 1622, 'epoch': 1}
{'type': 'loss', 'content': 0.12162856757640839, 'timestamp': '2025-10-02 00:13:45.186464', 'step': 1623, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:45.241059', 'step': 1623, 'epoch': 1}
{'type': 'loss', 'content': 0.1068657636642456, 'timestamp': '2025-10-02 00:13:45.247502', 'step': 1624, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:45.302489', 'step': 1624, 'epoch': 1}
{'type': 'loss', 'content': 0.0420910120010376, 'timestamp': '2025-10-02 00:13:45.305495', 'step': 1625, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:13:45.362354', 'step': 1625, 'epoch': 1}
{'type': 'loss', 'content': 0.06205877289175987, 'timestamp': '2025-10-02 00:13:45.371920', 'step': 1626, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:45.426831', 'step': 1626, 'epoch': 1}
{'type': 'loss', 'content': 0.09364096075296402, 'timestamp': '2025-10-02 00:13:45.434293', 'step': 1627, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:13:45.491905', 'step': 1627, 'epoch': 1}
{'type': 'loss', 'content': 0.0850491151213646, 'timestamp': '2025-10-02 00:13:45.503238', 'step': 1628, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:45.558051', 'step': 1628, 'epoch': 1}
{'type': 'loss', 'content': 0.06573101878166199, 'timestamp': '2025-10-02 00:13:45.566010', 'step': 1629, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:45.623339', 'step': 1629, 'epoch': 1}
{'type': 'loss', 'content': 0.1062106043100357, 'timestamp': '2025-10-02 00:13:45.625756', 'step': 1630, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:45.679354', 'step': 1630, 'epoch': 1}
{'type': 'loss', 'content': 0.16580477356910706, 'timestamp': '2025-10-02 00:13:45.681697', 'step': 1631, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:45.736183', 'step': 1631, 'epoch': 1}
{'type': 'loss', 'content': 0.14950668811798096, 'timestamp': '2025-10-02 00:13:45.742251', 'step': 1632, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:13:45.795135', 'step': 1632, 'epoch': 1}
{'type': 'loss', 'content': 0.09684113413095474, 'timestamp': '2025-10-02 00:13:45.797420', 'step': 1633, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:45.850632', 'step': 1633, 'epoch': 1}
{'type': 'loss', 'content': 0.05747082084417343, 'timestamp': '2025-10-02 00:13:45.852911', 'step': 1634, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:45.907326', 'step': 1634, 'epoch': 1}
{'type': 'loss', 'content': 0.08529429137706757, 'timestamp': '2025-10-02 00:13:45.913070', 'step': 1635, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:13:45.983143', 'step': 1635, 'epoch': 1}
{'type': 'loss', 'content': 0.0469181090593338, 'timestamp': '2025-10-02 00:13:45.996546', 'step': 1636, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:46.049011', 'step': 1636, 'epoch': 1}
{'type': 'loss', 'content': 0.13660922646522522, 'timestamp': '2025-10-02 00:13:46.051517', 'step': 1637, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:46.104119', 'step': 1637, 'epoch': 1}
{'type': 'loss', 'content': 0.19886937737464905, 'timestamp': '2025-10-02 00:13:46.106122', 'step': 1638, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:46.159165', 'step': 1638, 'epoch': 1}
{'type': 'loss', 'content': 0.0390903614461422, 'timestamp': '2025-10-02 00:13:46.161965', 'step': 1639, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:46.217076', 'step': 1639, 'epoch': 1}
{'type': 'loss', 'content': 0.0514712929725647, 'timestamp': '2025-10-02 00:13:46.223094', 'step': 1640, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:13:46.282305', 'step': 1640, 'epoch': 1}
{'type': 'loss', 'content': 0.08125846832990646, 'timestamp': '2025-10-02 00:13:46.293613', 'step': 1641, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:46.348147', 'step': 1641, 'epoch': 1}
{'type': 'loss', 'content': 0.016033999621868134, 'timestamp': '2025-10-02 00:13:46.350507', 'step': 1642, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:13:46.411488', 'step': 1642, 'epoch': 1}
{'type': 'loss', 'content': 0.11706975102424622, 'timestamp': '2025-10-02 00:13:46.422148', 'step': 1643, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:46.475775', 'step': 1643, 'epoch': 1}
{'type': 'loss', 'content': 0.047274354845285416, 'timestamp': '2025-10-02 00:13:46.482334', 'step': 1644, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:46.534982', 'step': 1644, 'epoch': 1}
{'type': 'loss', 'content': 0.11962374299764633, 'timestamp': '2025-10-02 00:13:46.544371', 'step': 1645, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:46.598122', 'step': 1645, 'epoch': 1}
{'type': 'loss', 'content': 0.027598511427640915, 'timestamp': '2025-10-02 00:13:46.604039', 'step': 1646, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:46.658045', 'step': 1646, 'epoch': 1}
{'type': 'loss', 'content': 0.1806730180978775, 'timestamp': '2025-10-02 00:13:46.660598', 'step': 1647, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:13:46.713354', 'step': 1647, 'epoch': 1}
{'type': 'loss', 'content': 0.2019239217042923, 'timestamp': '2025-10-02 00:13:46.719506', 'step': 1648, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:46.772470', 'step': 1648, 'epoch': 1}
{'type': 'loss', 'content': 0.08332667499780655, 'timestamp': '2025-10-02 00:13:46.778730', 'step': 1649, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:13:46.851060', 'step': 1649, 'epoch': 1}
{'type': 'loss', 'content': 0.1918746680021286, 'timestamp': '2025-10-02 00:13:46.869502', 'step': 1650, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:46.973616', 'step': 1650, 'epoch': 1}
{'type': 'loss', 'content': 0.1532614529132843, 'timestamp': '2025-10-02 00:13:46.992016', 'step': 1651, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:47.049620', 'step': 1651, 'epoch': 1}
{'type': 'loss', 'content': 0.1056237667798996, 'timestamp': '2025-10-02 00:13:47.071736', 'step': 1652, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:13:47.145751', 'step': 1652, 'epoch': 1}
{'type': 'loss', 'content': 0.17217156291007996, 'timestamp': '2025-10-02 00:13:47.163943', 'step': 1653, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:47.249587', 'step': 1653, 'epoch': 1}
{'type': 'loss', 'content': 0.020902151241898537, 'timestamp': '2025-10-02 00:13:47.269707', 'step': 1654, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:47.378779', 'step': 1654, 'epoch': 1}
{'type': 'loss', 'content': 0.15403883159160614, 'timestamp': '2025-10-02 00:13:47.395729', 'step': 1655, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:47.494619', 'step': 1655, 'epoch': 1}
{'type': 'loss', 'content': 0.027567114681005478, 'timestamp': '2025-10-02 00:13:47.504670', 'step': 1656, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:13:47.588572', 'step': 1656, 'epoch': 1}
{'type': 'loss', 'content': 0.057455968111753464, 'timestamp': '2025-10-02 00:13:47.599585', 'step': 1657, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:13:47.679496', 'step': 1657, 'epoch': 1}
{'type': 'loss', 'content': 0.13268187642097473, 'timestamp': '2025-10-02 00:13:47.682629', 'step': 1658, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:47.739171', 'step': 1658, 'epoch': 1}
{'type': 'loss', 'content': 0.1257985681295395, 'timestamp': '2025-10-02 00:13:47.742958', 'step': 1659, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:47.799655', 'step': 1659, 'epoch': 1}
{'type': 'loss', 'content': 0.05658833310008049, 'timestamp': '2025-10-02 00:13:47.806569', 'step': 1660, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:13:47.869931', 'step': 1660, 'epoch': 1}
{'type': 'loss', 'content': 0.09026850014925003, 'timestamp': '2025-10-02 00:13:47.881434', 'step': 1661, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:13:47.948591', 'step': 1661, 'epoch': 1}
{'type': 'loss', 'content': 0.24017484486103058, 'timestamp': '2025-10-02 00:13:47.953926', 'step': 1662, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:48.016794', 'step': 1662, 'epoch': 1}
{'type': 'loss', 'content': 0.24392010271549225, 'timestamp': '2025-10-02 00:13:48.026097', 'step': 1663, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:48.090517', 'step': 1663, 'epoch': 1}
{'type': 'loss', 'content': 0.1970309615135193, 'timestamp': '2025-10-02 00:13:48.097768', 'step': 1664, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:48.155137', 'step': 1664, 'epoch': 1}
{'type': 'loss', 'content': 0.05046187341213226, 'timestamp': '2025-10-02 00:13:48.164485', 'step': 1665, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:13:48.225047', 'step': 1665, 'epoch': 1}
{'type': 'loss', 'content': 0.08212030678987503, 'timestamp': '2025-10-02 00:13:48.228222', 'step': 1666, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:48.286740', 'step': 1666, 'epoch': 1}
{'type': 'loss', 'content': 0.12496517598628998, 'timestamp': '2025-10-02 00:13:48.290126', 'step': 1667, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:48.346515', 'step': 1667, 'epoch': 1}
{'type': 'loss', 'content': 0.1526545286178589, 'timestamp': '2025-10-02 00:13:48.353944', 'step': 1668, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:48.410243', 'step': 1668, 'epoch': 1}
{'type': 'loss', 'content': 0.1979817897081375, 'timestamp': '2025-10-02 00:13:48.413032', 'step': 1669, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:48.471705', 'step': 1669, 'epoch': 1}
{'type': 'loss', 'content': 0.07864585518836975, 'timestamp': '2025-10-02 00:13:48.474919', 'step': 1670, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:13:48.544623', 'step': 1670, 'epoch': 1}
{'type': 'loss', 'content': 0.06432192027568817, 'timestamp': '2025-10-02 00:13:48.554770', 'step': 1671, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:48.615260', 'step': 1671, 'epoch': 1}
{'type': 'loss', 'content': 0.03267728164792061, 'timestamp': '2025-10-02 00:13:48.623754', 'step': 1672, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:13:48.688960', 'step': 1672, 'epoch': 1}
{'type': 'loss', 'content': 0.10666190832853317, 'timestamp': '2025-10-02 00:13:48.692872', 'step': 1673, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:48.751441', 'step': 1673, 'epoch': 1}
{'type': 'loss', 'content': 0.19562382996082306, 'timestamp': '2025-10-02 00:13:48.761465', 'step': 1674, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:13:48.836992', 'step': 1674, 'epoch': 1}
{'type': 'loss', 'content': 0.116523377597332, 'timestamp': '2025-10-02 00:13:48.846534', 'step': 1675, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:48.907637', 'step': 1675, 'epoch': 1}
{'type': 'loss', 'content': 0.0813954696059227, 'timestamp': '2025-10-02 00:13:48.914875', 'step': 1676, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:48.983535', 'step': 1676, 'epoch': 1}
{'type': 'loss', 'content': 0.1701059192419052, 'timestamp': '2025-10-02 00:13:48.986482', 'step': 1677, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:13:49.060201', 'step': 1677, 'epoch': 1}
{'type': 'loss', 'content': 0.051586516201496124, 'timestamp': '2025-10-02 00:13:49.071032', 'step': 1678, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:13:49.134821', 'step': 1678, 'epoch': 1}
{'type': 'loss', 'content': 0.17049044370651245, 'timestamp': '2025-10-02 00:13:49.138003', 'step': 1679, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:13:49.217312', 'step': 1679, 'epoch': 1}
{'type': 'loss', 'content': 0.04662102088332176, 'timestamp': '2025-10-02 00:13:49.228541', 'step': 1680, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:49.284951', 'step': 1680, 'epoch': 1}
{'type': 'loss', 'content': 0.029260171577334404, 'timestamp': '2025-10-02 00:13:49.290667', 'step': 1681, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:49.359675', 'step': 1681, 'epoch': 1}
{'type': 'loss', 'content': 0.12799414992332458, 'timestamp': '2025-10-02 00:13:49.362487', 'step': 1682, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:49.427815', 'step': 1682, 'epoch': 1}
{'type': 'loss', 'content': 0.0696951299905777, 'timestamp': '2025-10-02 00:13:49.438070', 'step': 1683, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:13:49.498790', 'step': 1683, 'epoch': 1}
{'type': 'loss', 'content': 0.1392732560634613, 'timestamp': '2025-10-02 00:13:49.510510', 'step': 1684, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:49.567315', 'step': 1684, 'epoch': 1}
{'type': 'loss', 'content': 0.1947145164012909, 'timestamp': '2025-10-02 00:13:49.571240', 'step': 1685, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:13:49.629850', 'step': 1685, 'epoch': 1}
{'type': 'loss', 'content': 0.06082002446055412, 'timestamp': '2025-10-02 00:13:49.639423', 'step': 1686, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:49.701263', 'step': 1686, 'epoch': 1}
{'type': 'loss', 'content': 0.1120399683713913, 'timestamp': '2025-10-02 00:13:49.706378', 'step': 1687, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:49.770122', 'step': 1687, 'epoch': 1}
{'type': 'loss', 'content': 0.026525968685746193, 'timestamp': '2025-10-02 00:13:49.777912', 'step': 1688, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:13:49.842563', 'step': 1688, 'epoch': 1}
{'type': 'loss', 'content': 0.054735973477363586, 'timestamp': '2025-10-02 00:13:49.845287', 'step': 1689, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:49.899678', 'step': 1689, 'epoch': 1}
{'type': 'loss', 'content': 0.11102086305618286, 'timestamp': '2025-10-02 00:13:49.904721', 'step': 1690, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:49.980258', 'step': 1690, 'epoch': 1}
{'type': 'loss', 'content': 0.06907501071691513, 'timestamp': '2025-10-02 00:13:49.989357', 'step': 1691, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:13:50.052311', 'step': 1691, 'epoch': 1}
{'type': 'loss', 'content': 0.05225064605474472, 'timestamp': '2025-10-02 00:13:50.063559', 'step': 1692, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:13:50.142696', 'step': 1692, 'epoch': 1}
{'type': 'loss', 'content': 0.09654475748538971, 'timestamp': '2025-10-02 00:13:50.147550', 'step': 1693, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:50.211338', 'step': 1693, 'epoch': 1}
{'type': 'loss', 'content': 0.015336040407419205, 'timestamp': '2025-10-02 00:13:50.220709', 'step': 1694, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:13:50.283245', 'step': 1694, 'epoch': 1}
{'type': 'loss', 'content': 0.05900159850716591, 'timestamp': '2025-10-02 00:13:50.286648', 'step': 1695, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:50.349256', 'step': 1695, 'epoch': 1}
{'type': 'loss', 'content': 0.06379429250955582, 'timestamp': '2025-10-02 00:13:50.360307', 'step': 1696, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:50.439315', 'step': 1696, 'epoch': 1}
{'type': 'loss', 'content': 0.09908943623304367, 'timestamp': '2025-10-02 00:13:50.442343', 'step': 1697, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:50.498958', 'step': 1697, 'epoch': 1}
{'type': 'loss', 'content': 0.24578483402729034, 'timestamp': '2025-10-02 00:13:50.502594', 'step': 1698, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:50.571181', 'step': 1698, 'epoch': 1}
{'type': 'loss', 'content': 0.058815233409404755, 'timestamp': '2025-10-02 00:13:50.575719', 'step': 1699, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:50.636759', 'step': 1699, 'epoch': 1}
{'type': 'loss', 'content': 0.10122073441743851, 'timestamp': '2025-10-02 00:13:50.643300', 'step': 1700, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:13:50.703708', 'step': 1700, 'epoch': 1}
{'type': 'loss', 'content': 0.0278372373431921, 'timestamp': '2025-10-02 00:13:50.715524', 'step': 1701, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:50.795718', 'step': 1701, 'epoch': 1}
{'type': 'loss', 'content': 0.0712709054350853, 'timestamp': '2025-10-02 00:13:50.802866', 'step': 1702, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:13:50.884805', 'step': 1702, 'epoch': 1}
{'type': 'loss', 'content': 0.16717715561389923, 'timestamp': '2025-10-02 00:13:50.895508', 'step': 1703, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:13:50.963323', 'step': 1703, 'epoch': 1}
{'type': 'loss', 'content': 0.0455191470682621, 'timestamp': '2025-10-02 00:13:50.980444', 'step': 1704, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:51.067707', 'step': 1704, 'epoch': 1}
{'type': 'loss', 'content': 0.034606028348207474, 'timestamp': '2025-10-02 00:13:51.078229', 'step': 1705, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:51.160499', 'step': 1705, 'epoch': 1}
{'type': 'loss', 'content': 0.09014645963907242, 'timestamp': '2025-10-02 00:13:51.172765', 'step': 1706, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:51.249845', 'step': 1706, 'epoch': 1}
{'type': 'loss', 'content': 0.0759170800447464, 'timestamp': '2025-10-02 00:13:51.260815', 'step': 1707, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:13:51.337176', 'step': 1707, 'epoch': 1}
{'type': 'loss', 'content': 0.10302355140447617, 'timestamp': '2025-10-02 00:13:51.350857', 'step': 1708, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:13:51.423377', 'step': 1708, 'epoch': 1}
{'type': 'loss', 'content': 0.12225735187530518, 'timestamp': '2025-10-02 00:13:51.434250', 'step': 1709, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:51.512961', 'step': 1709, 'epoch': 1}
{'type': 'loss', 'content': 0.09983732551336288, 'timestamp': '2025-10-02 00:13:51.527594', 'step': 1710, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:51.610757', 'step': 1710, 'epoch': 1}
{'type': 'loss', 'content': 0.11401563882827759, 'timestamp': '2025-10-02 00:13:51.623900', 'step': 1711, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:51.704955', 'step': 1711, 'epoch': 1}
{'type': 'loss', 'content': 0.206134632229805, 'timestamp': '2025-10-02 00:13:51.716932', 'step': 1712, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:51.794360', 'step': 1712, 'epoch': 1}
{'type': 'loss', 'content': 0.20997254550457, 'timestamp': '2025-10-02 00:13:51.803223', 'step': 1713, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:51.881020', 'step': 1713, 'epoch': 1}
{'type': 'loss', 'content': 0.03494331240653992, 'timestamp': '2025-10-02 00:13:51.891006', 'step': 1714, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:51.969928', 'step': 1714, 'epoch': 1}
{'type': 'loss', 'content': 0.07978406548500061, 'timestamp': '2025-10-02 00:13:51.976794', 'step': 1715, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:52.054594', 'step': 1715, 'epoch': 1}
{'type': 'loss', 'content': 0.19161562621593475, 'timestamp': '2025-10-02 00:13:52.070690', 'step': 1716, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:52.148402', 'step': 1716, 'epoch': 1}
{'type': 'loss', 'content': 0.07800236344337463, 'timestamp': '2025-10-02 00:13:52.161383', 'step': 1717, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:52.237010', 'step': 1717, 'epoch': 1}
{'type': 'loss', 'content': 0.07105205953121185, 'timestamp': '2025-10-02 00:13:52.245705', 'step': 1718, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:13:52.329170', 'step': 1718, 'epoch': 1}
{'type': 'loss', 'content': 0.08157779276371002, 'timestamp': '2025-10-02 00:13:52.339847', 'step': 1719, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:52.412798', 'step': 1719, 'epoch': 1}
{'type': 'loss', 'content': 0.03370572254061699, 'timestamp': '2025-10-02 00:13:52.422832', 'step': 1720, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:13:52.489021', 'step': 1720, 'epoch': 1}
{'type': 'loss', 'content': 0.1761375069618225, 'timestamp': '2025-10-02 00:13:52.492085', 'step': 1721, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:13:52.562341', 'step': 1721, 'epoch': 1}
{'type': 'loss', 'content': 0.08273832499980927, 'timestamp': '2025-10-02 00:13:52.574310', 'step': 1722, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:52.631540', 'step': 1722, 'epoch': 1}
{'type': 'loss', 'content': 0.03761407360434532, 'timestamp': '2025-10-02 00:13:52.635590', 'step': 1723, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:52.697505', 'step': 1723, 'epoch': 1}
{'type': 'loss', 'content': 0.1233377531170845, 'timestamp': '2025-10-02 00:13:52.706809', 'step': 1724, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:52.768730', 'step': 1724, 'epoch': 1}
{'type': 'loss', 'content': 0.09434708207845688, 'timestamp': '2025-10-02 00:13:52.774608', 'step': 1725, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:52.851302', 'step': 1725, 'epoch': 1}
{'type': 'loss', 'content': 0.04607989639043808, 'timestamp': '2025-10-02 00:13:52.863391', 'step': 1726, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:13:52.937374', 'step': 1726, 'epoch': 1}
{'type': 'loss', 'content': 0.04127497598528862, 'timestamp': '2025-10-02 00:13:52.949909', 'step': 1727, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:53.015369', 'step': 1727, 'epoch': 1}
{'type': 'loss', 'content': 0.11328849196434021, 'timestamp': '2025-10-02 00:13:53.021879', 'step': 1728, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:53.081037', 'step': 1728, 'epoch': 1}
{'type': 'loss', 'content': 0.062474168837070465, 'timestamp': '2025-10-02 00:13:53.084483', 'step': 1729, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:53.141623', 'step': 1729, 'epoch': 1}
{'type': 'loss', 'content': 0.1810912787914276, 'timestamp': '2025-10-02 00:13:53.145663', 'step': 1730, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:53.203824', 'step': 1730, 'epoch': 1}
{'type': 'loss', 'content': 0.015238828957080841, 'timestamp': '2025-10-02 00:13:53.211386', 'step': 1731, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:13:53.268629', 'step': 1731, 'epoch': 1}
{'type': 'loss', 'content': 0.13415206968784332, 'timestamp': '2025-10-02 00:13:53.275988', 'step': 1732, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:13:53.346493', 'step': 1732, 'epoch': 1}
{'type': 'loss', 'content': 0.06209415942430496, 'timestamp': '2025-10-02 00:13:53.353303', 'step': 1733, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:53.414840', 'step': 1733, 'epoch': 1}
{'type': 'loss', 'content': 0.06389673054218292, 'timestamp': '2025-10-02 00:13:53.420757', 'step': 1734, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:53.477944', 'step': 1734, 'epoch': 1}
{'type': 'loss', 'content': 0.029916275292634964, 'timestamp': '2025-10-02 00:13:53.481770', 'step': 1735, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:13:53.540534', 'step': 1735, 'epoch': 1}
{'type': 'loss', 'content': 0.17411406338214874, 'timestamp': '2025-10-02 00:13:53.546913', 'step': 1736, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:53.605493', 'step': 1736, 'epoch': 1}
{'type': 'loss', 'content': 0.15469276905059814, 'timestamp': '2025-10-02 00:13:53.609523', 'step': 1737, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:53.674670', 'step': 1737, 'epoch': 1}
{'type': 'loss', 'content': 0.12455596029758453, 'timestamp': '2025-10-02 00:13:53.678353', 'step': 1738, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:13:53.736055', 'step': 1738, 'epoch': 1}
{'type': 'loss', 'content': 0.05906539037823677, 'timestamp': '2025-10-02 00:13:53.739728', 'step': 1739, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:13:53.810900', 'step': 1739, 'epoch': 1}
{'type': 'loss', 'content': 0.101267971098423, 'timestamp': '2025-10-02 00:13:53.826785', 'step': 1740, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:13:53.885350', 'step': 1740, 'epoch': 1}
{'type': 'loss', 'content': 0.10631463676691055, 'timestamp': '2025-10-02 00:13:53.888757', 'step': 1741, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:53.946813', 'step': 1741, 'epoch': 1}
{'type': 'loss', 'content': 0.1384720355272293, 'timestamp': '2025-10-02 00:13:53.956657', 'step': 1742, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:54.016740', 'step': 1742, 'epoch': 1}
{'type': 'loss', 'content': 0.053277041763067245, 'timestamp': '2025-10-02 00:13:54.020287', 'step': 1743, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:54.094652', 'step': 1743, 'epoch': 1}
{'type': 'loss', 'content': 0.06642182171344757, 'timestamp': '2025-10-02 00:13:54.101399', 'step': 1744, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:54.160401', 'step': 1744, 'epoch': 1}
{'type': 'loss', 'content': 0.015393666923046112, 'timestamp': '2025-10-02 00:13:54.163898', 'step': 1745, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:54.225113', 'step': 1745, 'epoch': 1}
{'type': 'loss', 'content': 0.18058760464191437, 'timestamp': '2025-10-02 00:13:54.228637', 'step': 1746, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:13:54.295403', 'step': 1746, 'epoch': 1}
{'type': 'loss', 'content': 0.28868958353996277, 'timestamp': '2025-10-02 00:13:54.299243', 'step': 1747, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:54.356240', 'step': 1747, 'epoch': 1}
{'type': 'loss', 'content': 0.09767773002386093, 'timestamp': '2025-10-02 00:13:54.364977', 'step': 1748, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:54.422400', 'step': 1748, 'epoch': 1}
{'type': 'loss', 'content': 0.11367882788181305, 'timestamp': '2025-10-02 00:13:54.426661', 'step': 1749, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:54.485689', 'step': 1749, 'epoch': 1}
{'type': 'loss', 'content': 0.05647807568311691, 'timestamp': '2025-10-02 00:13:54.491123', 'step': 1750, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:54.574185', 'step': 1750, 'epoch': 1}
{'type': 'loss', 'content': 0.06139698997139931, 'timestamp': '2025-10-02 00:13:54.577443', 'step': 1751, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:13:54.648819', 'step': 1751, 'epoch': 1}
{'type': 'loss', 'content': 0.10547444224357605, 'timestamp': '2025-10-02 00:13:54.659743', 'step': 1752, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:54.722717', 'step': 1752, 'epoch': 1}
{'type': 'loss', 'content': 0.0352126806974411, 'timestamp': '2025-10-02 00:13:54.728443', 'step': 1753, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:13:54.802518', 'step': 1753, 'epoch': 1}
{'type': 'loss', 'content': 0.09850365668535233, 'timestamp': '2025-10-02 00:13:54.814048', 'step': 1754, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:54.874858', 'step': 1754, 'epoch': 1}
{'type': 'loss', 'content': 0.14344701170921326, 'timestamp': '2025-10-02 00:13:54.878303', 'step': 1755, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:54.950394', 'step': 1755, 'epoch': 1}
{'type': 'loss', 'content': 0.07677175104618073, 'timestamp': '2025-10-02 00:13:54.958464', 'step': 1756, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:55.017689', 'step': 1756, 'epoch': 1}
{'type': 'loss', 'content': 0.16814924776554108, 'timestamp': '2025-10-02 00:13:55.029630', 'step': 1757, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:13:55.101982', 'step': 1757, 'epoch': 1}
{'type': 'loss', 'content': 0.10623288154602051, 'timestamp': '2025-10-02 00:13:55.104345', 'step': 1758, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:55.165287', 'step': 1758, 'epoch': 1}
{'type': 'loss', 'content': 0.08198944479227066, 'timestamp': '2025-10-02 00:13:55.170952', 'step': 1759, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:13:55.233293', 'step': 1759, 'epoch': 1}
{'type': 'loss', 'content': 0.110767662525177, 'timestamp': '2025-10-02 00:13:55.244265', 'step': 1760, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:55.301348', 'step': 1760, 'epoch': 1}
{'type': 'loss', 'content': 0.14866885542869568, 'timestamp': '2025-10-02 00:13:55.308523', 'step': 1761, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:13:55.367696', 'step': 1761, 'epoch': 1}
{'type': 'loss', 'content': 0.2259301096200943, 'timestamp': '2025-10-02 00:13:55.372009', 'step': 1762, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:13:55.431840', 'step': 1762, 'epoch': 1}
{'type': 'loss', 'content': 0.2632802128791809, 'timestamp': '2025-10-02 00:13:55.435548', 'step': 1763, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:55.504198', 'step': 1763, 'epoch': 1}
{'type': 'loss', 'content': 0.04754377156496048, 'timestamp': '2025-10-02 00:13:55.510743', 'step': 1764, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:55.569743', 'step': 1764, 'epoch': 1}
{'type': 'loss', 'content': 0.12194424867630005, 'timestamp': '2025-10-02 00:13:55.580113', 'step': 1765, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:13:55.647677', 'step': 1765, 'epoch': 1}
{'type': 'loss', 'content': 0.1065484881401062, 'timestamp': '2025-10-02 00:13:55.657156', 'step': 1766, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:13:55.735797', 'step': 1766, 'epoch': 1}
{'type': 'loss', 'content': 0.0399596206843853, 'timestamp': '2025-10-02 00:13:55.746618', 'step': 1767, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:13:55.815061', 'step': 1767, 'epoch': 1}
{'type': 'loss', 'content': 0.07786030322313309, 'timestamp': '2025-10-02 00:13:55.826033', 'step': 1768, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:55.881932', 'step': 1768, 'epoch': 1}
{'type': 'loss', 'content': 0.08772914111614227, 'timestamp': '2025-10-02 00:13:55.885069', 'step': 1769, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:55.940642', 'step': 1769, 'epoch': 1}
{'type': 'loss', 'content': 0.11807297170162201, 'timestamp': '2025-10-02 00:13:55.950524', 'step': 1770, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:56.017808', 'step': 1770, 'epoch': 1}
{'type': 'loss', 'content': 0.054875697940588, 'timestamp': '2025-10-02 00:13:56.025246', 'step': 1771, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:13:56.095731', 'step': 1771, 'epoch': 1}
{'type': 'loss', 'content': 0.07171012461185455, 'timestamp': '2025-10-02 00:13:56.106678', 'step': 1772, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:56.171026', 'step': 1772, 'epoch': 1}
{'type': 'loss', 'content': 0.24560751020908356, 'timestamp': '2025-10-02 00:13:56.184033', 'step': 1773, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:56.261422', 'step': 1773, 'epoch': 1}
{'type': 'loss', 'content': 0.07670050859451294, 'timestamp': '2025-10-02 00:13:56.275930', 'step': 1774, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:56.358876', 'step': 1774, 'epoch': 1}
{'type': 'loss', 'content': 0.11140840500593185, 'timestamp': '2025-10-02 00:13:56.367777', 'step': 1775, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:13:56.472828', 'step': 1775, 'epoch': 1}
{'type': 'loss', 'content': 0.058066461235284805, 'timestamp': '2025-10-02 00:13:56.488342', 'step': 1776, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:56.571679', 'step': 1776, 'epoch': 1}
{'type': 'loss', 'content': 0.1357322782278061, 'timestamp': '2025-10-02 00:13:56.590282', 'step': 1777, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:56.664128', 'step': 1777, 'epoch': 1}
{'type': 'loss', 'content': 0.057276032865047455, 'timestamp': '2025-10-02 00:13:56.681119', 'step': 1778, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:56.763581', 'step': 1778, 'epoch': 1}
{'type': 'loss', 'content': 0.02425628900527954, 'timestamp': '2025-10-02 00:13:56.776221', 'step': 1779, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:56.862861', 'step': 1779, 'epoch': 1}
{'type': 'loss', 'content': 0.09558941423892975, 'timestamp': '2025-10-02 00:13:56.884792', 'step': 1780, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:56.983992', 'step': 1780, 'epoch': 1}
{'type': 'loss', 'content': 0.21390332281589508, 'timestamp': '2025-10-02 00:13:56.988664', 'step': 1781, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:57.050864', 'step': 1781, 'epoch': 1}
{'type': 'loss', 'content': 0.09242883324623108, 'timestamp': '2025-10-02 00:13:57.054805', 'step': 1782, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:57.117961', 'step': 1782, 'epoch': 1}
{'type': 'loss', 'content': 0.13715849816799164, 'timestamp': '2025-10-02 00:13:57.122881', 'step': 1783, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:57.203711', 'step': 1783, 'epoch': 1}
{'type': 'loss', 'content': 0.07404151558876038, 'timestamp': '2025-10-02 00:13:57.213612', 'step': 1784, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:13:57.282818', 'step': 1784, 'epoch': 1}
{'type': 'loss', 'content': 0.15826518833637238, 'timestamp': '2025-10-02 00:13:57.285494', 'step': 1785, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:57.351123', 'step': 1785, 'epoch': 1}
{'type': 'loss', 'content': 0.11609986424446106, 'timestamp': '2025-10-02 00:13:57.360447', 'step': 1786, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:57.420798', 'step': 1786, 'epoch': 1}
{'type': 'loss', 'content': 0.04190349578857422, 'timestamp': '2025-10-02 00:13:57.434728', 'step': 1787, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:57.500769', 'step': 1787, 'epoch': 1}
{'type': 'loss', 'content': 0.09685002267360687, 'timestamp': '2025-10-02 00:13:57.509135', 'step': 1788, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:13:57.569932', 'step': 1788, 'epoch': 1}
{'type': 'loss', 'content': 0.046849094331264496, 'timestamp': '2025-10-02 00:13:57.580293', 'step': 1789, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:57.638670', 'step': 1789, 'epoch': 1}
{'type': 'loss', 'content': 0.05520295724272728, 'timestamp': '2025-10-02 00:13:57.651942', 'step': 1790, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:57.713151', 'step': 1790, 'epoch': 1}
{'type': 'loss', 'content': 0.07774855941534042, 'timestamp': '2025-10-02 00:13:57.717397', 'step': 1791, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:13:57.781615', 'step': 1791, 'epoch': 1}
{'type': 'loss', 'content': 0.07214191555976868, 'timestamp': '2025-10-02 00:13:57.789424', 'step': 1792, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:57.846597', 'step': 1792, 'epoch': 1}
{'type': 'loss', 'content': 0.06229567900300026, 'timestamp': '2025-10-02 00:13:57.856881', 'step': 1793, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:57.914073', 'step': 1793, 'epoch': 1}
{'type': 'loss', 'content': 0.016164211556315422, 'timestamp': '2025-10-02 00:13:57.917674', 'step': 1794, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:57.974448', 'step': 1794, 'epoch': 1}
{'type': 'loss', 'content': 0.12877818942070007, 'timestamp': '2025-10-02 00:13:57.977365', 'step': 1795, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:13:58.036943', 'step': 1795, 'epoch': 1}
{'type': 'loss', 'content': 0.13159772753715515, 'timestamp': '2025-10-02 00:13:58.044366', 'step': 1796, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:58.102156', 'step': 1796, 'epoch': 1}
{'type': 'loss', 'content': 0.1494256854057312, 'timestamp': '2025-10-02 00:13:58.106388', 'step': 1797, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:58.164604', 'step': 1797, 'epoch': 1}
{'type': 'loss', 'content': 0.10518413037061691, 'timestamp': '2025-10-02 00:13:58.168446', 'step': 1798, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:13:58.242738', 'step': 1798, 'epoch': 1}
{'type': 'loss', 'content': 0.19929078221321106, 'timestamp': '2025-10-02 00:13:58.246075', 'step': 1799, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:13:58.308149', 'step': 1799, 'epoch': 1}
{'type': 'loss', 'content': 0.026726167649030685, 'timestamp': '2025-10-02 00:13:58.316382', 'step': 1800, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 544], 'flops': 10880066115712.0}, 'timestamp': '2025-10-02 00:13:58.402491', 'step': 1800, 'epoch': 1}
{'type': 'loss', 'content': 0.024838201701641083, 'timestamp': '2025-10-02 00:13:58.418975', 'step': 1801, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:13:58.483961', 'step': 1801, 'epoch': 1}
{'type': 'loss', 'content': 0.024024737998843193, 'timestamp': '2025-10-02 00:13:58.494461', 'step': 1802, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:13:58.565519', 'step': 1802, 'epoch': 1}
{'type': 'loss', 'content': 0.16123349964618683, 'timestamp': '2025-10-02 00:13:58.575004', 'step': 1803, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:13:58.633078', 'step': 1803, 'epoch': 1}
{'type': 'loss', 'content': 0.16441309452056885, 'timestamp': '2025-10-02 00:13:58.641274', 'step': 1804, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:58.706735', 'step': 1804, 'epoch': 1}
{'type': 'loss', 'content': 0.04068395867943764, 'timestamp': '2025-10-02 00:13:58.713965', 'step': 1805, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:13:58.775952', 'step': 1805, 'epoch': 1}
{'type': 'loss', 'content': 0.12047536671161652, 'timestamp': '2025-10-02 00:13:58.780655', 'step': 1806, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:13:58.839489', 'step': 1806, 'epoch': 1}
{'type': 'loss', 'content': 0.1627676635980606, 'timestamp': '2025-10-02 00:13:58.844282', 'step': 1807, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:58.914791', 'step': 1807, 'epoch': 1}
{'type': 'loss', 'content': 0.2439933717250824, 'timestamp': '2025-10-02 00:13:58.921015', 'step': 1808, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:58.982306', 'step': 1808, 'epoch': 1}
{'type': 'loss', 'content': 0.08071957528591156, 'timestamp': '2025-10-02 00:13:58.989523', 'step': 1809, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:13:59.057220', 'step': 1809, 'epoch': 1}
{'type': 'loss', 'content': 0.32587969303131104, 'timestamp': '2025-10-02 00:13:59.059920', 'step': 1810, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:59.117499', 'step': 1810, 'epoch': 1}
{'type': 'loss', 'content': 0.14363807439804077, 'timestamp': '2025-10-02 00:13:59.122071', 'step': 1811, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:59.180299', 'step': 1811, 'epoch': 1}
{'type': 'loss', 'content': 0.05775003880262375, 'timestamp': '2025-10-02 00:13:59.190345', 'step': 1812, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:13:59.252017', 'step': 1812, 'epoch': 1}
{'type': 'loss', 'content': 0.021196721121668816, 'timestamp': '2025-10-02 00:13:59.267406', 'step': 1813, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:13:59.345675', 'step': 1813, 'epoch': 1}
{'type': 'loss', 'content': 0.12201941758394241, 'timestamp': '2025-10-02 00:13:59.355030', 'step': 1814, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:13:59.412765', 'step': 1814, 'epoch': 1}
{'type': 'loss', 'content': 0.08611995726823807, 'timestamp': '2025-10-02 00:13:59.417761', 'step': 1815, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:13:59.484468', 'step': 1815, 'epoch': 1}
{'type': 'loss', 'content': 0.20399156212806702, 'timestamp': '2025-10-02 00:13:59.491586', 'step': 1816, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:13:59.566644', 'step': 1816, 'epoch': 1}
{'type': 'loss', 'content': 0.017446592450141907, 'timestamp': '2025-10-02 00:13:59.578003', 'step': 1817, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:59.638620', 'step': 1817, 'epoch': 1}
{'type': 'loss', 'content': 0.03760146722197533, 'timestamp': '2025-10-02 00:13:59.644106', 'step': 1818, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:13:59.707776', 'step': 1818, 'epoch': 1}
{'type': 'loss', 'content': 0.058777716010808945, 'timestamp': '2025-10-02 00:13:59.722779', 'step': 1819, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:13:59.807265', 'step': 1819, 'epoch': 1}
{'type': 'loss', 'content': 0.07210396230220795, 'timestamp': '2025-10-02 00:13:59.820765', 'step': 1820, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:13:59.896991', 'step': 1820, 'epoch': 1}
{'type': 'loss', 'content': 0.10433609038591385, 'timestamp': '2025-10-02 00:13:59.907907', 'step': 1821, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:13:59.997999', 'step': 1821, 'epoch': 1}
{'type': 'loss', 'content': 0.019359007477760315, 'timestamp': '2025-10-02 00:14:00.010372', 'step': 1822, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:00.086918', 'step': 1822, 'epoch': 1}
{'type': 'loss', 'content': 0.20285288989543915, 'timestamp': '2025-10-02 00:14:00.098482', 'step': 1823, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:14:00.197477', 'step': 1823, 'epoch': 1}
{'type': 'loss', 'content': 0.026318365707993507, 'timestamp': '2025-10-02 00:14:00.212997', 'step': 1824, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:00.276799', 'step': 1824, 'epoch': 1}
{'type': 'loss', 'content': 0.2784781754016876, 'timestamp': '2025-10-02 00:14:00.288730', 'step': 1825, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:14:00.369610', 'step': 1825, 'epoch': 1}
{'type': 'loss', 'content': 0.07579725980758667, 'timestamp': '2025-10-02 00:14:00.380464', 'step': 1826, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:00.455903', 'step': 1826, 'epoch': 1}
{'type': 'loss', 'content': 0.12211913615465164, 'timestamp': '2025-10-02 00:14:00.465355', 'step': 1827, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:00.558792', 'step': 1827, 'epoch': 1}
{'type': 'loss', 'content': 0.09189164638519287, 'timestamp': '2025-10-02 00:14:00.580115', 'step': 1828, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:00.674205', 'step': 1828, 'epoch': 1}
{'type': 'loss', 'content': 0.172374427318573, 'timestamp': '2025-10-02 00:14:00.678059', 'step': 1829, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:14:00.750622', 'step': 1829, 'epoch': 1}
{'type': 'loss', 'content': 0.10591313987970352, 'timestamp': '2025-10-02 00:14:00.760050', 'step': 1830, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:00.816177', 'step': 1830, 'epoch': 1}
{'type': 'loss', 'content': 0.036332618445158005, 'timestamp': '2025-10-02 00:14:00.819299', 'step': 1831, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:00.884469', 'step': 1831, 'epoch': 1}
{'type': 'loss', 'content': 0.11502860486507416, 'timestamp': '2025-10-02 00:14:00.892977', 'step': 1832, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:14:00.963205', 'step': 1832, 'epoch': 1}
{'type': 'loss', 'content': 0.08534254878759384, 'timestamp': '2025-10-02 00:14:00.974487', 'step': 1833, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:01.035510', 'step': 1833, 'epoch': 1}
{'type': 'loss', 'content': 0.17768117785453796, 'timestamp': '2025-10-02 00:14:01.040559', 'step': 1834, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:01.102444', 'step': 1834, 'epoch': 1}
{'type': 'loss', 'content': 0.23140749335289001, 'timestamp': '2025-10-02 00:14:01.106800', 'step': 1835, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:01.171794', 'step': 1835, 'epoch': 1}
{'type': 'loss', 'content': 0.13717593252658844, 'timestamp': '2025-10-02 00:14:01.187692', 'step': 1836, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:01.246026', 'step': 1836, 'epoch': 1}
{'type': 'loss', 'content': 0.12010963261127472, 'timestamp': '2025-10-02 00:14:01.258064', 'step': 1837, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:01.333997', 'step': 1837, 'epoch': 1}
{'type': 'loss', 'content': 0.16531573235988617, 'timestamp': '2025-10-02 00:14:01.337537', 'step': 1838, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:14:01.409019', 'step': 1838, 'epoch': 1}
{'type': 'loss', 'content': 0.11025591194629669, 'timestamp': '2025-10-02 00:14:01.420908', 'step': 1839, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:01.493469', 'step': 1839, 'epoch': 1}
{'type': 'loss', 'content': 0.13592252135276794, 'timestamp': '2025-10-02 00:14:01.501829', 'step': 1840, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:01.562087', 'step': 1840, 'epoch': 1}
{'type': 'loss', 'content': 0.20959250628948212, 'timestamp': '2025-10-02 00:14:01.565577', 'step': 1841, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:14:01.636318', 'step': 1841, 'epoch': 1}
{'type': 'loss', 'content': 0.08992529660463333, 'timestamp': '2025-10-02 00:14:01.639565', 'step': 1842, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:14:01.727137', 'step': 1842, 'epoch': 1}
{'type': 'loss', 'content': 0.021176094189286232, 'timestamp': '2025-10-02 00:14:01.739271', 'step': 1843, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:01.806823', 'step': 1843, 'epoch': 1}
{'type': 'loss', 'content': 0.1586974710226059, 'timestamp': '2025-10-02 00:14:01.814893', 'step': 1844, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:01.872827', 'step': 1844, 'epoch': 1}
{'type': 'loss', 'content': 0.04466506838798523, 'timestamp': '2025-10-02 00:14:01.879329', 'step': 1845, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:14:01.943056', 'step': 1845, 'epoch': 1}
{'type': 'loss', 'content': 0.023609856143593788, 'timestamp': '2025-10-02 00:14:01.953469', 'step': 1846, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:02.010538', 'step': 1846, 'epoch': 1}
{'type': 'loss', 'content': 0.05515522137284279, 'timestamp': '2025-10-02 00:14:02.014056', 'step': 1847, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:02.080297', 'step': 1847, 'epoch': 1}
{'type': 'loss', 'content': 0.04290284588932991, 'timestamp': '2025-10-02 00:14:02.090200', 'step': 1848, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:02.161495', 'step': 1848, 'epoch': 1}
{'type': 'loss', 'content': 0.11100049316883087, 'timestamp': '2025-10-02 00:14:02.171426', 'step': 1849, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:02.249843', 'step': 1849, 'epoch': 1}
{'type': 'loss', 'content': 0.13289254903793335, 'timestamp': '2025-10-02 00:14:02.262579', 'step': 1850, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:02.344681', 'step': 1850, 'epoch': 1}
{'type': 'loss', 'content': 0.22924326360225677, 'timestamp': '2025-10-02 00:14:02.348245', 'step': 1851, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:02.405702', 'step': 1851, 'epoch': 1}
{'type': 'loss', 'content': 0.03418809175491333, 'timestamp': '2025-10-02 00:14:02.413524', 'step': 1852, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:02.488946', 'step': 1852, 'epoch': 1}
{'type': 'loss', 'content': 0.04784710705280304, 'timestamp': '2025-10-02 00:14:02.499621', 'step': 1853, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:02.592361', 'step': 1853, 'epoch': 1}
{'type': 'loss', 'content': 0.07529543340206146, 'timestamp': '2025-10-02 00:14:02.595606', 'step': 1854, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:02.653933', 'step': 1854, 'epoch': 1}
{'type': 'loss', 'content': 0.2708908021450043, 'timestamp': '2025-10-02 00:14:02.671510', 'step': 1855, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:02.759889', 'step': 1855, 'epoch': 1}
{'type': 'loss', 'content': 0.18791453540325165, 'timestamp': '2025-10-02 00:14:02.769237', 'step': 1856, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:14:02.828945', 'step': 1856, 'epoch': 1}
{'type': 'loss', 'content': 0.16140024363994598, 'timestamp': '2025-10-02 00:14:02.832533', 'step': 1857, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:02.910677', 'step': 1857, 'epoch': 1}
{'type': 'loss', 'content': 0.019529927521944046, 'timestamp': '2025-10-02 00:14:02.922465', 'step': 1858, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:14:03.003540', 'step': 1858, 'epoch': 1}
{'type': 'loss', 'content': 0.08089706301689148, 'timestamp': '2025-10-02 00:14:03.014094', 'step': 1859, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:14:03.094100', 'step': 1859, 'epoch': 1}
{'type': 'loss', 'content': 0.14256194233894348, 'timestamp': '2025-10-02 00:14:03.107240', 'step': 1860, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:14:03.182187', 'step': 1860, 'epoch': 1}
{'type': 'loss', 'content': 0.09299834072589874, 'timestamp': '2025-10-02 00:14:03.193681', 'step': 1861, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:03.265360', 'step': 1861, 'epoch': 1}
{'type': 'loss', 'content': 0.05155254900455475, 'timestamp': '2025-10-02 00:14:03.274708', 'step': 1862, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:03.353267', 'step': 1862, 'epoch': 1}
{'type': 'loss', 'content': 0.04109129682183266, 'timestamp': '2025-10-02 00:14:03.363890', 'step': 1863, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:14:03.434821', 'step': 1863, 'epoch': 1}
{'type': 'loss', 'content': 0.08997049182653427, 'timestamp': '2025-10-02 00:14:03.441736', 'step': 1864, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:03.509801', 'step': 1864, 'epoch': 1}
{'type': 'loss', 'content': 0.15749342739582062, 'timestamp': '2025-10-02 00:14:03.512921', 'step': 1865, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:03.586773', 'step': 1865, 'epoch': 1}
{'type': 'loss', 'content': 0.04248945042490959, 'timestamp': '2025-10-02 00:14:03.596463', 'step': 1866, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:03.675383', 'step': 1866, 'epoch': 1}
{'type': 'loss', 'content': 0.11720627546310425, 'timestamp': '2025-10-02 00:14:03.683507', 'step': 1867, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:03.752463', 'step': 1867, 'epoch': 1}
{'type': 'loss', 'content': 0.05037393420934677, 'timestamp': '2025-10-02 00:14:03.765598', 'step': 1868, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:03.827912', 'step': 1868, 'epoch': 1}
{'type': 'loss', 'content': 0.1161346584558487, 'timestamp': '2025-10-02 00:14:03.838106', 'step': 1869, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:03.899740', 'step': 1869, 'epoch': 1}
{'type': 'loss', 'content': 0.07542793452739716, 'timestamp': '2025-10-02 00:14:03.908993', 'step': 1870, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:14:03.991170', 'step': 1870, 'epoch': 1}
{'type': 'loss', 'content': 0.15093520283699036, 'timestamp': '2025-10-02 00:14:04.003145', 'step': 1871, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:04.074069', 'step': 1871, 'epoch': 1}
{'type': 'loss', 'content': 0.1021626889705658, 'timestamp': '2025-10-02 00:14:04.084388', 'step': 1872, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:04.150527', 'step': 1872, 'epoch': 1}
{'type': 'loss', 'content': 0.07089661061763763, 'timestamp': '2025-10-02 00:14:04.160013', 'step': 1873, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:04.224343', 'step': 1873, 'epoch': 1}
{'type': 'loss', 'content': 0.19104038178920746, 'timestamp': '2025-10-02 00:14:04.227419', 'step': 1874, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:04.292684', 'step': 1874, 'epoch': 1}
{'type': 'loss', 'content': 0.06936129182577133, 'timestamp': '2025-10-02 00:14:04.302852', 'step': 1875, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:14:04.382650', 'step': 1875, 'epoch': 1}
{'type': 'loss', 'content': 0.040379948914051056, 'timestamp': '2025-10-02 00:14:04.395884', 'step': 1876, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:14:04.458991', 'step': 1876, 'epoch': 1}
{'type': 'loss', 'content': 0.21364730596542358, 'timestamp': '2025-10-02 00:14:04.462394', 'step': 1877, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:04.526533', 'step': 1877, 'epoch': 1}
{'type': 'loss', 'content': 0.10807251185178757, 'timestamp': '2025-10-02 00:14:04.530825', 'step': 1878, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:04.594554', 'step': 1878, 'epoch': 1}
{'type': 'loss', 'content': 0.047467924654483795, 'timestamp': '2025-10-02 00:14:04.604758', 'step': 1879, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:04.674737', 'step': 1879, 'epoch': 1}
{'type': 'loss', 'content': 0.06187722459435463, 'timestamp': '2025-10-02 00:14:04.685644', 'step': 1880, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:04.754450', 'step': 1880, 'epoch': 1}
{'type': 'loss', 'content': 0.023232104256749153, 'timestamp': '2025-10-02 00:14:04.765430', 'step': 1881, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:04.832189', 'step': 1881, 'epoch': 1}
{'type': 'loss', 'content': 0.05239678546786308, 'timestamp': '2025-10-02 00:14:04.841761', 'step': 1882, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:04.909862', 'step': 1882, 'epoch': 1}
{'type': 'loss', 'content': 0.21394506096839905, 'timestamp': '2025-10-02 00:14:04.916705', 'step': 1883, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:04.981269', 'step': 1883, 'epoch': 1}
{'type': 'loss', 'content': 0.06827681511640549, 'timestamp': '2025-10-02 00:14:04.991225', 'step': 1884, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:05.054948', 'step': 1884, 'epoch': 1}
{'type': 'loss', 'content': 0.08004060387611389, 'timestamp': '2025-10-02 00:14:05.060074', 'step': 1885, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:05.121864', 'step': 1885, 'epoch': 1}
{'type': 'loss', 'content': 0.18773356080055237, 'timestamp': '2025-10-02 00:14:05.128694', 'step': 1886, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:05.192537', 'step': 1886, 'epoch': 1}
{'type': 'loss', 'content': 0.07474203407764435, 'timestamp': '2025-10-02 00:14:05.199808', 'step': 1887, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:14:05.260815', 'step': 1887, 'epoch': 1}
{'type': 'loss', 'content': 0.0413314513862133, 'timestamp': '2025-10-02 00:14:05.273844', 'step': 1888, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:05.348166', 'step': 1888, 'epoch': 1}
{'type': 'loss', 'content': 0.05295967683196068, 'timestamp': '2025-10-02 00:14:05.358464', 'step': 1889, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:05.420230', 'step': 1889, 'epoch': 1}
{'type': 'loss', 'content': 0.06273345649242401, 'timestamp': '2025-10-02 00:14:05.423459', 'step': 1890, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:05.489650', 'step': 1890, 'epoch': 1}
{'type': 'loss', 'content': 0.16971202194690704, 'timestamp': '2025-10-02 00:14:05.495997', 'step': 1891, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:14:05.560653', 'step': 1891, 'epoch': 1}
{'type': 'loss', 'content': 0.12774483859539032, 'timestamp': '2025-10-02 00:14:05.570539', 'step': 1892, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:05.635613', 'step': 1892, 'epoch': 1}
{'type': 'loss', 'content': 0.040602147579193115, 'timestamp': '2025-10-02 00:14:05.641930', 'step': 1893, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:05.707600', 'step': 1893, 'epoch': 1}
{'type': 'loss', 'content': 0.08235495537519455, 'timestamp': '2025-10-02 00:14:05.716925', 'step': 1894, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:05.785988', 'step': 1894, 'epoch': 1}
{'type': 'loss', 'content': 0.03480346128344536, 'timestamp': '2025-10-02 00:14:05.793316', 'step': 1895, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:05.857514', 'step': 1895, 'epoch': 1}
{'type': 'loss', 'content': 0.04983267933130264, 'timestamp': '2025-10-02 00:14:05.872405', 'step': 1896, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:05.940981', 'step': 1896, 'epoch': 1}
{'type': 'loss', 'content': 0.12522977590560913, 'timestamp': '2025-10-02 00:14:05.943545', 'step': 1897, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:06.007894', 'step': 1897, 'epoch': 1}
{'type': 'loss', 'content': 0.09137000888586044, 'timestamp': '2025-10-02 00:14:06.017251', 'step': 1898, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:06.078631', 'step': 1898, 'epoch': 1}
{'type': 'loss', 'content': 0.12291412055492401, 'timestamp': '2025-10-02 00:14:06.081039', 'step': 1899, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:06.134654', 'step': 1899, 'epoch': 1}
{'type': 'loss', 'content': 0.2312360554933548, 'timestamp': '2025-10-02 00:14:06.140381', 'step': 1900, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:14:06.193379', 'step': 1900, 'epoch': 1}
{'type': 'loss', 'content': 0.05392494797706604, 'timestamp': '2025-10-02 00:14:06.195624', 'step': 1901, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:06.248310', 'step': 1901, 'epoch': 1}
{'type': 'loss', 'content': 0.1484144926071167, 'timestamp': '2025-10-02 00:14:06.250414', 'step': 1902, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:06.303150', 'step': 1902, 'epoch': 1}
{'type': 'loss', 'content': 0.14860063791275024, 'timestamp': '2025-10-02 00:14:06.305326', 'step': 1903, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:06.358908', 'step': 1903, 'epoch': 1}
{'type': 'loss', 'content': 0.0884508416056633, 'timestamp': '2025-10-02 00:14:06.364527', 'step': 1904, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:06.418583', 'step': 1904, 'epoch': 1}
{'type': 'loss', 'content': 0.12427626550197601, 'timestamp': '2025-10-02 00:14:06.420775', 'step': 1905, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:06.474596', 'step': 1905, 'epoch': 1}
{'type': 'loss', 'content': 0.0688927099108696, 'timestamp': '2025-10-02 00:14:06.476825', 'step': 1906, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:06.532137', 'step': 1906, 'epoch': 1}
{'type': 'loss', 'content': 0.059434689581394196, 'timestamp': '2025-10-02 00:14:06.541701', 'step': 1907, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:06.595988', 'step': 1907, 'epoch': 1}
{'type': 'loss', 'content': 0.20015980303287506, 'timestamp': '2025-10-02 00:14:06.601902', 'step': 1908, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:06.654814', 'step': 1908, 'epoch': 1}
{'type': 'loss', 'content': 0.18961367011070251, 'timestamp': '2025-10-02 00:14:06.657129', 'step': 1909, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:06.710373', 'step': 1909, 'epoch': 1}
{'type': 'loss', 'content': 0.1359727680683136, 'timestamp': '2025-10-02 00:14:06.712761', 'step': 1910, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:06.766445', 'step': 1910, 'epoch': 1}
{'type': 'loss', 'content': 0.11934056133031845, 'timestamp': '2025-10-02 00:14:06.768676', 'step': 1911, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:06.822485', 'step': 1911, 'epoch': 1}
{'type': 'loss', 'content': 0.05623162165284157, 'timestamp': '2025-10-02 00:14:06.828338', 'step': 1912, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:06.882238', 'step': 1912, 'epoch': 1}
{'type': 'loss', 'content': 0.08594737201929092, 'timestamp': '2025-10-02 00:14:06.889756', 'step': 1913, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:06.943490', 'step': 1913, 'epoch': 1}
{'type': 'loss', 'content': 0.18659387528896332, 'timestamp': '2025-10-02 00:14:06.945717', 'step': 1914, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:07.005308', 'step': 1914, 'epoch': 1}
{'type': 'loss', 'content': 0.05596550181508064, 'timestamp': '2025-10-02 00:14:07.015541', 'step': 1915, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:07.070102', 'step': 1915, 'epoch': 1}
{'type': 'loss', 'content': 0.05881378427147865, 'timestamp': '2025-10-02 00:14:07.076195', 'step': 1916, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:07.131022', 'step': 1916, 'epoch': 1}
{'type': 'loss', 'content': 0.03619726374745369, 'timestamp': '2025-10-02 00:14:07.141314', 'step': 1917, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:07.194710', 'step': 1917, 'epoch': 1}
{'type': 'loss', 'content': 0.059712477028369904, 'timestamp': '2025-10-02 00:14:07.204081', 'step': 1918, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:07.257467', 'step': 1918, 'epoch': 1}
{'type': 'loss', 'content': 0.1470484882593155, 'timestamp': '2025-10-02 00:14:07.260105', 'step': 1919, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:07.313712', 'step': 1919, 'epoch': 1}
{'type': 'loss', 'content': 0.25708532333374023, 'timestamp': '2025-10-02 00:14:07.319470', 'step': 1920, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:07.372575', 'step': 1920, 'epoch': 1}
{'type': 'loss', 'content': 0.058627884835004807, 'timestamp': '2025-10-02 00:14:07.378560', 'step': 1921, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:07.433491', 'step': 1921, 'epoch': 1}
{'type': 'loss', 'content': 0.09165512770414352, 'timestamp': '2025-10-02 00:14:07.442993', 'step': 1922, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:07.495652', 'step': 1922, 'epoch': 1}
{'type': 'loss', 'content': 0.16782720386981964, 'timestamp': '2025-10-02 00:14:07.498347', 'step': 1923, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:07.552039', 'step': 1923, 'epoch': 1}
{'type': 'loss', 'content': 0.1086573377251625, 'timestamp': '2025-10-02 00:14:07.557543', 'step': 1924, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:14:07.610334', 'step': 1924, 'epoch': 1}
{'type': 'loss', 'content': 0.11276204138994217, 'timestamp': '2025-10-02 00:14:07.612607', 'step': 1925, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:07.666484', 'step': 1925, 'epoch': 1}
{'type': 'loss', 'content': 0.10503225028514862, 'timestamp': '2025-10-02 00:14:07.672249', 'step': 1926, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:07.726160', 'step': 1926, 'epoch': 1}
{'type': 'loss', 'content': 0.13592898845672607, 'timestamp': '2025-10-02 00:14:07.733465', 'step': 1927, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:07.786406', 'step': 1927, 'epoch': 1}
{'type': 'loss', 'content': 0.027550524100661278, 'timestamp': '2025-10-02 00:14:07.796585', 'step': 1928, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:07.849848', 'step': 1928, 'epoch': 1}
{'type': 'loss', 'content': 0.10400302708148956, 'timestamp': '2025-10-02 00:14:07.852186', 'step': 1929, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:07.912666', 'step': 1929, 'epoch': 1}
{'type': 'loss', 'content': 0.09261426329612732, 'timestamp': '2025-10-02 00:14:07.914800', 'step': 1930, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:07.968895', 'step': 1930, 'epoch': 1}
{'type': 'loss', 'content': 0.04541522264480591, 'timestamp': '2025-10-02 00:14:07.978259', 'step': 1931, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:14:08.031533', 'step': 1931, 'epoch': 1}
{'type': 'loss', 'content': 0.16473384201526642, 'timestamp': '2025-10-02 00:14:08.037118', 'step': 1932, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:08.090315', 'step': 1932, 'epoch': 1}
{'type': 'loss', 'content': 0.06311561912298203, 'timestamp': '2025-10-02 00:14:08.096187', 'step': 1933, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:14:08.149410', 'step': 1933, 'epoch': 1}
{'type': 'loss', 'content': 0.19901831448078156, 'timestamp': '2025-10-02 00:14:08.153041', 'step': 1934, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:14:08.206657', 'step': 1934, 'epoch': 1}
{'type': 'loss', 'content': 0.07192578166723251, 'timestamp': '2025-10-02 00:14:08.209053', 'step': 1935, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:08.262368', 'step': 1935, 'epoch': 1}
{'type': 'loss', 'content': 0.15690460801124573, 'timestamp': '2025-10-02 00:14:08.268477', 'step': 1936, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:08.320152', 'step': 1936, 'epoch': 1}
{'type': 'loss', 'content': 0.25115084648132324, 'timestamp': '2025-10-02 00:14:08.322275', 'step': 1937, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:14:08.375036', 'step': 1937, 'epoch': 1}
{'type': 'loss', 'content': 0.09306776523590088, 'timestamp': '2025-10-02 00:14:08.377422', 'step': 1938, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:08.431492', 'step': 1938, 'epoch': 1}
{'type': 'loss', 'content': 0.09334275871515274, 'timestamp': '2025-10-02 00:14:08.437229', 'step': 1939, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:08.491438', 'step': 1939, 'epoch': 1}
{'type': 'loss', 'content': 0.04158684238791466, 'timestamp': '2025-10-02 00:14:08.501857', 'step': 1940, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:08.555051', 'step': 1940, 'epoch': 1}
{'type': 'loss', 'content': 0.1068517416715622, 'timestamp': '2025-10-02 00:14:08.560926', 'step': 1941, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:08.614663', 'step': 1941, 'epoch': 1}
{'type': 'loss', 'content': 0.097820945084095, 'timestamp': '2025-10-02 00:14:08.617075', 'step': 1942, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:08.670634', 'step': 1942, 'epoch': 1}
{'type': 'loss', 'content': 0.18741653859615326, 'timestamp': '2025-10-02 00:14:08.672547', 'step': 1943, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:08.727091', 'step': 1943, 'epoch': 1}
{'type': 'loss', 'content': 0.05171247944235802, 'timestamp': '2025-10-02 00:14:08.737220', 'step': 1944, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:14:08.792484', 'step': 1944, 'epoch': 1}
{'type': 'loss', 'content': 0.06251110136508942, 'timestamp': '2025-10-02 00:14:08.795234', 'step': 1945, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:14:08.848839', 'step': 1945, 'epoch': 1}
{'type': 'loss', 'content': 0.2028118222951889, 'timestamp': '2025-10-02 00:14:08.851307', 'step': 1946, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:14:08.905006', 'step': 1946, 'epoch': 1}
{'type': 'loss', 'content': 0.10899105668067932, 'timestamp': '2025-10-02 00:14:08.907331', 'step': 1947, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:08.961635', 'step': 1947, 'epoch': 1}
{'type': 'loss', 'content': 0.025422656908631325, 'timestamp': '2025-10-02 00:14:08.967432', 'step': 1948, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:09.019955', 'step': 1948, 'epoch': 1}
{'type': 'loss', 'content': 0.1640271693468094, 'timestamp': '2025-10-02 00:14:09.022842', 'step': 1949, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:09.078040', 'step': 1949, 'epoch': 1}
{'type': 'loss', 'content': 0.19110026955604553, 'timestamp': '2025-10-02 00:14:09.080820', 'step': 1950, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:09.137284', 'step': 1950, 'epoch': 1}
{'type': 'loss', 'content': 0.21607714891433716, 'timestamp': '2025-10-02 00:14:09.139578', 'step': 1951, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:09.193223', 'step': 1951, 'epoch': 1}
{'type': 'loss', 'content': 0.06016693264245987, 'timestamp': '2025-10-02 00:14:09.199488', 'step': 1952, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:09.252780', 'step': 1952, 'epoch': 1}
{'type': 'loss', 'content': 0.06816580146551132, 'timestamp': '2025-10-02 00:14:09.258697', 'step': 1953, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:09.314002', 'step': 1953, 'epoch': 1}
{'type': 'loss', 'content': 0.04073754698038101, 'timestamp': '2025-10-02 00:14:09.323364', 'step': 1954, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:09.379149', 'step': 1954, 'epoch': 1}
{'type': 'loss', 'content': 0.09397388249635696, 'timestamp': '2025-10-02 00:14:09.381880', 'step': 1955, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:09.439332', 'step': 1955, 'epoch': 1}
{'type': 'loss', 'content': 0.10749347507953644, 'timestamp': '2025-10-02 00:14:09.445800', 'step': 1956, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:09.503182', 'step': 1956, 'epoch': 1}
{'type': 'loss', 'content': 0.11973102390766144, 'timestamp': '2025-10-02 00:14:09.514141', 'step': 1957, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:09.568501', 'step': 1957, 'epoch': 1}
{'type': 'loss', 'content': 0.15531964600086212, 'timestamp': '2025-10-02 00:14:09.571643', 'step': 1958, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:09.627154', 'step': 1958, 'epoch': 1}
{'type': 'loss', 'content': 0.07827316224575043, 'timestamp': '2025-10-02 00:14:09.634408', 'step': 1959, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:09.690634', 'step': 1959, 'epoch': 1}
{'type': 'loss', 'content': 0.0641016960144043, 'timestamp': '2025-10-02 00:14:09.697360', 'step': 1960, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:09.750876', 'step': 1960, 'epoch': 1}
{'type': 'loss', 'content': 0.2647241950035095, 'timestamp': '2025-10-02 00:14:09.753869', 'step': 1961, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:09.808168', 'step': 1961, 'epoch': 1}
{'type': 'loss', 'content': 0.2302180826663971, 'timestamp': '2025-10-02 00:14:09.810460', 'step': 1962, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:14:09.866916', 'step': 1962, 'epoch': 1}
{'type': 'loss', 'content': 0.1281210035085678, 'timestamp': '2025-10-02 00:14:09.869380', 'step': 1963, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:09.923737', 'step': 1963, 'epoch': 1}
{'type': 'loss', 'content': 0.17628830671310425, 'timestamp': '2025-10-02 00:14:09.930351', 'step': 1964, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:14:09.985169', 'step': 1964, 'epoch': 1}
{'type': 'loss', 'content': 0.08804886788129807, 'timestamp': '2025-10-02 00:14:09.987980', 'step': 1965, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:10.042903', 'step': 1965, 'epoch': 1}
{'type': 'loss', 'content': 0.2083219289779663, 'timestamp': '2025-10-02 00:14:10.045764', 'step': 1966, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:10.100604', 'step': 1966, 'epoch': 1}
{'type': 'loss', 'content': 0.11661220341920853, 'timestamp': '2025-10-02 00:14:10.107982', 'step': 1967, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:10.163427', 'step': 1967, 'epoch': 1}
{'type': 'loss', 'content': 0.061432383954524994, 'timestamp': '2025-10-02 00:14:10.169681', 'step': 1968, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:10.223845', 'step': 1968, 'epoch': 1}
{'type': 'loss', 'content': 0.24483099579811096, 'timestamp': '2025-10-02 00:14:10.226691', 'step': 1969, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:14:10.281583', 'step': 1969, 'epoch': 1}
{'type': 'loss', 'content': 0.08704578131437302, 'timestamp': '2025-10-02 00:14:10.284025', 'step': 1970, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:10.339990', 'step': 1970, 'epoch': 1}
{'type': 'loss', 'content': 0.062005672603845596, 'timestamp': '2025-10-02 00:14:10.349316', 'step': 1971, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:10.405290', 'step': 1971, 'epoch': 1}
{'type': 'loss', 'content': 0.09337172657251358, 'timestamp': '2025-10-02 00:14:10.411883', 'step': 1972, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:10.467041', 'step': 1972, 'epoch': 1}
{'type': 'loss', 'content': 0.11011247336864471, 'timestamp': '2025-10-02 00:14:10.469697', 'step': 1973, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:10.524705', 'step': 1973, 'epoch': 1}
{'type': 'loss', 'content': 0.07666773349046707, 'timestamp': '2025-10-02 00:14:10.528175', 'step': 1974, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:10.583870', 'step': 1974, 'epoch': 1}
{'type': 'loss', 'content': 0.07415945082902908, 'timestamp': '2025-10-02 00:14:10.586757', 'step': 1975, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:10.645209', 'step': 1975, 'epoch': 1}
{'type': 'loss', 'content': 0.06324619799852371, 'timestamp': '2025-10-02 00:14:10.651541', 'step': 1976, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:10.706237', 'step': 1976, 'epoch': 1}
{'type': 'loss', 'content': 0.11321748793125153, 'timestamp': '2025-10-02 00:14:10.712113', 'step': 1977, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:10.767638', 'step': 1977, 'epoch': 1}
{'type': 'loss', 'content': 0.04485200718045235, 'timestamp': '2025-10-02 00:14:10.776994', 'step': 1978, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:10.831957', 'step': 1978, 'epoch': 1}
{'type': 'loss', 'content': 0.05755145475268364, 'timestamp': '2025-10-02 00:14:10.837700', 'step': 1979, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:10.898222', 'step': 1979, 'epoch': 1}
{'type': 'loss', 'content': 0.07227788120508194, 'timestamp': '2025-10-02 00:14:10.909162', 'step': 1980, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:14:10.969982', 'step': 1980, 'epoch': 1}
{'type': 'loss', 'content': 0.04391038790345192, 'timestamp': '2025-10-02 00:14:10.981309', 'step': 1981, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:11.034758', 'step': 1981, 'epoch': 1}
{'type': 'loss', 'content': 0.1479477882385254, 'timestamp': '2025-10-02 00:14:11.036944', 'step': 1982, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:11.090772', 'step': 1982, 'epoch': 1}
{'type': 'loss', 'content': 0.052369095385074615, 'timestamp': '2025-10-02 00:14:11.098150', 'step': 1983, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:14:11.158248', 'step': 1983, 'epoch': 1}
{'type': 'loss', 'content': 0.04241035133600235, 'timestamp': '2025-10-02 00:14:11.169471', 'step': 1984, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:11.222120', 'step': 1984, 'epoch': 1}
{'type': 'loss', 'content': 0.10341968387365341, 'timestamp': '2025-10-02 00:14:11.224551', 'step': 1985, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:11.278385', 'step': 1985, 'epoch': 1}
{'type': 'loss', 'content': 0.05265069007873535, 'timestamp': '2025-10-02 00:14:11.281021', 'step': 1986, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:11.335305', 'step': 1986, 'epoch': 1}
{'type': 'loss', 'content': 0.16577167809009552, 'timestamp': '2025-10-02 00:14:11.338281', 'step': 1987, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:11.391293', 'step': 1987, 'epoch': 1}
{'type': 'loss', 'content': 0.09983652085065842, 'timestamp': '2025-10-02 00:14:11.397089', 'step': 1988, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:14:11.450196', 'step': 1988, 'epoch': 1}
{'type': 'loss', 'content': 0.13056841492652893, 'timestamp': '2025-10-02 00:14:11.452783', 'step': 1989, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:14:11.514115', 'step': 1989, 'epoch': 1}
{'type': 'loss', 'content': 0.049156613647937775, 'timestamp': '2025-10-02 00:14:11.524969', 'step': 1990, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:14:11.578602', 'step': 1990, 'epoch': 1}
{'type': 'loss', 'content': 0.1627970039844513, 'timestamp': '2025-10-02 00:14:11.580881', 'step': 1991, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:14:11.635074', 'step': 1991, 'epoch': 1}
{'type': 'loss', 'content': 0.15260976552963257, 'timestamp': '2025-10-02 00:14:11.640935', 'step': 1992, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:14:11.706342', 'step': 1992, 'epoch': 1}
{'type': 'loss', 'content': 0.03275681287050247, 'timestamp': '2025-10-02 00:14:11.719336', 'step': 1993, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:11.773295', 'step': 1993, 'epoch': 1}
{'type': 'loss', 'content': 0.08943759649991989, 'timestamp': '2025-10-02 00:14:11.775885', 'step': 1994, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:11.829489', 'step': 1994, 'epoch': 1}
{'type': 'loss', 'content': 0.05838332697749138, 'timestamp': '2025-10-02 00:14:11.836829', 'step': 1995, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:11.894692', 'step': 1995, 'epoch': 1}
{'type': 'loss', 'content': 0.06703711301088333, 'timestamp': '2025-10-02 00:14:11.905678', 'step': 1996, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:14:11.967090', 'step': 1996, 'epoch': 1}
{'type': 'loss', 'content': 0.03851532191038132, 'timestamp': '2025-10-02 00:14:11.978576', 'step': 1997, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:12.033793', 'step': 1997, 'epoch': 1}
{'type': 'loss', 'content': 0.09292388707399368, 'timestamp': '2025-10-02 00:14:12.043390', 'step': 1998, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:12.097753', 'step': 1998, 'epoch': 1}
{'type': 'loss', 'content': 0.06350670009851456, 'timestamp': '2025-10-02 00:14:12.099948', 'step': 1999, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:12.154788', 'step': 1999, 'epoch': 1}
{'type': 'loss', 'content': 0.2922550439834595, 'timestamp': '2025-10-02 00:14:12.160404', 'step': 2000, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 2000', 'timestamp': '2025-10-02 00:14:12.584706', 'step': 2000, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:14:12.641227', 'step': 2000, 'epoch': 1}
{'type': 'loss', 'content': 0.05722607299685478, 'timestamp': '2025-10-02 00:14:12.644074', 'step': 2001, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:12.703124', 'step': 2001, 'epoch': 1}
{'type': 'loss', 'content': 0.052676282823085785, 'timestamp': '2025-10-02 00:14:12.713342', 'step': 2002, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:12.767685', 'step': 2002, 'epoch': 1}
{'type': 'loss', 'content': 0.09338691085577011, 'timestamp': '2025-10-02 00:14:12.774986', 'step': 2003, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:12.829393', 'step': 2003, 'epoch': 1}
{'type': 'loss', 'content': 0.051521897315979004, 'timestamp': '2025-10-02 00:14:12.835934', 'step': 2004, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:12.889357', 'step': 2004, 'epoch': 1}
{'type': 'loss', 'content': 0.07147959619760513, 'timestamp': '2025-10-02 00:14:12.899608', 'step': 2005, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:12.958196', 'step': 2005, 'epoch': 1}
{'type': 'loss', 'content': 0.028643470257520676, 'timestamp': '2025-10-02 00:14:12.968382', 'step': 2006, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:13.022230', 'step': 2006, 'epoch': 1}
{'type': 'loss', 'content': 0.18902038037776947, 'timestamp': '2025-10-02 00:14:13.024705', 'step': 2007, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:13.079405', 'step': 2007, 'epoch': 1}
{'type': 'loss', 'content': 0.08927789330482483, 'timestamp': '2025-10-02 00:14:13.085462', 'step': 2008, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:13.139231', 'step': 2008, 'epoch': 1}
{'type': 'loss', 'content': 0.10994888097047806, 'timestamp': '2025-10-02 00:14:13.141631', 'step': 2009, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:13.196863', 'step': 2009, 'epoch': 1}
{'type': 'loss', 'content': 0.10543283075094223, 'timestamp': '2025-10-02 00:14:13.206415', 'step': 2010, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:13.260812', 'step': 2010, 'epoch': 1}
{'type': 'loss', 'content': 0.0454520508646965, 'timestamp': '2025-10-02 00:14:13.268123', 'step': 2011, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:14:13.328982', 'step': 2011, 'epoch': 1}
{'type': 'loss', 'content': 0.03799646720290184, 'timestamp': '2025-10-02 00:14:13.340252', 'step': 2012, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:13.394059', 'step': 2012, 'epoch': 1}
{'type': 'loss', 'content': 0.0718800351023674, 'timestamp': '2025-10-02 00:14:13.399984', 'step': 2013, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:13.454753', 'step': 2013, 'epoch': 1}
{'type': 'loss', 'content': 0.0373152419924736, 'timestamp': '2025-10-02 00:14:13.464267', 'step': 2014, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:14:13.519336', 'step': 2014, 'epoch': 1}
{'type': 'loss', 'content': 0.10890673100948334, 'timestamp': '2025-10-02 00:14:13.521627', 'step': 2015, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:14:13.590078', 'step': 2015, 'epoch': 1}
{'type': 'loss', 'content': 0.024591604247689247, 'timestamp': '2025-10-02 00:14:13.603153', 'step': 2016, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:13.658010', 'step': 2016, 'epoch': 1}
{'type': 'loss', 'content': 0.0600348636507988, 'timestamp': '2025-10-02 00:14:13.665438', 'step': 2017, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:13.719780', 'step': 2017, 'epoch': 1}
{'type': 'loss', 'content': 0.09496669471263885, 'timestamp': '2025-10-02 00:14:13.722081', 'step': 2018, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:13.775580', 'step': 2018, 'epoch': 1}
{'type': 'loss', 'content': 0.22496652603149414, 'timestamp': '2025-10-02 00:14:13.777924', 'step': 2019, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:13.832106', 'step': 2019, 'epoch': 1}
{'type': 'loss', 'content': 0.20842544734477997, 'timestamp': '2025-10-02 00:14:13.837669', 'step': 2020, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:14:13.891417', 'step': 2020, 'epoch': 1}
{'type': 'loss', 'content': 0.1502762734889984, 'timestamp': '2025-10-02 00:14:13.894683', 'step': 2021, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:14:13.949021', 'step': 2021, 'epoch': 1}
{'type': 'loss', 'content': 0.21806086599826813, 'timestamp': '2025-10-02 00:14:13.951881', 'step': 2022, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:14.005775', 'step': 2022, 'epoch': 1}
{'type': 'loss', 'content': 0.16839557886123657, 'timestamp': '2025-10-02 00:14:14.008243', 'step': 2023, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:14.062803', 'step': 2023, 'epoch': 1}
{'type': 'loss', 'content': 0.13881853222846985, 'timestamp': '2025-10-02 00:14:14.069412', 'step': 2024, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:14.122411', 'step': 2024, 'epoch': 1}
{'type': 'loss', 'content': 0.1722412407398224, 'timestamp': '2025-10-02 00:14:14.124677', 'step': 2025, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:14.178382', 'step': 2025, 'epoch': 1}
{'type': 'loss', 'content': 0.08770604431629181, 'timestamp': '2025-10-02 00:14:14.180869', 'step': 2026, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:14.234922', 'step': 2026, 'epoch': 1}
{'type': 'loss', 'content': 0.09828172624111176, 'timestamp': '2025-10-02 00:14:14.237218', 'step': 2027, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:14.291358', 'step': 2027, 'epoch': 1}
{'type': 'loss', 'content': 0.08633172512054443, 'timestamp': '2025-10-02 00:14:14.297076', 'step': 2028, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:14.350680', 'step': 2028, 'epoch': 1}
{'type': 'loss', 'content': 0.0709109678864479, 'timestamp': '2025-10-02 00:14:14.352702', 'step': 2029, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:14:14.405823', 'step': 2029, 'epoch': 1}
{'type': 'loss', 'content': 0.09748965501785278, 'timestamp': '2025-10-02 00:14:14.408113', 'step': 2030, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:14.462221', 'step': 2030, 'epoch': 1}
{'type': 'loss', 'content': 0.058807190507650375, 'timestamp': '2025-10-02 00:14:14.464566', 'step': 2031, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:14.519656', 'step': 2031, 'epoch': 1}
{'type': 'loss', 'content': 0.14004752039909363, 'timestamp': '2025-10-02 00:14:14.525508', 'step': 2032, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:14.578192', 'step': 2032, 'epoch': 1}
{'type': 'loss', 'content': 0.15876108407974243, 'timestamp': '2025-10-02 00:14:14.580829', 'step': 2033, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:14.634419', 'step': 2033, 'epoch': 1}
{'type': 'loss', 'content': 0.061947643756866455, 'timestamp': '2025-10-02 00:14:14.643800', 'step': 2034, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:14.698079', 'step': 2034, 'epoch': 1}
{'type': 'loss', 'content': 0.04507119208574295, 'timestamp': '2025-10-02 00:14:14.703913', 'step': 2035, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:14.767491', 'step': 2035, 'epoch': 1}
{'type': 'loss', 'content': 0.08336152881383896, 'timestamp': '2025-10-02 00:14:14.779892', 'step': 2036, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:14.835645', 'step': 2036, 'epoch': 1}
{'type': 'loss', 'content': 0.04158030450344086, 'timestamp': '2025-10-02 00:14:14.845867', 'step': 2037, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:14.899552', 'step': 2037, 'epoch': 1}
{'type': 'loss', 'content': 0.04292059317231178, 'timestamp': '2025-10-02 00:14:14.902645', 'step': 2038, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:14.961816', 'step': 2038, 'epoch': 1}
{'type': 'loss', 'content': 0.04483857378363609, 'timestamp': '2025-10-02 00:14:14.971963', 'step': 2039, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:15.025568', 'step': 2039, 'epoch': 1}
{'type': 'loss', 'content': 0.1292165070772171, 'timestamp': '2025-10-02 00:14:15.031937', 'step': 2040, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:15.084909', 'step': 2040, 'epoch': 1}
{'type': 'loss', 'content': 0.1264832466840744, 'timestamp': '2025-10-02 00:14:15.089037', 'step': 2041, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:15.142529', 'step': 2041, 'epoch': 1}
{'type': 'loss', 'content': 0.16176415979862213, 'timestamp': '2025-10-02 00:14:15.151907', 'step': 2042, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:15.206165', 'step': 2042, 'epoch': 1}
{'type': 'loss', 'content': 0.025734201073646545, 'timestamp': '2025-10-02 00:14:15.208594', 'step': 2043, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:15.276111', 'step': 2043, 'epoch': 1}
{'type': 'loss', 'content': 0.08344829827547073, 'timestamp': '2025-10-02 00:14:15.287090', 'step': 2044, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:15.344768', 'step': 2044, 'epoch': 1}
{'type': 'loss', 'content': 0.05414051562547684, 'timestamp': '2025-10-02 00:14:15.355738', 'step': 2045, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:15.408353', 'step': 2045, 'epoch': 1}
{'type': 'loss', 'content': 0.2328866422176361, 'timestamp': '2025-10-02 00:14:15.414015', 'step': 2046, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:15.468676', 'step': 2046, 'epoch': 1}
{'type': 'loss', 'content': 0.05043084919452667, 'timestamp': '2025-10-02 00:14:15.475943', 'step': 2047, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:15.528906', 'step': 2047, 'epoch': 1}
{'type': 'loss', 'content': 0.13526242971420288, 'timestamp': '2025-10-02 00:14:15.534427', 'step': 2048, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:15.586625', 'step': 2048, 'epoch': 1}
{'type': 'loss', 'content': 0.13018591701984406, 'timestamp': '2025-10-02 00:14:15.590665', 'step': 2049, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:15.645623', 'step': 2049, 'epoch': 1}
{'type': 'loss', 'content': 0.10617542266845703, 'timestamp': '2025-10-02 00:14:15.655137', 'step': 2050, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:15.709858', 'step': 2050, 'epoch': 1}
{'type': 'loss', 'content': 0.038104068487882614, 'timestamp': '2025-10-02 00:14:15.717276', 'step': 2051, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:15.774884', 'step': 2051, 'epoch': 1}
{'type': 'loss', 'content': 0.019413042813539505, 'timestamp': '2025-10-02 00:14:15.785204', 'step': 2052, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:15.839655', 'step': 2052, 'epoch': 1}
{'type': 'loss', 'content': 0.055625200271606445, 'timestamp': '2025-10-02 00:14:15.849247', 'step': 2053, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:15.907612', 'step': 2053, 'epoch': 1}
{'type': 'loss', 'content': 0.04278707131743431, 'timestamp': '2025-10-02 00:14:15.917818', 'step': 2054, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:15.973023', 'step': 2054, 'epoch': 1}
{'type': 'loss', 'content': 0.22368694841861725, 'timestamp': '2025-10-02 00:14:15.975472', 'step': 2055, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:16.030585', 'step': 2055, 'epoch': 1}
{'type': 'loss', 'content': 0.02958100102841854, 'timestamp': '2025-10-02 00:14:16.040910', 'step': 2056, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:16.095262', 'step': 2056, 'epoch': 1}
{'type': 'loss', 'content': 0.03202943503856659, 'timestamp': '2025-10-02 00:14:16.105490', 'step': 2057, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:16.159688', 'step': 2057, 'epoch': 1}
{'type': 'loss', 'content': 0.18011821806430817, 'timestamp': '2025-10-02 00:14:16.161977', 'step': 2058, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:14:16.225094', 'step': 2058, 'epoch': 1}
{'type': 'loss', 'content': 0.02940717525780201, 'timestamp': '2025-10-02 00:14:16.235970', 'step': 2059, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:16.296269', 'step': 2059, 'epoch': 1}
{'type': 'loss', 'content': 0.1220712959766388, 'timestamp': '2025-10-02 00:14:16.302250', 'step': 2060, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:14:16.361648', 'step': 2060, 'epoch': 1}
{'type': 'loss', 'content': 0.029561791568994522, 'timestamp': '2025-10-02 00:14:16.372918', 'step': 2061, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:16.427103', 'step': 2061, 'epoch': 1}
{'type': 'loss', 'content': 0.1112266555428505, 'timestamp': '2025-10-02 00:14:16.432949', 'step': 2062, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:16.486830', 'step': 2062, 'epoch': 1}
{'type': 'loss', 'content': 0.08202587813138962, 'timestamp': '2025-10-02 00:14:16.492608', 'step': 2063, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:16.546163', 'step': 2063, 'epoch': 1}
{'type': 'loss', 'content': 0.09021894633769989, 'timestamp': '2025-10-02 00:14:16.551840', 'step': 2064, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:16.605378', 'step': 2064, 'epoch': 1}
{'type': 'loss', 'content': 0.040630586445331573, 'timestamp': '2025-10-02 00:14:16.611245', 'step': 2065, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:16.665283', 'step': 2065, 'epoch': 1}
{'type': 'loss', 'content': 0.1545981466770172, 'timestamp': '2025-10-02 00:14:16.667655', 'step': 2066, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:16.723032', 'step': 2066, 'epoch': 1}
{'type': 'loss', 'content': 0.03509385883808136, 'timestamp': '2025-10-02 00:14:16.732521', 'step': 2067, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:16.787390', 'step': 2067, 'epoch': 1}
{'type': 'loss', 'content': 0.14254507422447205, 'timestamp': '2025-10-02 00:14:16.793289', 'step': 2068, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:16.850620', 'step': 2068, 'epoch': 1}
{'type': 'loss', 'content': 0.029397718608379364, 'timestamp': '2025-10-02 00:14:16.861582', 'step': 2069, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:16.915740', 'step': 2069, 'epoch': 1}
{'type': 'loss', 'content': 0.14301317930221558, 'timestamp': '2025-10-02 00:14:16.917945', 'step': 2070, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:16.972144', 'step': 2070, 'epoch': 1}
{'type': 'loss', 'content': 0.08647340536117554, 'timestamp': '2025-10-02 00:14:16.974445', 'step': 2071, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:17.027923', 'step': 2071, 'epoch': 1}
{'type': 'loss', 'content': 0.022522080689668655, 'timestamp': '2025-10-02 00:14:17.038060', 'step': 2072, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:14:17.091390', 'step': 2072, 'epoch': 1}
{'type': 'loss', 'content': 0.06027175858616829, 'timestamp': '2025-10-02 00:14:17.093644', 'step': 2073, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:17.148192', 'step': 2073, 'epoch': 1}
{'type': 'loss', 'content': 0.07413507252931595, 'timestamp': '2025-10-02 00:14:17.157557', 'step': 2074, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:17.221295', 'step': 2074, 'epoch': 1}
{'type': 'loss', 'content': 0.10158494114875793, 'timestamp': '2025-10-02 00:14:17.226836', 'step': 2075, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:17.283073', 'step': 2075, 'epoch': 1}
{'type': 'loss', 'content': 0.12759006023406982, 'timestamp': '2025-10-02 00:14:17.290140', 'step': 2076, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:17.346286', 'step': 2076, 'epoch': 1}
{'type': 'loss', 'content': 0.05006873607635498, 'timestamp': '2025-10-02 00:14:17.349526', 'step': 2077, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:17.404163', 'step': 2077, 'epoch': 1}
{'type': 'loss', 'content': 0.05939536541700363, 'timestamp': '2025-10-02 00:14:17.407837', 'step': 2078, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:17.462671', 'step': 2078, 'epoch': 1}
{'type': 'loss', 'content': 0.12699481844902039, 'timestamp': '2025-10-02 00:14:17.465643', 'step': 2079, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:17.520067', 'step': 2079, 'epoch': 1}
{'type': 'loss', 'content': 0.09228450059890747, 'timestamp': '2025-10-02 00:14:17.528839', 'step': 2080, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:17.583584', 'step': 2080, 'epoch': 1}
{'type': 'loss', 'content': 0.20396259427070618, 'timestamp': '2025-10-02 00:14:17.589453', 'step': 2081, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:17.647501', 'step': 2081, 'epoch': 1}
{'type': 'loss', 'content': 0.02165048196911812, 'timestamp': '2025-10-02 00:14:17.651049', 'step': 2082, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:17.708293', 'step': 2082, 'epoch': 1}
{'type': 'loss', 'content': 0.09786718338727951, 'timestamp': '2025-10-02 00:14:17.712027', 'step': 2083, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:17.767958', 'step': 2083, 'epoch': 1}
{'type': 'loss', 'content': 0.15430937707424164, 'timestamp': '2025-10-02 00:14:17.774122', 'step': 2084, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:17.827721', 'step': 2084, 'epoch': 1}
{'type': 'loss', 'content': 0.21090362966060638, 'timestamp': '2025-10-02 00:14:17.831562', 'step': 2085, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:14:17.892282', 'step': 2085, 'epoch': 1}
{'type': 'loss', 'content': 0.2156917005777359, 'timestamp': '2025-10-02 00:14:17.895392', 'step': 2086, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:14:17.949831', 'step': 2086, 'epoch': 1}
{'type': 'loss', 'content': 0.25519582629203796, 'timestamp': '2025-10-02 00:14:17.952295', 'step': 2087, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:18.007503', 'step': 2087, 'epoch': 1}
{'type': 'loss', 'content': 0.027303559705615044, 'timestamp': '2025-10-02 00:14:18.015824', 'step': 2088, 'epoch': 1}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:14:44.407112', 'step': 2088, 'epoch': 1}
{'type': 'pplx', 'content': 85.07206471891028, 'timestamp': '2025-10-02 00:14:44.410537', 'step': 2088, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:14:44.464278', 'step': 2088, 'epoch': 1}
{'type': 'loss', 'content': 0.13826791942119598, 'timestamp': '2025-10-02 00:14:44.466886', 'step': 2089, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:14:44.529945', 'step': 2089, 'epoch': 1}
{'type': 'loss', 'content': 0.10508573800325394, 'timestamp': '2025-10-02 00:14:44.533056', 'step': 2090, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:44.591100', 'step': 2090, 'epoch': 1}
{'type': 'loss', 'content': 0.15912795066833496, 'timestamp': '2025-10-02 00:14:44.595072', 'step': 2091, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:44.653276', 'step': 2091, 'epoch': 1}
{'type': 'loss', 'content': 0.13080823421478271, 'timestamp': '2025-10-02 00:14:44.660755', 'step': 2092, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:44.731819', 'step': 2092, 'epoch': 1}
{'type': 'loss', 'content': 0.06655904650688171, 'timestamp': '2025-10-02 00:14:44.742271', 'step': 2093, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:44.815576', 'step': 2093, 'epoch': 1}
{'type': 'loss', 'content': 0.05544386804103851, 'timestamp': '2025-10-02 00:14:44.826042', 'step': 2094, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:44.886229', 'step': 2094, 'epoch': 1}
{'type': 'loss', 'content': 0.22293516993522644, 'timestamp': '2025-10-02 00:14:44.893790', 'step': 2095, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:14:44.969840', 'step': 2095, 'epoch': 1}
{'type': 'loss', 'content': 0.045477576553821564, 'timestamp': '2025-10-02 00:14:44.983467', 'step': 2096, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:45.058548', 'step': 2096, 'epoch': 1}
{'type': 'loss', 'content': 0.11649088561534882, 'timestamp': '2025-10-02 00:14:45.067366', 'step': 2097, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:45.137741', 'step': 2097, 'epoch': 1}
{'type': 'loss', 'content': 0.22536861896514893, 'timestamp': '2025-10-02 00:14:45.143552', 'step': 2098, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:45.213680', 'step': 2098, 'epoch': 1}
{'type': 'loss', 'content': 0.1888246387243271, 'timestamp': '2025-10-02 00:14:45.216648', 'step': 2099, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:14:45.272987', 'step': 2099, 'epoch': 1}
{'type': 'loss', 'content': 0.0763496607542038, 'timestamp': '2025-10-02 00:14:45.286605', 'step': 2100, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:45.358672', 'step': 2100, 'epoch': 1}
{'type': 'loss', 'content': 0.03329334408044815, 'timestamp': '2025-10-02 00:14:45.369964', 'step': 2101, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:45.441251', 'step': 2101, 'epoch': 1}
{'type': 'loss', 'content': 0.05106791481375694, 'timestamp': '2025-10-02 00:14:45.450855', 'step': 2102, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:14:45.515238', 'step': 2102, 'epoch': 1}
{'type': 'loss', 'content': 0.08039799332618713, 'timestamp': '2025-10-02 00:14:45.519014', 'step': 2103, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:14:45.578734', 'step': 2103, 'epoch': 1}
{'type': 'loss', 'content': 0.18284760415554047, 'timestamp': '2025-10-02 00:14:45.590219', 'step': 2104, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:45.663758', 'step': 2104, 'epoch': 1}
{'type': 'loss', 'content': 0.058698296546936035, 'timestamp': '2025-10-02 00:14:45.672026', 'step': 2105, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:14:45.734812', 'step': 2105, 'epoch': 1}
{'type': 'loss', 'content': 0.10075031965970993, 'timestamp': '2025-10-02 00:14:45.742234', 'step': 2106, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:45.808090', 'step': 2106, 'epoch': 1}
{'type': 'loss', 'content': 0.05557423084974289, 'timestamp': '2025-10-02 00:14:45.813704', 'step': 2107, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:14:45.876641', 'step': 2107, 'epoch': 1}
{'type': 'loss', 'content': 0.08654306828975677, 'timestamp': '2025-10-02 00:14:45.884264', 'step': 2108, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:45.957243', 'step': 2108, 'epoch': 1}
{'type': 'loss', 'content': 0.14481808245182037, 'timestamp': '2025-10-02 00:14:45.965521', 'step': 2109, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:46.032623', 'step': 2109, 'epoch': 1}
{'type': 'loss', 'content': 0.11804595589637756, 'timestamp': '2025-10-02 00:14:46.038806', 'step': 2110, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:46.109631', 'step': 2110, 'epoch': 1}
{'type': 'loss', 'content': 0.08725085109472275, 'timestamp': '2025-10-02 00:14:46.117386', 'step': 2111, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:14:46.180382', 'step': 2111, 'epoch': 1}
{'type': 'loss', 'content': 0.12377294898033142, 'timestamp': '2025-10-02 00:14:46.192947', 'step': 2112, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:46.255252', 'step': 2112, 'epoch': 1}
{'type': 'loss', 'content': 0.13942483067512512, 'timestamp': '2025-10-02 00:14:46.265790', 'step': 2113, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:46.330449', 'step': 2113, 'epoch': 1}
{'type': 'loss', 'content': 0.022952662780880928, 'timestamp': '2025-10-02 00:14:46.337908', 'step': 2114, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:14:46.397103', 'step': 2114, 'epoch': 1}
{'type': 'loss', 'content': 0.08941877633333206, 'timestamp': '2025-10-02 00:14:46.401217', 'step': 2115, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:46.457701', 'step': 2115, 'epoch': 1}
{'type': 'loss', 'content': 0.06507303565740585, 'timestamp': '2025-10-02 00:14:46.470143', 'step': 2116, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:46.530118', 'step': 2116, 'epoch': 1}
{'type': 'loss', 'content': 0.123157799243927, 'timestamp': '2025-10-02 00:14:46.539953', 'step': 2117, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:46.596420', 'step': 2117, 'epoch': 1}
{'type': 'loss', 'content': 0.056973524391651154, 'timestamp': '2025-10-02 00:14:46.599863', 'step': 2118, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:46.657631', 'step': 2118, 'epoch': 1}
{'type': 'loss', 'content': 0.06941212713718414, 'timestamp': '2025-10-02 00:14:46.660977', 'step': 2119, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:46.722133', 'step': 2119, 'epoch': 1}
{'type': 'loss', 'content': 0.0543721504509449, 'timestamp': '2025-10-02 00:14:46.728932', 'step': 2120, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:46.783347', 'step': 2120, 'epoch': 1}
{'type': 'loss', 'content': 0.17075133323669434, 'timestamp': '2025-10-02 00:14:46.794226', 'step': 2121, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:46.871270', 'step': 2121, 'epoch': 1}
{'type': 'loss', 'content': 0.09827904403209686, 'timestamp': '2025-10-02 00:14:46.881729', 'step': 2122, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:46.939546', 'step': 2122, 'epoch': 1}
{'type': 'loss', 'content': 0.1368761956691742, 'timestamp': '2025-10-02 00:14:46.947946', 'step': 2123, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:14:47.024220', 'step': 2123, 'epoch': 1}
{'type': 'loss', 'content': 0.1220482736825943, 'timestamp': '2025-10-02 00:14:47.035652', 'step': 2124, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:14:47.108777', 'step': 2124, 'epoch': 1}
{'type': 'loss', 'content': 0.03537152707576752, 'timestamp': '2025-10-02 00:14:47.120545', 'step': 2125, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:47.195103', 'step': 2125, 'epoch': 1}
{'type': 'loss', 'content': 0.08150948584079742, 'timestamp': '2025-10-02 00:14:47.198647', 'step': 2126, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:14:47.262037', 'step': 2126, 'epoch': 1}
{'type': 'loss', 'content': 0.04614461585879326, 'timestamp': '2025-10-02 00:14:47.272818', 'step': 2127, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:47.339486', 'step': 2127, 'epoch': 1}
{'type': 'loss', 'content': 0.1575147807598114, 'timestamp': '2025-10-02 00:14:47.352254', 'step': 2128, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:47.416824', 'step': 2128, 'epoch': 1}
{'type': 'loss', 'content': 0.08400996029376984, 'timestamp': '2025-10-02 00:14:47.422931', 'step': 2129, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:47.478933', 'step': 2129, 'epoch': 1}
{'type': 'loss', 'content': 0.1266193389892578, 'timestamp': '2025-10-02 00:14:47.483213', 'step': 2130, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:14:47.539406', 'step': 2130, 'epoch': 1}
{'type': 'loss', 'content': 0.08145710080862045, 'timestamp': '2025-10-02 00:14:47.542575', 'step': 2131, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:14:47.616804', 'step': 2131, 'epoch': 1}
{'type': 'loss', 'content': 0.06767363101243973, 'timestamp': '2025-10-02 00:14:47.629780', 'step': 2132, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:47.692655', 'step': 2132, 'epoch': 1}
{'type': 'loss', 'content': 0.04516463354229927, 'timestamp': '2025-10-02 00:14:47.695773', 'step': 2133, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:47.752081', 'step': 2133, 'epoch': 1}
{'type': 'loss', 'content': 0.10316654294729233, 'timestamp': '2025-10-02 00:14:47.759768', 'step': 2134, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:47.815576', 'step': 2134, 'epoch': 1}
{'type': 'loss', 'content': 0.096970334649086, 'timestamp': '2025-10-02 00:14:47.823806', 'step': 2135, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:14:47.901534', 'step': 2135, 'epoch': 1}
{'type': 'loss', 'content': 0.01873750612139702, 'timestamp': '2025-10-02 00:14:47.913219', 'step': 2136, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:14:47.967708', 'step': 2136, 'epoch': 1}
{'type': 'loss', 'content': 0.20007340610027313, 'timestamp': '2025-10-02 00:14:47.971042', 'step': 2137, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:14:48.029605', 'step': 2137, 'epoch': 1}
{'type': 'loss', 'content': 0.186638742685318, 'timestamp': '2025-10-02 00:14:48.032232', 'step': 2138, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:48.086912', 'step': 2138, 'epoch': 1}
{'type': 'loss', 'content': 0.10465873777866364, 'timestamp': '2025-10-02 00:14:48.090127', 'step': 2139, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:48.146896', 'step': 2139, 'epoch': 1}
{'type': 'loss', 'content': 0.07881029695272446, 'timestamp': '2025-10-02 00:14:48.153149', 'step': 2140, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 576], 'flops': 11520070000896.0}, 'timestamp': '2025-10-02 00:14:48.246329', 'step': 2140, 'epoch': 1}
{'type': 'loss', 'content': 0.01700630970299244, 'timestamp': '2025-10-02 00:14:48.263177', 'step': 2141, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:48.332458', 'step': 2141, 'epoch': 1}
{'type': 'loss', 'content': 0.08330295979976654, 'timestamp': '2025-10-02 00:14:48.343888', 'step': 2142, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:48.413357', 'step': 2142, 'epoch': 1}
{'type': 'loss', 'content': 0.05418609827756882, 'timestamp': '2025-10-02 00:14:48.421143', 'step': 2143, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:48.489936', 'step': 2143, 'epoch': 1}
{'type': 'loss', 'content': 0.09088211506605148, 'timestamp': '2025-10-02 00:14:48.507058', 'step': 2144, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:14:48.584240', 'step': 2144, 'epoch': 1}
{'type': 'loss', 'content': 0.26241689920425415, 'timestamp': '2025-10-02 00:14:48.595705', 'step': 2145, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:48.670360', 'step': 2145, 'epoch': 1}
{'type': 'loss', 'content': 0.10146648436784744, 'timestamp': '2025-10-02 00:14:48.674226', 'step': 2146, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:48.734252', 'step': 2146, 'epoch': 1}
{'type': 'loss', 'content': 0.07094713300466537, 'timestamp': '2025-10-02 00:14:48.754965', 'step': 2147, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:48.827911', 'step': 2147, 'epoch': 1}
{'type': 'loss', 'content': 0.1354474276304245, 'timestamp': '2025-10-02 00:14:48.836255', 'step': 2148, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:48.907890', 'step': 2148, 'epoch': 1}
{'type': 'loss', 'content': 0.1254938542842865, 'timestamp': '2025-10-02 00:14:48.912923', 'step': 2149, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:49.001741', 'step': 2149, 'epoch': 1}
{'type': 'loss', 'content': 0.08162729442119598, 'timestamp': '2025-10-02 00:14:49.020839', 'step': 2150, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:49.097083', 'step': 2150, 'epoch': 1}
{'type': 'loss', 'content': 0.05706474557518959, 'timestamp': '2025-10-02 00:14:49.102939', 'step': 2151, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:49.174496', 'step': 2151, 'epoch': 1}
{'type': 'loss', 'content': 0.0511101670563221, 'timestamp': '2025-10-02 00:14:49.181734', 'step': 2152, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:49.244511', 'step': 2152, 'epoch': 1}
{'type': 'loss', 'content': 0.026025179773569107, 'timestamp': '2025-10-02 00:14:49.251365', 'step': 2153, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:49.313140', 'step': 2153, 'epoch': 1}
{'type': 'loss', 'content': 0.08642377704381943, 'timestamp': '2025-10-02 00:14:49.316625', 'step': 2154, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:49.373850', 'step': 2154, 'epoch': 1}
{'type': 'loss', 'content': 0.04518410935997963, 'timestamp': '2025-10-02 00:14:49.379825', 'step': 2155, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:49.443796', 'step': 2155, 'epoch': 1}
{'type': 'loss', 'content': 0.07252799719572067, 'timestamp': '2025-10-02 00:14:49.455033', 'step': 2156, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:49.509837', 'step': 2156, 'epoch': 1}
{'type': 'loss', 'content': 0.046443965286016464, 'timestamp': '2025-10-02 00:14:49.513232', 'step': 2157, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:49.570341', 'step': 2157, 'epoch': 1}
{'type': 'loss', 'content': 0.08621774613857269, 'timestamp': '2025-10-02 00:14:49.576520', 'step': 2158, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:14:49.641016', 'step': 2158, 'epoch': 1}
{'type': 'loss', 'content': 0.026708846911787987, 'timestamp': '2025-10-02 00:14:49.652143', 'step': 2159, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:14:49.723343', 'step': 2159, 'epoch': 1}
{'type': 'loss', 'content': 0.07397628575563431, 'timestamp': '2025-10-02 00:14:49.730301', 'step': 2160, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:14:49.804538', 'step': 2160, 'epoch': 1}
{'type': 'loss', 'content': 0.03196088224649429, 'timestamp': '2025-10-02 00:14:49.816357', 'step': 2161, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:49.886669', 'step': 2161, 'epoch': 1}
{'type': 'loss', 'content': 0.08957973122596741, 'timestamp': '2025-10-02 00:14:49.890256', 'step': 2162, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:49.981140', 'step': 2162, 'epoch': 1}
{'type': 'loss', 'content': 0.15792733430862427, 'timestamp': '2025-10-02 00:14:49.994599', 'step': 2163, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:14:50.083053', 'step': 2163, 'epoch': 1}
{'type': 'loss', 'content': 0.026933060958981514, 'timestamp': '2025-10-02 00:14:50.094769', 'step': 2164, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:50.164031', 'step': 2164, 'epoch': 1}
{'type': 'loss', 'content': 0.06567204743623734, 'timestamp': '2025-10-02 00:14:50.171764', 'step': 2165, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:14:50.252267', 'step': 2165, 'epoch': 1}
{'type': 'loss', 'content': 0.08068620413541794, 'timestamp': '2025-10-02 00:14:50.264868', 'step': 2166, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:14:50.323879', 'step': 2166, 'epoch': 1}
{'type': 'loss', 'content': 0.1167747750878334, 'timestamp': '2025-10-02 00:14:50.328278', 'step': 2167, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:50.385078', 'step': 2167, 'epoch': 1}
{'type': 'loss', 'content': 0.1863027811050415, 'timestamp': '2025-10-02 00:14:50.401623', 'step': 2168, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:50.461052', 'step': 2168, 'epoch': 1}
{'type': 'loss', 'content': 0.14011959731578827, 'timestamp': '2025-10-02 00:14:50.464854', 'step': 2169, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:50.521895', 'step': 2169, 'epoch': 1}
{'type': 'loss', 'content': 0.13783948123455048, 'timestamp': '2025-10-02 00:14:50.526485', 'step': 2170, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:50.585571', 'step': 2170, 'epoch': 1}
{'type': 'loss', 'content': 0.08111312240362167, 'timestamp': '2025-10-02 00:14:50.595389', 'step': 2171, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:50.659136', 'step': 2171, 'epoch': 1}
{'type': 'loss', 'content': 0.2058427631855011, 'timestamp': '2025-10-02 00:14:50.673138', 'step': 2172, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:14:50.731897', 'step': 2172, 'epoch': 1}
{'type': 'loss', 'content': 0.14936712384223938, 'timestamp': '2025-10-02 00:14:50.735793', 'step': 2173, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:50.792700', 'step': 2173, 'epoch': 1}
{'type': 'loss', 'content': 0.14146554470062256, 'timestamp': '2025-10-02 00:14:50.796565', 'step': 2174, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:14:50.875181', 'step': 2174, 'epoch': 1}
{'type': 'loss', 'content': 0.01719149947166443, 'timestamp': '2025-10-02 00:14:50.887765', 'step': 2175, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:14:50.950863', 'step': 2175, 'epoch': 1}
{'type': 'loss', 'content': 0.04541226848959923, 'timestamp': '2025-10-02 00:14:50.966411', 'step': 2176, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:51.024615', 'step': 2176, 'epoch': 1}
{'type': 'loss', 'content': 0.12137354910373688, 'timestamp': '2025-10-02 00:14:51.032473', 'step': 2177, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:51.097478', 'step': 2177, 'epoch': 1}
{'type': 'loss', 'content': 0.09516070783138275, 'timestamp': '2025-10-02 00:14:51.103703', 'step': 2178, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:51.164912', 'step': 2178, 'epoch': 1}
{'type': 'loss', 'content': 0.057316746562719345, 'timestamp': '2025-10-02 00:14:51.168870', 'step': 2179, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:51.228858', 'step': 2179, 'epoch': 1}
{'type': 'loss', 'content': 0.07411698997020721, 'timestamp': '2025-10-02 00:14:51.237274', 'step': 2180, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:14:51.308772', 'step': 2180, 'epoch': 1}
{'type': 'loss', 'content': 0.2523661255836487, 'timestamp': '2025-10-02 00:14:51.312135', 'step': 2181, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:51.371714', 'step': 2181, 'epoch': 1}
{'type': 'loss', 'content': 0.07492705434560776, 'timestamp': '2025-10-02 00:14:51.382505', 'step': 2182, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:51.440201', 'step': 2182, 'epoch': 1}
{'type': 'loss', 'content': 0.2119845151901245, 'timestamp': '2025-10-02 00:14:51.444366', 'step': 2183, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:51.509109', 'step': 2183, 'epoch': 1}
{'type': 'loss', 'content': 0.1770027130842209, 'timestamp': '2025-10-02 00:14:51.516300', 'step': 2184, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:51.573984', 'step': 2184, 'epoch': 1}
{'type': 'loss', 'content': 0.0657091960310936, 'timestamp': '2025-10-02 00:14:51.586336', 'step': 2185, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:51.653710', 'step': 2185, 'epoch': 1}
{'type': 'loss', 'content': 0.17555689811706543, 'timestamp': '2025-10-02 00:14:51.665228', 'step': 2186, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:51.750242', 'step': 2186, 'epoch': 1}
{'type': 'loss', 'content': 0.12016153335571289, 'timestamp': '2025-10-02 00:14:51.754033', 'step': 2187, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:51.825140', 'step': 2187, 'epoch': 1}
{'type': 'loss', 'content': 0.07907241582870483, 'timestamp': '2025-10-02 00:14:51.840305', 'step': 2188, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:51.910114', 'step': 2188, 'epoch': 1}
{'type': 'loss', 'content': 0.04740811884403229, 'timestamp': '2025-10-02 00:14:51.913139', 'step': 2189, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:51.978086', 'step': 2189, 'epoch': 1}
{'type': 'loss', 'content': 0.14728659391403198, 'timestamp': '2025-10-02 00:14:51.982340', 'step': 2190, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:52.040665', 'step': 2190, 'epoch': 1}
{'type': 'loss', 'content': 0.06477521359920502, 'timestamp': '2025-10-02 00:14:52.044795', 'step': 2191, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:52.111168', 'step': 2191, 'epoch': 1}
{'type': 'loss', 'content': 0.21471117436885834, 'timestamp': '2025-10-02 00:14:52.118338', 'step': 2192, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:52.188223', 'step': 2192, 'epoch': 1}
{'type': 'loss', 'content': 0.08014080673456192, 'timestamp': '2025-10-02 00:14:52.195942', 'step': 2193, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:14:52.275109', 'step': 2193, 'epoch': 1}
{'type': 'loss', 'content': 0.08654943853616714, 'timestamp': '2025-10-02 00:14:52.287997', 'step': 2194, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:14:52.353881', 'step': 2194, 'epoch': 1}
{'type': 'loss', 'content': 0.02367687225341797, 'timestamp': '2025-10-02 00:14:52.364652', 'step': 2195, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:52.424029', 'step': 2195, 'epoch': 1}
{'type': 'loss', 'content': 0.08656350523233414, 'timestamp': '2025-10-02 00:14:52.430475', 'step': 2196, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:52.497548', 'step': 2196, 'epoch': 1}
{'type': 'loss', 'content': 0.14554935693740845, 'timestamp': '2025-10-02 00:14:52.500816', 'step': 2197, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:52.557231', 'step': 2197, 'epoch': 1}
{'type': 'loss', 'content': 0.07865044474601746, 'timestamp': '2025-10-02 00:14:52.564981', 'step': 2198, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:52.628095', 'step': 2198, 'epoch': 1}
{'type': 'loss', 'content': 0.13254015147686005, 'timestamp': '2025-10-02 00:14:52.634777', 'step': 2199, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:52.698450', 'step': 2199, 'epoch': 1}
{'type': 'loss', 'content': 0.07227484881877899, 'timestamp': '2025-10-02 00:14:52.707598', 'step': 2200, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:52.769560', 'step': 2200, 'epoch': 1}
{'type': 'loss', 'content': 0.05118963122367859, 'timestamp': '2025-10-02 00:14:52.782136', 'step': 2201, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:52.843745', 'step': 2201, 'epoch': 1}
{'type': 'loss', 'content': 0.07564673572778702, 'timestamp': '2025-10-02 00:14:52.850010', 'step': 2202, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:14:52.925958', 'step': 2202, 'epoch': 1}
{'type': 'loss', 'content': 0.038993097841739655, 'timestamp': '2025-10-02 00:14:52.939673', 'step': 2203, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:14:52.997712', 'step': 2203, 'epoch': 1}
{'type': 'loss', 'content': 0.18786513805389404, 'timestamp': '2025-10-02 00:14:53.004369', 'step': 2204, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:53.067989', 'step': 2204, 'epoch': 1}
{'type': 'loss', 'content': 0.01644187979400158, 'timestamp': '2025-10-02 00:14:53.079252', 'step': 2205, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:14:53.140361', 'step': 2205, 'epoch': 1}
{'type': 'loss', 'content': 0.09821530431509018, 'timestamp': '2025-10-02 00:14:53.142947', 'step': 2206, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:53.199267', 'step': 2206, 'epoch': 1}
{'type': 'loss', 'content': 0.1704043447971344, 'timestamp': '2025-10-02 00:14:53.210966', 'step': 2207, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:53.268904', 'step': 2207, 'epoch': 1}
{'type': 'loss', 'content': 0.17228366434574127, 'timestamp': '2025-10-02 00:14:53.290853', 'step': 2208, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:53.354859', 'step': 2208, 'epoch': 1}
{'type': 'loss', 'content': 0.020106270909309387, 'timestamp': '2025-10-02 00:14:53.365411', 'step': 2209, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:53.438391', 'step': 2209, 'epoch': 1}
{'type': 'loss', 'content': 0.11281288415193558, 'timestamp': '2025-10-02 00:14:53.442222', 'step': 2210, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:53.506904', 'step': 2210, 'epoch': 1}
{'type': 'loss', 'content': 0.04602651670575142, 'timestamp': '2025-10-02 00:14:53.514674', 'step': 2211, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:53.574160', 'step': 2211, 'epoch': 1}
{'type': 'loss', 'content': 0.2965264320373535, 'timestamp': '2025-10-02 00:14:53.580446', 'step': 2212, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:53.633489', 'step': 2212, 'epoch': 1}
{'type': 'loss', 'content': 0.09011638164520264, 'timestamp': '2025-10-02 00:14:53.643335', 'step': 2213, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:53.698702', 'step': 2213, 'epoch': 1}
{'type': 'loss', 'content': 0.18681640923023224, 'timestamp': '2025-10-02 00:14:53.702515', 'step': 2214, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:53.759660', 'step': 2214, 'epoch': 1}
{'type': 'loss', 'content': 0.10817506164312363, 'timestamp': '2025-10-02 00:14:53.762585', 'step': 2215, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:14:53.819813', 'step': 2215, 'epoch': 1}
{'type': 'loss', 'content': 0.13393157720565796, 'timestamp': '2025-10-02 00:14:53.826525', 'step': 2216, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:53.882371', 'step': 2216, 'epoch': 1}
{'type': 'loss', 'content': 0.18334700167179108, 'timestamp': '2025-10-02 00:14:53.886104', 'step': 2217, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:14:53.942479', 'step': 2217, 'epoch': 1}
{'type': 'loss', 'content': 0.09937245398759842, 'timestamp': '2025-10-02 00:14:53.946992', 'step': 2218, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:54.009931', 'step': 2218, 'epoch': 1}
{'type': 'loss', 'content': 0.10779408365488052, 'timestamp': '2025-10-02 00:14:54.015981', 'step': 2219, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:54.074357', 'step': 2219, 'epoch': 1}
{'type': 'loss', 'content': 0.11824400722980499, 'timestamp': '2025-10-02 00:14:54.083641', 'step': 2220, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:54.143532', 'step': 2220, 'epoch': 1}
{'type': 'loss', 'content': 0.08140306174755096, 'timestamp': '2025-10-02 00:14:54.149658', 'step': 2221, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 640], 'flops': 12800077771264.0}, 'timestamp': '2025-10-02 00:14:54.244530', 'step': 2221, 'epoch': 1}
{'type': 'loss', 'content': 0.03581773117184639, 'timestamp': '2025-10-02 00:14:54.261679', 'step': 2222, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:54.316266', 'step': 2222, 'epoch': 1}
{'type': 'loss', 'content': 0.1046287938952446, 'timestamp': '2025-10-02 00:14:54.325671', 'step': 2223, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:14:54.387860', 'step': 2223, 'epoch': 1}
{'type': 'loss', 'content': 0.04540369659662247, 'timestamp': '2025-10-02 00:14:54.399401', 'step': 2224, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:54.453471', 'step': 2224, 'epoch': 1}
{'type': 'loss', 'content': 0.05688599869608879, 'timestamp': '2025-10-02 00:14:54.463921', 'step': 2225, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:54.518079', 'step': 2225, 'epoch': 1}
{'type': 'loss', 'content': 0.09508291631937027, 'timestamp': '2025-10-02 00:14:54.520657', 'step': 2226, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:54.575748', 'step': 2226, 'epoch': 1}
{'type': 'loss', 'content': 0.08954878896474838, 'timestamp': '2025-10-02 00:14:54.581848', 'step': 2227, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:54.636454', 'step': 2227, 'epoch': 1}
{'type': 'loss', 'content': 0.09314562380313873, 'timestamp': '2025-10-02 00:14:54.644936', 'step': 2228, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 544], 'flops': 10880066115712.0}, 'timestamp': '2025-10-02 00:14:54.727588', 'step': 2228, 'epoch': 1}
{'type': 'loss', 'content': 0.019585825502872467, 'timestamp': '2025-10-02 00:14:54.743913', 'step': 2229, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:54.799248', 'step': 2229, 'epoch': 1}
{'type': 'loss', 'content': 0.19006551802158356, 'timestamp': '2025-10-02 00:14:54.802614', 'step': 2230, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:14:54.866787', 'step': 2230, 'epoch': 1}
{'type': 'loss', 'content': 0.06225181370973587, 'timestamp': '2025-10-02 00:14:54.877883', 'step': 2231, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:54.935665', 'step': 2231, 'epoch': 1}
{'type': 'loss', 'content': 0.24428904056549072, 'timestamp': '2025-10-02 00:14:54.941784', 'step': 2232, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:54.999032', 'step': 2232, 'epoch': 1}
{'type': 'loss', 'content': 0.12725524604320526, 'timestamp': '2025-10-02 00:14:55.004199', 'step': 2233, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:55.065030', 'step': 2233, 'epoch': 1}
{'type': 'loss', 'content': 0.17953212559223175, 'timestamp': '2025-10-02 00:14:55.068377', 'step': 2234, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:14:55.136855', 'step': 2234, 'epoch': 1}
{'type': 'loss', 'content': 0.04444895684719086, 'timestamp': '2025-10-02 00:14:55.147589', 'step': 2235, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:55.207922', 'step': 2235, 'epoch': 1}
{'type': 'loss', 'content': 0.05792634189128876, 'timestamp': '2025-10-02 00:14:55.215250', 'step': 2236, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:55.270547', 'step': 2236, 'epoch': 1}
{'type': 'loss', 'content': 0.09027066826820374, 'timestamp': '2025-10-02 00:14:55.273695', 'step': 2237, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:55.328852', 'step': 2237, 'epoch': 1}
{'type': 'loss', 'content': 0.2549980580806732, 'timestamp': '2025-10-02 00:14:55.331548', 'step': 2238, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:14:55.392541', 'step': 2238, 'epoch': 1}
{'type': 'loss', 'content': 0.04499240592122078, 'timestamp': '2025-10-02 00:14:55.403222', 'step': 2239, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:55.458441', 'step': 2239, 'epoch': 1}
{'type': 'loss', 'content': 0.05582072585821152, 'timestamp': '2025-10-02 00:14:55.464973', 'step': 2240, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:55.519795', 'step': 2240, 'epoch': 1}
{'type': 'loss', 'content': 0.06490346789360046, 'timestamp': '2025-10-02 00:14:55.522754', 'step': 2241, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:55.580981', 'step': 2241, 'epoch': 1}
{'type': 'loss', 'content': 0.08563366532325745, 'timestamp': '2025-10-02 00:14:55.584345', 'step': 2242, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:55.638913', 'step': 2242, 'epoch': 1}
{'type': 'loss', 'content': 0.03665018826723099, 'timestamp': '2025-10-02 00:14:55.641449', 'step': 2243, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:55.695506', 'step': 2243, 'epoch': 1}
{'type': 'loss', 'content': 0.12407030165195465, 'timestamp': '2025-10-02 00:14:55.701550', 'step': 2244, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:55.754513', 'step': 2244, 'epoch': 1}
{'type': 'loss', 'content': 0.15742015838623047, 'timestamp': '2025-10-02 00:14:55.757394', 'step': 2245, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:55.812498', 'step': 2245, 'epoch': 1}
{'type': 'loss', 'content': 0.0725608691573143, 'timestamp': '2025-10-02 00:14:55.822284', 'step': 2246, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:14:55.876471', 'step': 2246, 'epoch': 1}
{'type': 'loss', 'content': 0.18456891179084778, 'timestamp': '2025-10-02 00:14:55.878785', 'step': 2247, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:14:55.932245', 'step': 2247, 'epoch': 1}
{'type': 'loss', 'content': 0.15838047862052917, 'timestamp': '2025-10-02 00:14:55.939198', 'step': 2248, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:55.994959', 'step': 2248, 'epoch': 1}
{'type': 'loss', 'content': 0.08944600820541382, 'timestamp': '2025-10-02 00:14:56.001060', 'step': 2249, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:56.056034', 'step': 2249, 'epoch': 1}
{'type': 'loss', 'content': 0.15173573791980743, 'timestamp': '2025-10-02 00:14:56.058518', 'step': 2250, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:14:56.112484', 'step': 2250, 'epoch': 1}
{'type': 'loss', 'content': 0.10777489840984344, 'timestamp': '2025-10-02 00:14:56.115124', 'step': 2251, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:14:56.169742', 'step': 2251, 'epoch': 1}
{'type': 'loss', 'content': 0.13230541348457336, 'timestamp': '2025-10-02 00:14:56.176601', 'step': 2252, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:14:56.236511', 'step': 2252, 'epoch': 1}
{'type': 'loss', 'content': 0.01691051945090294, 'timestamp': '2025-10-02 00:14:56.247982', 'step': 2253, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:56.302058', 'step': 2253, 'epoch': 1}
{'type': 'loss', 'content': 0.11594205349683762, 'timestamp': '2025-10-02 00:14:56.304448', 'step': 2254, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:56.358015', 'step': 2254, 'epoch': 1}
{'type': 'loss', 'content': 0.28858163952827454, 'timestamp': '2025-10-02 00:14:56.360433', 'step': 2255, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:56.415860', 'step': 2255, 'epoch': 1}
{'type': 'loss', 'content': 0.11435377597808838, 'timestamp': '2025-10-02 00:14:56.421896', 'step': 2256, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:56.474738', 'step': 2256, 'epoch': 1}
{'type': 'loss', 'content': 0.17109645903110504, 'timestamp': '2025-10-02 00:14:56.477389', 'step': 2257, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:56.531425', 'step': 2257, 'epoch': 1}
{'type': 'loss', 'content': 0.04499183967709541, 'timestamp': '2025-10-02 00:14:56.533883', 'step': 2258, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:56.587643', 'step': 2258, 'epoch': 1}
{'type': 'loss', 'content': 0.11232511699199677, 'timestamp': '2025-10-02 00:14:56.593482', 'step': 2259, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:56.647919', 'step': 2259, 'epoch': 1}
{'type': 'loss', 'content': 0.04433683678507805, 'timestamp': '2025-10-02 00:14:56.654132', 'step': 2260, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:56.707563', 'step': 2260, 'epoch': 1}
{'type': 'loss', 'content': 0.06982339173555374, 'timestamp': '2025-10-02 00:14:56.713507', 'step': 2261, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:56.768863', 'step': 2261, 'epoch': 1}
{'type': 'loss', 'content': 0.11846385896205902, 'timestamp': '2025-10-02 00:14:56.773812', 'step': 2262, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:56.828567', 'step': 2262, 'epoch': 1}
{'type': 'loss', 'content': 0.10395419597625732, 'timestamp': '2025-10-02 00:14:56.831018', 'step': 2263, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:56.885185', 'step': 2263, 'epoch': 1}
{'type': 'loss', 'content': 0.1821485310792923, 'timestamp': '2025-10-02 00:14:56.891410', 'step': 2264, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:56.943982', 'step': 2264, 'epoch': 1}
{'type': 'loss', 'content': 0.2709905803203583, 'timestamp': '2025-10-02 00:14:56.948003', 'step': 2265, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:57.006597', 'step': 2265, 'epoch': 1}
{'type': 'loss', 'content': 0.19412776827812195, 'timestamp': '2025-10-02 00:14:57.011227', 'step': 2266, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:57.068811', 'step': 2266, 'epoch': 1}
{'type': 'loss', 'content': 0.103963702917099, 'timestamp': '2025-10-02 00:14:57.078599', 'step': 2267, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:57.135714', 'step': 2267, 'epoch': 1}
{'type': 'loss', 'content': 0.21097545325756073, 'timestamp': '2025-10-02 00:14:57.142087', 'step': 2268, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:57.202783', 'step': 2268, 'epoch': 1}
{'type': 'loss', 'content': 0.10204142332077026, 'timestamp': '2025-10-02 00:14:57.206284', 'step': 2269, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:57.260795', 'step': 2269, 'epoch': 1}
{'type': 'loss', 'content': 0.03779718279838562, 'timestamp': '2025-10-02 00:14:57.270270', 'step': 2270, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:57.331172', 'step': 2270, 'epoch': 1}
{'type': 'loss', 'content': 0.033057503402233124, 'timestamp': '2025-10-02 00:14:57.341586', 'step': 2271, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:14:57.402934', 'step': 2271, 'epoch': 1}
{'type': 'loss', 'content': 0.05228983238339424, 'timestamp': '2025-10-02 00:14:57.414461', 'step': 2272, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:57.468751', 'step': 2272, 'epoch': 1}
{'type': 'loss', 'content': 0.018891161307692528, 'timestamp': '2025-10-02 00:14:57.478200', 'step': 2273, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:57.536197', 'step': 2273, 'epoch': 1}
{'type': 'loss', 'content': 0.06713397800922394, 'timestamp': '2025-10-02 00:14:57.546000', 'step': 2274, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:57.602425', 'step': 2274, 'epoch': 1}
{'type': 'loss', 'content': 0.14433227479457855, 'timestamp': '2025-10-02 00:14:57.605029', 'step': 2275, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:14:57.660185', 'step': 2275, 'epoch': 1}
{'type': 'loss', 'content': 0.12250538170337677, 'timestamp': '2025-10-02 00:14:57.666214', 'step': 2276, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:57.719366', 'step': 2276, 'epoch': 1}
{'type': 'loss', 'content': 0.1525198370218277, 'timestamp': '2025-10-02 00:14:57.722393', 'step': 2277, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:57.777421', 'step': 2277, 'epoch': 1}
{'type': 'loss', 'content': 0.18554648756980896, 'timestamp': '2025-10-02 00:14:57.780654', 'step': 2278, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:57.839151', 'step': 2278, 'epoch': 1}
{'type': 'loss', 'content': 0.012102716602385044, 'timestamp': '2025-10-02 00:14:57.846722', 'step': 2279, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:14:57.905997', 'step': 2279, 'epoch': 1}
{'type': 'loss', 'content': 0.025530725717544556, 'timestamp': '2025-10-02 00:14:57.917220', 'step': 2280, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:57.971278', 'step': 2280, 'epoch': 1}
{'type': 'loss', 'content': 0.16018284857273102, 'timestamp': '2025-10-02 00:14:57.974544', 'step': 2281, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:58.030377', 'step': 2281, 'epoch': 1}
{'type': 'loss', 'content': 0.10351944714784622, 'timestamp': '2025-10-02 00:14:58.036545', 'step': 2282, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:58.091405', 'step': 2282, 'epoch': 1}
{'type': 'loss', 'content': 0.04301334545016289, 'timestamp': '2025-10-02 00:14:58.098995', 'step': 2283, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:58.153711', 'step': 2283, 'epoch': 1}
{'type': 'loss', 'content': 0.2463681399822235, 'timestamp': '2025-10-02 00:14:58.160223', 'step': 2284, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:58.216539', 'step': 2284, 'epoch': 1}
{'type': 'loss', 'content': 0.09252723306417465, 'timestamp': '2025-10-02 00:14:58.219023', 'step': 2285, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:58.275472', 'step': 2285, 'epoch': 1}
{'type': 'loss', 'content': 0.058474961668252945, 'timestamp': '2025-10-02 00:14:58.285276', 'step': 2286, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:14:58.340267', 'step': 2286, 'epoch': 1}
{'type': 'loss', 'content': 0.11661908775568008, 'timestamp': '2025-10-02 00:14:58.342695', 'step': 2287, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:58.398629', 'step': 2287, 'epoch': 1}
{'type': 'loss', 'content': 0.08889924734830856, 'timestamp': '2025-10-02 00:14:58.406863', 'step': 2288, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:58.461417', 'step': 2288, 'epoch': 1}
{'type': 'loss', 'content': 0.09478065371513367, 'timestamp': '2025-10-02 00:14:58.463870', 'step': 2289, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:14:58.519142', 'step': 2289, 'epoch': 1}
{'type': 'loss', 'content': 0.09610825777053833, 'timestamp': '2025-10-02 00:14:58.526830', 'step': 2290, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:14:58.582915', 'step': 2290, 'epoch': 1}
{'type': 'loss', 'content': 0.03979707881808281, 'timestamp': '2025-10-02 00:14:58.592690', 'step': 2291, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:58.649182', 'step': 2291, 'epoch': 1}
{'type': 'loss', 'content': 0.2260725498199463, 'timestamp': '2025-10-02 00:14:58.655317', 'step': 2292, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:58.711458', 'step': 2292, 'epoch': 1}
{'type': 'loss', 'content': 0.0821651965379715, 'timestamp': '2025-10-02 00:14:58.717190', 'step': 2293, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:58.771882', 'step': 2293, 'epoch': 1}
{'type': 'loss', 'content': 0.06420990079641342, 'timestamp': '2025-10-02 00:14:58.775309', 'step': 2294, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:58.829475', 'step': 2294, 'epoch': 1}
{'type': 'loss', 'content': 0.1722337305545807, 'timestamp': '2025-10-02 00:14:58.832236', 'step': 2295, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:58.900418', 'step': 2295, 'epoch': 1}
{'type': 'loss', 'content': 0.053497616201639175, 'timestamp': '2025-10-02 00:14:58.907035', 'step': 2296, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:14:58.960246', 'step': 2296, 'epoch': 1}
{'type': 'loss', 'content': 0.0535813607275486, 'timestamp': '2025-10-02 00:14:58.963529', 'step': 2297, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:14:59.021067', 'step': 2297, 'epoch': 1}
{'type': 'loss', 'content': 0.15330742299556732, 'timestamp': '2025-10-02 00:14:59.024278', 'step': 2298, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:14:59.079350', 'step': 2298, 'epoch': 1}
{'type': 'loss', 'content': 0.14591331779956818, 'timestamp': '2025-10-02 00:14:59.082249', 'step': 2299, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:59.135756', 'step': 2299, 'epoch': 1}
{'type': 'loss', 'content': 0.11070401221513748, 'timestamp': '2025-10-02 00:14:59.142006', 'step': 2300, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:14:59.195008', 'step': 2300, 'epoch': 1}
{'type': 'loss', 'content': 0.15985752642154694, 'timestamp': '2025-10-02 00:14:59.197495', 'step': 2301, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:59.252349', 'step': 2301, 'epoch': 1}
{'type': 'loss', 'content': 0.10403012484312057, 'timestamp': '2025-10-02 00:14:59.258306', 'step': 2302, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:14:59.313253', 'step': 2302, 'epoch': 1}
{'type': 'loss', 'content': 0.08307938277721405, 'timestamp': '2025-10-02 00:14:59.315709', 'step': 2303, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:14:59.369250', 'step': 2303, 'epoch': 1}
{'type': 'loss', 'content': 0.10748428106307983, 'timestamp': '2025-10-02 00:14:59.375361', 'step': 2304, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:14:59.428575', 'step': 2304, 'epoch': 1}
{'type': 'loss', 'content': 0.06626421958208084, 'timestamp': '2025-10-02 00:14:59.438129', 'step': 2305, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:14:59.492158', 'step': 2305, 'epoch': 1}
{'type': 'loss', 'content': 0.09807801246643066, 'timestamp': '2025-10-02 00:14:59.494752', 'step': 2306, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:14:59.549107', 'step': 2306, 'epoch': 1}
{'type': 'loss', 'content': 0.14598461985588074, 'timestamp': '2025-10-02 00:14:59.551494', 'step': 2307, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:14:59.606254', 'step': 2307, 'epoch': 1}
{'type': 'loss', 'content': 0.16212694346904755, 'timestamp': '2025-10-02 00:14:59.612782', 'step': 2308, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:59.665418', 'step': 2308, 'epoch': 1}
{'type': 'loss', 'content': 0.09899842739105225, 'timestamp': '2025-10-02 00:14:59.671321', 'step': 2309, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:59.725139', 'step': 2309, 'epoch': 1}
{'type': 'loss', 'content': 0.1039736345410347, 'timestamp': '2025-10-02 00:14:59.730858', 'step': 2310, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:59.784540', 'step': 2310, 'epoch': 1}
{'type': 'loss', 'content': 0.11064047366380692, 'timestamp': '2025-10-02 00:14:59.786970', 'step': 2311, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:14:59.841197', 'step': 2311, 'epoch': 1}
{'type': 'loss', 'content': 0.07608895003795624, 'timestamp': '2025-10-02 00:14:59.847790', 'step': 2312, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:14:59.901355', 'step': 2312, 'epoch': 1}
{'type': 'loss', 'content': 0.08331815898418427, 'timestamp': '2025-10-02 00:14:59.903563', 'step': 2313, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:14:59.957194', 'step': 2313, 'epoch': 1}
{'type': 'loss', 'content': 0.06802108138799667, 'timestamp': '2025-10-02 00:14:59.959560', 'step': 2314, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:00.013426', 'step': 2314, 'epoch': 1}
{'type': 'loss', 'content': 0.1720910668373108, 'timestamp': '2025-10-02 00:15:00.017927', 'step': 2315, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:00.074459', 'step': 2315, 'epoch': 1}
{'type': 'loss', 'content': 0.0708642229437828, 'timestamp': '2025-10-02 00:15:00.081325', 'step': 2316, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:15:00.141241', 'step': 2316, 'epoch': 1}
{'type': 'loss', 'content': 0.0743018239736557, 'timestamp': '2025-10-02 00:15:00.152974', 'step': 2317, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:15:00.207074', 'step': 2317, 'epoch': 1}
{'type': 'loss', 'content': 0.18613356351852417, 'timestamp': '2025-10-02 00:15:00.210042', 'step': 2318, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:15:00.271423', 'step': 2318, 'epoch': 1}
{'type': 'loss', 'content': 0.027598027139902115, 'timestamp': '2025-10-02 00:15:00.282096', 'step': 2319, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 592], 'flops': 11840071943488.0}, 'timestamp': '2025-10-02 00:15:00.373512', 'step': 2319, 'epoch': 1}
{'type': 'loss', 'content': 0.019348958507180214, 'timestamp': '2025-10-02 00:15:00.390700', 'step': 2320, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:00.445169', 'step': 2320, 'epoch': 1}
{'type': 'loss', 'content': 0.07011564075946808, 'timestamp': '2025-10-02 00:15:00.447753', 'step': 2321, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:15:00.501956', 'step': 2321, 'epoch': 1}
{'type': 'loss', 'content': 0.13554023206233978, 'timestamp': '2025-10-02 00:15:00.504054', 'step': 2322, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:00.557633', 'step': 2322, 'epoch': 1}
{'type': 'loss', 'content': 0.19748927652835846, 'timestamp': '2025-10-02 00:15:00.560458', 'step': 2323, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:15:00.613706', 'step': 2323, 'epoch': 1}
{'type': 'loss', 'content': 0.2783958911895752, 'timestamp': '2025-10-02 00:15:00.619800', 'step': 2324, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:15:00.672539', 'step': 2324, 'epoch': 1}
{'type': 'loss', 'content': 0.19194015860557556, 'timestamp': '2025-10-02 00:15:00.675120', 'step': 2325, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:00.728987', 'step': 2325, 'epoch': 1}
{'type': 'loss', 'content': 0.18996278941631317, 'timestamp': '2025-10-02 00:15:00.731417', 'step': 2326, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:00.785046', 'step': 2326, 'epoch': 1}
{'type': 'loss', 'content': 0.06335984915494919, 'timestamp': '2025-10-02 00:15:00.792607', 'step': 2327, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:15:00.855324', 'step': 2327, 'epoch': 1}
{'type': 'loss', 'content': 0.03933179751038551, 'timestamp': '2025-10-02 00:15:00.867112', 'step': 2328, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:15:00.932186', 'step': 2328, 'epoch': 1}
{'type': 'loss', 'content': 0.042727936059236526, 'timestamp': '2025-10-02 00:15:00.943815', 'step': 2329, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:00.997610', 'step': 2329, 'epoch': 1}
{'type': 'loss', 'content': 0.03897201269865036, 'timestamp': '2025-10-02 00:15:01.005337', 'step': 2330, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:15:01.065818', 'step': 2330, 'epoch': 1}
{'type': 'loss', 'content': 0.03146462142467499, 'timestamp': '2025-10-02 00:15:01.076364', 'step': 2331, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:01.130756', 'step': 2331, 'epoch': 1}
{'type': 'loss', 'content': 0.0870310589671135, 'timestamp': '2025-10-02 00:15:01.137833', 'step': 2332, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:01.191466', 'step': 2332, 'epoch': 1}
{'type': 'loss', 'content': 0.1111266165971756, 'timestamp': '2025-10-02 00:15:01.193840', 'step': 2333, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:15:01.246888', 'step': 2333, 'epoch': 1}
{'type': 'loss', 'content': 0.13743209838867188, 'timestamp': '2025-10-02 00:15:01.249900', 'step': 2334, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:01.303919', 'step': 2334, 'epoch': 1}
{'type': 'loss', 'content': 0.07310911267995834, 'timestamp': '2025-10-02 00:15:01.306728', 'step': 2335, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:01.360942', 'step': 2335, 'epoch': 1}
{'type': 'loss', 'content': 0.1970515251159668, 'timestamp': '2025-10-02 00:15:01.366930', 'step': 2336, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:01.420225', 'step': 2336, 'epoch': 1}
{'type': 'loss', 'content': 0.042012955993413925, 'timestamp': '2025-10-02 00:15:01.422751', 'step': 2337, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:15:01.477262', 'step': 2337, 'epoch': 1}
{'type': 'loss', 'content': 0.09464339166879654, 'timestamp': '2025-10-02 00:15:01.487064', 'step': 2338, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:01.539957', 'step': 2338, 'epoch': 1}
{'type': 'loss', 'content': 0.08079373836517334, 'timestamp': '2025-10-02 00:15:01.542652', 'step': 2339, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:01.596487', 'step': 2339, 'epoch': 1}
{'type': 'loss', 'content': 0.043802518397569656, 'timestamp': '2025-10-02 00:15:01.604905', 'step': 2340, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:01.658790', 'step': 2340, 'epoch': 1}
{'type': 'loss', 'content': 0.0566597618162632, 'timestamp': '2025-10-02 00:15:01.661346', 'step': 2341, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:01.714964', 'step': 2341, 'epoch': 1}
{'type': 'loss', 'content': 0.08628404140472412, 'timestamp': '2025-10-02 00:15:01.717381', 'step': 2342, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:01.770402', 'step': 2342, 'epoch': 1}
{'type': 'loss', 'content': 0.09889282286167145, 'timestamp': '2025-10-02 00:15:01.774245', 'step': 2343, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:01.828188', 'step': 2343, 'epoch': 1}
{'type': 'loss', 'content': 0.056850895285606384, 'timestamp': '2025-10-02 00:15:01.833869', 'step': 2344, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:01.888676', 'step': 2344, 'epoch': 1}
{'type': 'loss', 'content': 0.059115830808877945, 'timestamp': '2025-10-02 00:15:01.890906', 'step': 2345, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:01.946774', 'step': 2345, 'epoch': 1}
{'type': 'loss', 'content': 0.09982315450906754, 'timestamp': '2025-10-02 00:15:01.949261', 'step': 2346, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:02.003582', 'step': 2346, 'epoch': 1}
{'type': 'loss', 'content': 0.16913698613643646, 'timestamp': '2025-10-02 00:15:02.013155', 'step': 2347, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:02.069798', 'step': 2347, 'epoch': 1}
{'type': 'loss', 'content': 0.05004366487264633, 'timestamp': '2025-10-02 00:15:02.078264', 'step': 2348, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:02.132546', 'step': 2348, 'epoch': 1}
{'type': 'loss', 'content': 0.04864199087023735, 'timestamp': '2025-10-02 00:15:02.134750', 'step': 2349, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:15:02.188792', 'step': 2349, 'epoch': 1}
{'type': 'loss', 'content': 0.1275050938129425, 'timestamp': '2025-10-02 00:15:02.191312', 'step': 2350, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:02.245361', 'step': 2350, 'epoch': 1}
{'type': 'loss', 'content': 0.09352995455265045, 'timestamp': '2025-10-02 00:15:02.248280', 'step': 2351, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:15:02.327026', 'step': 2351, 'epoch': 1}
{'type': 'loss', 'content': 0.04265742003917694, 'timestamp': '2025-10-02 00:15:02.341576', 'step': 2352, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:02.395081', 'step': 2352, 'epoch': 1}
{'type': 'loss', 'content': 0.17345832288265228, 'timestamp': '2025-10-02 00:15:02.402808', 'step': 2353, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:02.457050', 'step': 2353, 'epoch': 1}
{'type': 'loss', 'content': 0.0392562597990036, 'timestamp': '2025-10-02 00:15:02.459351', 'step': 2354, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:15:02.513452', 'step': 2354, 'epoch': 1}
{'type': 'loss', 'content': 0.1312858909368515, 'timestamp': '2025-10-02 00:15:02.515460', 'step': 2355, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:02.568761', 'step': 2355, 'epoch': 1}
{'type': 'loss', 'content': 0.24836409091949463, 'timestamp': '2025-10-02 00:15:02.574994', 'step': 2356, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:15:02.628252', 'step': 2356, 'epoch': 1}
{'type': 'loss', 'content': 0.2397320419549942, 'timestamp': '2025-10-02 00:15:02.631179', 'step': 2357, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:15:02.691048', 'step': 2357, 'epoch': 1}
{'type': 'loss', 'content': 0.03388427942991257, 'timestamp': '2025-10-02 00:15:02.701476', 'step': 2358, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:15:02.757088', 'step': 2358, 'epoch': 1}
{'type': 'loss', 'content': 0.05324817821383476, 'timestamp': '2025-10-02 00:15:02.766871', 'step': 2359, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:02.824092', 'step': 2359, 'epoch': 1}
{'type': 'loss', 'content': 0.07792365550994873, 'timestamp': '2025-10-02 00:15:02.830101', 'step': 2360, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:02.884135', 'step': 2360, 'epoch': 1}
{'type': 'loss', 'content': 0.057448167353868484, 'timestamp': '2025-10-02 00:15:02.891077', 'step': 2361, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:02.950986', 'step': 2361, 'epoch': 1}
{'type': 'loss', 'content': 0.13136227428913116, 'timestamp': '2025-10-02 00:15:02.953173', 'step': 2362, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:15:03.014000', 'step': 2362, 'epoch': 1}
{'type': 'loss', 'content': 0.08255218714475632, 'timestamp': '2025-10-02 00:15:03.017211', 'step': 2363, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:03.077380', 'step': 2363, 'epoch': 1}
{'type': 'loss', 'content': 0.009732501581311226, 'timestamp': '2025-10-02 00:15:03.085323', 'step': 2364, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:15:03.148666', 'step': 2364, 'epoch': 1}
{'type': 'loss', 'content': 0.01794566959142685, 'timestamp': '2025-10-02 00:15:03.158840', 'step': 2365, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:03.220017', 'step': 2365, 'epoch': 1}
{'type': 'loss', 'content': 0.0897534117102623, 'timestamp': '2025-10-02 00:15:03.222949', 'step': 2366, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:15:03.283814', 'step': 2366, 'epoch': 1}
{'type': 'loss', 'content': 0.12809813022613525, 'timestamp': '2025-10-02 00:15:03.286981', 'step': 2367, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:03.347316', 'step': 2367, 'epoch': 1}
{'type': 'loss', 'content': 0.05019007995724678, 'timestamp': '2025-10-02 00:15:03.354504', 'step': 2368, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:03.411177', 'step': 2368, 'epoch': 1}
{'type': 'loss', 'content': 0.05859704315662384, 'timestamp': '2025-10-02 00:15:03.413697', 'step': 2369, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:15:03.468678', 'step': 2369, 'epoch': 1}
{'type': 'loss', 'content': 0.2456856071949005, 'timestamp': '2025-10-02 00:15:03.472103', 'step': 2370, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:03.526099', 'step': 2370, 'epoch': 1}
{'type': 'loss', 'content': 0.05940060317516327, 'timestamp': '2025-10-02 00:15:03.531890', 'step': 2371, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:03.585766', 'step': 2371, 'epoch': 1}
{'type': 'loss', 'content': 0.03421608731150627, 'timestamp': '2025-10-02 00:15:03.592452', 'step': 2372, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:03.646784', 'step': 2372, 'epoch': 1}
{'type': 'loss', 'content': 0.0799902006983757, 'timestamp': '2025-10-02 00:15:03.656563', 'step': 2373, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:03.710491', 'step': 2373, 'epoch': 1}
{'type': 'loss', 'content': 0.1200874075293541, 'timestamp': '2025-10-02 00:15:03.713424', 'step': 2374, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:03.769921', 'step': 2374, 'epoch': 1}
{'type': 'loss', 'content': 0.010494153946638107, 'timestamp': '2025-10-02 00:15:03.773083', 'step': 2375, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:15:03.831430', 'step': 2375, 'epoch': 1}
{'type': 'loss', 'content': 0.17830300331115723, 'timestamp': '2025-10-02 00:15:03.837640', 'step': 2376, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:03.892125', 'step': 2376, 'epoch': 1}
{'type': 'loss', 'content': 0.08927998691797256, 'timestamp': '2025-10-02 00:15:03.894400', 'step': 2377, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:03.949052', 'step': 2377, 'epoch': 1}
{'type': 'loss', 'content': 0.08948412537574768, 'timestamp': '2025-10-02 00:15:03.951140', 'step': 2378, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:04.005169', 'step': 2378, 'epoch': 1}
{'type': 'loss', 'content': 0.23960576951503754, 'timestamp': '2025-10-02 00:15:04.008401', 'step': 2379, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:04.064603', 'step': 2379, 'epoch': 1}
{'type': 'loss', 'content': 0.09371867775917053, 'timestamp': '2025-10-02 00:15:04.071724', 'step': 2380, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:15:04.128677', 'step': 2380, 'epoch': 1}
{'type': 'loss', 'content': 0.2441520094871521, 'timestamp': '2025-10-02 00:15:04.131040', 'step': 2381, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:04.184983', 'step': 2381, 'epoch': 1}
{'type': 'loss', 'content': 0.11542988568544388, 'timestamp': '2025-10-02 00:15:04.188052', 'step': 2382, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:04.242986', 'step': 2382, 'epoch': 1}
{'type': 'loss', 'content': 0.08271680027246475, 'timestamp': '2025-10-02 00:15:04.248748', 'step': 2383, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:04.302692', 'step': 2383, 'epoch': 1}
{'type': 'loss', 'content': 0.10232959687709808, 'timestamp': '2025-10-02 00:15:04.308766', 'step': 2384, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:15:04.368080', 'step': 2384, 'epoch': 1}
{'type': 'loss', 'content': 0.030317047610878944, 'timestamp': '2025-10-02 00:15:04.379710', 'step': 2385, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:04.435583', 'step': 2385, 'epoch': 1}
{'type': 'loss', 'content': 0.05898246169090271, 'timestamp': '2025-10-02 00:15:04.439727', 'step': 2386, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:04.499192', 'step': 2386, 'epoch': 1}
{'type': 'loss', 'content': 0.10850659012794495, 'timestamp': '2025-10-02 00:15:04.501779', 'step': 2387, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:15:04.556513', 'step': 2387, 'epoch': 1}
{'type': 'loss', 'content': 0.10496655106544495, 'timestamp': '2025-10-02 00:15:04.563535', 'step': 2388, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:04.616991', 'step': 2388, 'epoch': 1}
{'type': 'loss', 'content': 0.05690087750554085, 'timestamp': '2025-10-02 00:15:04.619452', 'step': 2389, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:04.673938', 'step': 2389, 'epoch': 1}
{'type': 'loss', 'content': 0.09390340745449066, 'timestamp': '2025-10-02 00:15:04.676691', 'step': 2390, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:04.733244', 'step': 2390, 'epoch': 1}
{'type': 'loss', 'content': 0.03004721738398075, 'timestamp': '2025-10-02 00:15:04.735535', 'step': 2391, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:15:04.789828', 'step': 2391, 'epoch': 1}
{'type': 'loss', 'content': 0.1727578490972519, 'timestamp': '2025-10-02 00:15:04.796383', 'step': 2392, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:15:04.850119', 'step': 2392, 'epoch': 1}
{'type': 'loss', 'content': 0.1977788358926773, 'timestamp': '2025-10-02 00:15:04.852478', 'step': 2393, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:04.907957', 'step': 2393, 'epoch': 1}
{'type': 'loss', 'content': 0.06704150140285492, 'timestamp': '2025-10-02 00:15:04.913662', 'step': 2394, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:04.969726', 'step': 2394, 'epoch': 1}
{'type': 'loss', 'content': 0.18790072202682495, 'timestamp': '2025-10-02 00:15:04.972592', 'step': 2395, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:05.028797', 'step': 2395, 'epoch': 1}
{'type': 'loss', 'content': 0.05841853469610214, 'timestamp': '2025-10-02 00:15:05.035429', 'step': 2396, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:05.099522', 'step': 2396, 'epoch': 1}
{'type': 'loss', 'content': 0.07095956802368164, 'timestamp': '2025-10-02 00:15:05.105719', 'step': 2397, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:05.161481', 'step': 2397, 'epoch': 1}
{'type': 'loss', 'content': 0.11118831485509872, 'timestamp': '2025-10-02 00:15:05.164094', 'step': 2398, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:05.218138', 'step': 2398, 'epoch': 1}
{'type': 'loss', 'content': 0.10484109073877335, 'timestamp': '2025-10-02 00:15:05.220981', 'step': 2399, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:15:05.294992', 'step': 2399, 'epoch': 1}
{'type': 'loss', 'content': 0.018003104254603386, 'timestamp': '2025-10-02 00:15:05.309461', 'step': 2400, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:05.364724', 'step': 2400, 'epoch': 1}
{'type': 'loss', 'content': 0.06713477522134781, 'timestamp': '2025-10-02 00:15:05.370195', 'step': 2401, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:05.425072', 'step': 2401, 'epoch': 1}
{'type': 'loss', 'content': 0.0774555578827858, 'timestamp': '2025-10-02 00:15:05.432507', 'step': 2402, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:05.487849', 'step': 2402, 'epoch': 1}
{'type': 'loss', 'content': 0.021795595064759254, 'timestamp': '2025-10-02 00:15:05.495288', 'step': 2403, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:15:05.553422', 'step': 2403, 'epoch': 1}
{'type': 'loss', 'content': 0.04459415003657341, 'timestamp': '2025-10-02 00:15:05.564577', 'step': 2404, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:05.617649', 'step': 2404, 'epoch': 1}
{'type': 'loss', 'content': 0.08521541953086853, 'timestamp': '2025-10-02 00:15:05.620036', 'step': 2405, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:05.673135', 'step': 2405, 'epoch': 1}
{'type': 'loss', 'content': 0.15270738303661346, 'timestamp': '2025-10-02 00:15:05.675605', 'step': 2406, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:05.729431', 'step': 2406, 'epoch': 1}
{'type': 'loss', 'content': 0.176801398396492, 'timestamp': '2025-10-02 00:15:05.731677', 'step': 2407, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:15:05.792557', 'step': 2407, 'epoch': 1}
{'type': 'loss', 'content': 0.07100094109773636, 'timestamp': '2025-10-02 00:15:05.804256', 'step': 2408, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:05.858824', 'step': 2408, 'epoch': 1}
{'type': 'loss', 'content': 0.02108852006494999, 'timestamp': '2025-10-02 00:15:05.868279', 'step': 2409, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:05.924106', 'step': 2409, 'epoch': 1}
{'type': 'loss', 'content': 0.04504581540822983, 'timestamp': '2025-10-02 00:15:05.927788', 'step': 2410, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:05.982757', 'step': 2410, 'epoch': 1}
{'type': 'loss', 'content': 0.14813914895057678, 'timestamp': '2025-10-02 00:15:05.986452', 'step': 2411, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:06.040916', 'step': 2411, 'epoch': 1}
{'type': 'loss', 'content': 0.17991717159748077, 'timestamp': '2025-10-02 00:15:06.047731', 'step': 2412, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:06.106388', 'step': 2412, 'epoch': 1}
{'type': 'loss', 'content': 0.09130117297172546, 'timestamp': '2025-10-02 00:15:06.111054', 'step': 2413, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:06.168279', 'step': 2413, 'epoch': 1}
{'type': 'loss', 'content': 0.1423511803150177, 'timestamp': '2025-10-02 00:15:06.171337', 'step': 2414, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:15:06.227165', 'step': 2414, 'epoch': 1}
{'type': 'loss', 'content': 0.24967144429683685, 'timestamp': '2025-10-02 00:15:06.230295', 'step': 2415, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:06.286048', 'step': 2415, 'epoch': 1}
{'type': 'loss', 'content': 0.2249302864074707, 'timestamp': '2025-10-02 00:15:06.293313', 'step': 2416, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:06.348640', 'step': 2416, 'epoch': 1}
{'type': 'loss', 'content': 0.09025583416223526, 'timestamp': '2025-10-02 00:15:06.351622', 'step': 2417, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:06.407303', 'step': 2417, 'epoch': 1}
{'type': 'loss', 'content': 0.1533300131559372, 'timestamp': '2025-10-02 00:15:06.410487', 'step': 2418, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:06.466780', 'step': 2418, 'epoch': 1}
{'type': 'loss', 'content': 0.08443385362625122, 'timestamp': '2025-10-02 00:15:06.469259', 'step': 2419, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:06.524058', 'step': 2419, 'epoch': 1}
{'type': 'loss', 'content': 0.13479548692703247, 'timestamp': '2025-10-02 00:15:06.531183', 'step': 2420, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:06.586231', 'step': 2420, 'epoch': 1}
{'type': 'loss', 'content': 0.14211902022361755, 'timestamp': '2025-10-02 00:15:06.589528', 'step': 2421, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:06.645403', 'step': 2421, 'epoch': 1}
{'type': 'loss', 'content': 0.12335249036550522, 'timestamp': '2025-10-02 00:15:06.653104', 'step': 2422, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:06.708895', 'step': 2422, 'epoch': 1}
{'type': 'loss', 'content': 0.14266297221183777, 'timestamp': '2025-10-02 00:15:06.716453', 'step': 2423, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:06.772880', 'step': 2423, 'epoch': 1}
{'type': 'loss', 'content': 0.05004018545150757, 'timestamp': '2025-10-02 00:15:06.783135', 'step': 2424, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:06.840410', 'step': 2424, 'epoch': 1}
{'type': 'loss', 'content': 0.14557217061519623, 'timestamp': '2025-10-02 00:15:06.843641', 'step': 2425, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:06.899558', 'step': 2425, 'epoch': 1}
{'type': 'loss', 'content': 0.09306303411722183, 'timestamp': '2025-10-02 00:15:06.902341', 'step': 2426, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:06.958383', 'step': 2426, 'epoch': 1}
{'type': 'loss', 'content': 0.06264098733663559, 'timestamp': '2025-10-02 00:15:06.960888', 'step': 2427, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:07.017472', 'step': 2427, 'epoch': 1}
{'type': 'loss', 'content': 0.1943078488111496, 'timestamp': '2025-10-02 00:15:07.023978', 'step': 2428, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:07.080340', 'step': 2428, 'epoch': 1}
{'type': 'loss', 'content': 0.0681367889046669, 'timestamp': '2025-10-02 00:15:07.086803', 'step': 2429, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:07.142963', 'step': 2429, 'epoch': 1}
{'type': 'loss', 'content': 0.047722864896059036, 'timestamp': '2025-10-02 00:15:07.150838', 'step': 2430, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:07.211381', 'step': 2430, 'epoch': 1}
{'type': 'loss', 'content': 0.26589515805244446, 'timestamp': '2025-10-02 00:15:07.214043', 'step': 2431, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:07.268660', 'step': 2431, 'epoch': 1}
{'type': 'loss', 'content': 0.22693750262260437, 'timestamp': '2025-10-02 00:15:07.276037', 'step': 2432, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:07.331418', 'step': 2432, 'epoch': 1}
{'type': 'loss', 'content': 0.02499563992023468, 'timestamp': '2025-10-02 00:15:07.338980', 'step': 2433, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:07.396455', 'step': 2433, 'epoch': 1}
{'type': 'loss', 'content': 0.12333114445209503, 'timestamp': '2025-10-02 00:15:07.399200', 'step': 2434, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:15:07.455100', 'step': 2434, 'epoch': 1}
{'type': 'loss', 'content': 0.20630189776420593, 'timestamp': '2025-10-02 00:15:07.458618', 'step': 2435, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:07.517041', 'step': 2435, 'epoch': 1}
{'type': 'loss', 'content': 0.10729732364416122, 'timestamp': '2025-10-02 00:15:07.525343', 'step': 2436, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:07.582419', 'step': 2436, 'epoch': 1}
{'type': 'loss', 'content': 0.09455028176307678, 'timestamp': '2025-10-02 00:15:07.586250', 'step': 2437, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:07.641826', 'step': 2437, 'epoch': 1}
{'type': 'loss', 'content': 0.11993543803691864, 'timestamp': '2025-10-02 00:15:07.645985', 'step': 2438, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:15:07.701396', 'step': 2438, 'epoch': 1}
{'type': 'loss', 'content': 0.14697016775608063, 'timestamp': '2025-10-02 00:15:07.704582', 'step': 2439, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:07.759577', 'step': 2439, 'epoch': 1}
{'type': 'loss', 'content': 0.2013898342847824, 'timestamp': '2025-10-02 00:15:07.767048', 'step': 2440, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:07.821677', 'step': 2440, 'epoch': 1}
{'type': 'loss', 'content': 0.08562831580638885, 'timestamp': '2025-10-02 00:15:07.824250', 'step': 2441, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:07.879703', 'step': 2441, 'epoch': 1}
{'type': 'loss', 'content': 0.04059233143925667, 'timestamp': '2025-10-02 00:15:07.883453', 'step': 2442, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:15:07.938034', 'step': 2442, 'epoch': 1}
{'type': 'loss', 'content': 0.210297629237175, 'timestamp': '2025-10-02 00:15:07.940284', 'step': 2443, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:07.996397', 'step': 2443, 'epoch': 1}
{'type': 'loss', 'content': 0.16339148581027985, 'timestamp': '2025-10-02 00:15:08.003276', 'step': 2444, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:15:08.057822', 'step': 2444, 'epoch': 1}
{'type': 'loss', 'content': 0.06207147240638733, 'timestamp': '2025-10-02 00:15:08.068298', 'step': 2445, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:08.122833', 'step': 2445, 'epoch': 1}
{'type': 'loss', 'content': 0.11119838804006577, 'timestamp': '2025-10-02 00:15:08.128761', 'step': 2446, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:08.183417', 'step': 2446, 'epoch': 1}
{'type': 'loss', 'content': 0.04354516416788101, 'timestamp': '2025-10-02 00:15:08.185761', 'step': 2447, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:15:08.247365', 'step': 2447, 'epoch': 1}
{'type': 'loss', 'content': 0.09796110540628433, 'timestamp': '2025-10-02 00:15:08.259004', 'step': 2448, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:08.313021', 'step': 2448, 'epoch': 1}
{'type': 'loss', 'content': 0.05209388583898544, 'timestamp': '2025-10-02 00:15:08.320425', 'step': 2449, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:08.374601', 'step': 2449, 'epoch': 1}
{'type': 'loss', 'content': 0.07283136248588562, 'timestamp': '2025-10-02 00:15:08.377255', 'step': 2450, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:08.431338', 'step': 2450, 'epoch': 1}
{'type': 'loss', 'content': 0.14796161651611328, 'timestamp': '2025-10-02 00:15:08.433474', 'step': 2451, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:08.486945', 'step': 2451, 'epoch': 1}
{'type': 'loss', 'content': 0.061894822865724564, 'timestamp': '2025-10-02 00:15:08.493105', 'step': 2452, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:08.546203', 'step': 2452, 'epoch': 1}
{'type': 'loss', 'content': 0.1554514318704605, 'timestamp': '2025-10-02 00:15:08.549040', 'step': 2453, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:08.603055', 'step': 2453, 'epoch': 1}
{'type': 'loss', 'content': 0.2434246838092804, 'timestamp': '2025-10-02 00:15:08.605945', 'step': 2454, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:15:08.660948', 'step': 2454, 'epoch': 1}
{'type': 'loss', 'content': 0.09351403266191483, 'timestamp': '2025-10-02 00:15:08.670730', 'step': 2455, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:08.724422', 'step': 2455, 'epoch': 1}
{'type': 'loss', 'content': 0.06549723446369171, 'timestamp': '2025-10-02 00:15:08.731116', 'step': 2456, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:08.784443', 'step': 2456, 'epoch': 1}
{'type': 'loss', 'content': 0.04363109543919563, 'timestamp': '2025-10-02 00:15:08.794370', 'step': 2457, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:08.852247', 'step': 2457, 'epoch': 1}
{'type': 'loss', 'content': 0.14214582741260529, 'timestamp': '2025-10-02 00:15:08.854665', 'step': 2458, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:08.908622', 'step': 2458, 'epoch': 1}
{'type': 'loss', 'content': 0.17018826305866241, 'timestamp': '2025-10-02 00:15:08.911853', 'step': 2459, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:15:08.966342', 'step': 2459, 'epoch': 1}
{'type': 'loss', 'content': 0.13918183743953705, 'timestamp': '2025-10-02 00:15:08.972384', 'step': 2460, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:09.025226', 'step': 2460, 'epoch': 1}
{'type': 'loss', 'content': 0.06186184659600258, 'timestamp': '2025-10-02 00:15:09.027978', 'step': 2461, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:09.083078', 'step': 2461, 'epoch': 1}
{'type': 'loss', 'content': 0.048431068658828735, 'timestamp': '2025-10-02 00:15:09.085711', 'step': 2462, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:09.141441', 'step': 2462, 'epoch': 1}
{'type': 'loss', 'content': 0.07604295015335083, 'timestamp': '2025-10-02 00:15:09.144553', 'step': 2463, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:09.201424', 'step': 2463, 'epoch': 1}
{'type': 'loss', 'content': 0.05382857844233513, 'timestamp': '2025-10-02 00:15:09.207934', 'step': 2464, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:09.262013', 'step': 2464, 'epoch': 1}
{'type': 'loss', 'content': 0.07118067890405655, 'timestamp': '2025-10-02 00:15:09.264680', 'step': 2465, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:15:09.318155', 'step': 2465, 'epoch': 1}
{'type': 'loss', 'content': 0.18634292483329773, 'timestamp': '2025-10-02 00:15:09.320810', 'step': 2466, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:15:09.382078', 'step': 2466, 'epoch': 1}
{'type': 'loss', 'content': 0.08971790969371796, 'timestamp': '2025-10-02 00:15:09.392841', 'step': 2467, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:15:09.459582', 'step': 2467, 'epoch': 1}
{'type': 'loss', 'content': 0.03922773152589798, 'timestamp': '2025-10-02 00:15:09.472587', 'step': 2468, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:09.526742', 'step': 2468, 'epoch': 1}
{'type': 'loss', 'content': 0.11336679011583328, 'timestamp': '2025-10-02 00:15:09.535660', 'step': 2469, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:09.590254', 'step': 2469, 'epoch': 1}
{'type': 'loss', 'content': 0.07840639352798462, 'timestamp': '2025-10-02 00:15:09.593267', 'step': 2470, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:09.647070', 'step': 2470, 'epoch': 1}
{'type': 'loss', 'content': 0.09698083996772766, 'timestamp': '2025-10-02 00:15:09.654667', 'step': 2471, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:15:09.708844', 'step': 2471, 'epoch': 1}
{'type': 'loss', 'content': 0.14720739424228668, 'timestamp': '2025-10-02 00:15:09.714804', 'step': 2472, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:09.768134', 'step': 2472, 'epoch': 1}
{'type': 'loss', 'content': 0.08430518954992294, 'timestamp': '2025-10-02 00:15:09.775686', 'step': 2473, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:09.830773', 'step': 2473, 'epoch': 1}
{'type': 'loss', 'content': 0.02488454431295395, 'timestamp': '2025-10-02 00:15:09.838469', 'step': 2474, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:15:09.893486', 'step': 2474, 'epoch': 1}
{'type': 'loss', 'content': 0.08517811447381973, 'timestamp': '2025-10-02 00:15:09.896048', 'step': 2475, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:09.949534', 'step': 2475, 'epoch': 1}
{'type': 'loss', 'content': 0.0667804703116417, 'timestamp': '2025-10-02 00:15:09.959912', 'step': 2476, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:10.017963', 'step': 2476, 'epoch': 1}
{'type': 'loss', 'content': 0.16305769979953766, 'timestamp': '2025-10-02 00:15:10.020273', 'step': 2477, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:10.073334', 'step': 2477, 'epoch': 1}
{'type': 'loss', 'content': 0.05516999214887619, 'timestamp': '2025-10-02 00:15:10.079363', 'step': 2478, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:15:10.141542', 'step': 2478, 'epoch': 1}
{'type': 'loss', 'content': 0.060899700969457626, 'timestamp': '2025-10-02 00:15:10.152397', 'step': 2479, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:10.207861', 'step': 2479, 'epoch': 1}
{'type': 'loss', 'content': 0.13962379097938538, 'timestamp': '2025-10-02 00:15:10.213725', 'step': 2480, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:10.268824', 'step': 2480, 'epoch': 1}
{'type': 'loss', 'content': 0.22243885695934296, 'timestamp': '2025-10-02 00:15:10.274812', 'step': 2481, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:10.329159', 'step': 2481, 'epoch': 1}
{'type': 'loss', 'content': 0.05127650499343872, 'timestamp': '2025-10-02 00:15:10.332297', 'step': 2482, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:10.385631', 'step': 2482, 'epoch': 1}
{'type': 'loss', 'content': 0.20159928500652313, 'timestamp': '2025-10-02 00:15:10.388121', 'step': 2483, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:10.442277', 'step': 2483, 'epoch': 1}
{'type': 'loss', 'content': 0.022834893316030502, 'timestamp': '2025-10-02 00:15:10.452662', 'step': 2484, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:10.508921', 'step': 2484, 'epoch': 1}
{'type': 'loss', 'content': 0.06007378548383713, 'timestamp': '2025-10-02 00:15:10.511992', 'step': 2485, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:15:10.573539', 'step': 2485, 'epoch': 1}
{'type': 'loss', 'content': 0.16893154382705688, 'timestamp': '2025-10-02 00:15:10.576064', 'step': 2486, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:15:10.636705', 'step': 2486, 'epoch': 1}
{'type': 'loss', 'content': 0.06715238839387894, 'timestamp': '2025-10-02 00:15:10.647483', 'step': 2487, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:10.702793', 'step': 2487, 'epoch': 1}
{'type': 'loss', 'content': 0.03647742420434952, 'timestamp': '2025-10-02 00:15:10.711164', 'step': 2488, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:10.763884', 'step': 2488, 'epoch': 1}
{'type': 'loss', 'content': 0.08405737578868866, 'timestamp': '2025-10-02 00:15:10.771641', 'step': 2489, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:10.829368', 'step': 2489, 'epoch': 1}
{'type': 'loss', 'content': 0.15573781728744507, 'timestamp': '2025-10-02 00:15:10.831735', 'step': 2490, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:15:10.886244', 'step': 2490, 'epoch': 1}
{'type': 'loss', 'content': 0.1351170688867569, 'timestamp': '2025-10-02 00:15:10.896009', 'step': 2491, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:10.950936', 'step': 2491, 'epoch': 1}
{'type': 'loss', 'content': 0.11884672194719315, 'timestamp': '2025-10-02 00:15:10.961276', 'step': 2492, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:11.013875', 'step': 2492, 'epoch': 1}
{'type': 'loss', 'content': 0.24282139539718628, 'timestamp': '2025-10-02 00:15:11.016330', 'step': 2493, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:11.069337', 'step': 2493, 'epoch': 1}
{'type': 'loss', 'content': 0.26203465461730957, 'timestamp': '2025-10-02 00:15:11.072481', 'step': 2494, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:11.128481', 'step': 2494, 'epoch': 1}
{'type': 'loss', 'content': 0.0848369374871254, 'timestamp': '2025-10-02 00:15:11.131547', 'step': 2495, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:15:11.190633', 'step': 2495, 'epoch': 1}
{'type': 'loss', 'content': 0.15479397773742676, 'timestamp': '2025-10-02 00:15:11.197442', 'step': 2496, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:15:11.255878', 'step': 2496, 'epoch': 1}
{'type': 'loss', 'content': 0.03333647549152374, 'timestamp': '2025-10-02 00:15:11.267109', 'step': 2497, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:15:11.321560', 'step': 2497, 'epoch': 1}
{'type': 'loss', 'content': 0.1479908525943756, 'timestamp': '2025-10-02 00:15:11.324410', 'step': 2498, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:11.378468', 'step': 2498, 'epoch': 1}
{'type': 'loss', 'content': 0.15075351297855377, 'timestamp': '2025-10-02 00:15:11.382357', 'step': 2499, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:11.435677', 'step': 2499, 'epoch': 1}
{'type': 'loss', 'content': 0.2007923573255539, 'timestamp': '2025-10-02 00:15:11.442073', 'step': 2500, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 2500', 'timestamp': '2025-10-02 00:15:11.886935', 'step': 2500, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:11.941552', 'step': 2500, 'epoch': 1}
{'type': 'loss', 'content': 0.16824746131896973, 'timestamp': '2025-10-02 00:15:11.944303', 'step': 2501, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:11.997724', 'step': 2501, 'epoch': 1}
{'type': 'loss', 'content': 0.12336442619562149, 'timestamp': '2025-10-02 00:15:12.007090', 'step': 2502, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:15:12.060478', 'step': 2502, 'epoch': 1}
{'type': 'loss', 'content': 0.08193614333868027, 'timestamp': '2025-10-02 00:15:12.063272', 'step': 2503, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:15:12.121386', 'step': 2503, 'epoch': 1}
{'type': 'loss', 'content': 0.027283184230327606, 'timestamp': '2025-10-02 00:15:12.132609', 'step': 2504, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:15:12.190741', 'step': 2504, 'epoch': 1}
{'type': 'loss', 'content': 0.06718914955854416, 'timestamp': '2025-10-02 00:15:12.201939', 'step': 2505, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:12.256722', 'step': 2505, 'epoch': 1}
{'type': 'loss', 'content': 0.08450129628181458, 'timestamp': '2025-10-02 00:15:12.262664', 'step': 2506, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:12.318546', 'step': 2506, 'epoch': 1}
{'type': 'loss', 'content': 0.06699661165475845, 'timestamp': '2025-10-02 00:15:12.321431', 'step': 2507, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:12.377102', 'step': 2507, 'epoch': 1}
{'type': 'loss', 'content': 0.13768428564071655, 'timestamp': '2025-10-02 00:15:12.383699', 'step': 2508, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:12.436408', 'step': 2508, 'epoch': 1}
{'type': 'loss', 'content': 0.17861208319664001, 'timestamp': '2025-10-02 00:15:12.438714', 'step': 2509, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:12.492253', 'step': 2509, 'epoch': 1}
{'type': 'loss', 'content': 0.03373872488737106, 'timestamp': '2025-10-02 00:15:12.495619', 'step': 2510, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:12.549592', 'step': 2510, 'epoch': 1}
{'type': 'loss', 'content': 0.20057706534862518, 'timestamp': '2025-10-02 00:15:12.552253', 'step': 2511, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:15:12.606039', 'step': 2511, 'epoch': 1}
{'type': 'loss', 'content': 0.18081767857074738, 'timestamp': '2025-10-02 00:15:12.613393', 'step': 2512, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:15:12.666686', 'step': 2512, 'epoch': 1}
{'type': 'loss', 'content': 0.12075501680374146, 'timestamp': '2025-10-02 00:15:12.669152', 'step': 2513, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:12.722910', 'step': 2513, 'epoch': 1}
{'type': 'loss', 'content': 0.11806902289390564, 'timestamp': '2025-10-02 00:15:12.725974', 'step': 2514, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:12.780001', 'step': 2514, 'epoch': 1}
{'type': 'loss', 'content': 0.059525199234485626, 'timestamp': '2025-10-02 00:15:12.784804', 'step': 2515, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:15:12.850634', 'step': 2515, 'epoch': 1}
{'type': 'loss', 'content': 0.04440249502658844, 'timestamp': '2025-10-02 00:15:12.863670', 'step': 2516, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:12.916656', 'step': 2516, 'epoch': 1}
{'type': 'loss', 'content': 0.09832821786403656, 'timestamp': '2025-10-02 00:15:12.919260', 'step': 2517, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:12.972007', 'step': 2517, 'epoch': 1}
{'type': 'loss', 'content': 0.11615131050348282, 'timestamp': '2025-10-02 00:15:12.974721', 'step': 2518, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:13.028396', 'step': 2518, 'epoch': 1}
{'type': 'loss', 'content': 0.02219623513519764, 'timestamp': '2025-10-02 00:15:13.037995', 'step': 2519, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:13.091725', 'step': 2519, 'epoch': 1}
{'type': 'loss', 'content': 0.21653775870800018, 'timestamp': '2025-10-02 00:15:13.098079', 'step': 2520, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:13.153455', 'step': 2520, 'epoch': 1}
{'type': 'loss', 'content': 0.05796435847878456, 'timestamp': '2025-10-02 00:15:13.159414', 'step': 2521, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:13.215134', 'step': 2521, 'epoch': 1}
{'type': 'loss', 'content': 0.2119055688381195, 'timestamp': '2025-10-02 00:15:13.218924', 'step': 2522, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:13.274800', 'step': 2522, 'epoch': 1}
{'type': 'loss', 'content': 0.032689642161130905, 'timestamp': '2025-10-02 00:15:13.282385', 'step': 2523, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:15:13.343162', 'step': 2523, 'epoch': 1}
{'type': 'loss', 'content': 0.025775888934731483, 'timestamp': '2025-10-02 00:15:13.354699', 'step': 2524, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:15:13.407529', 'step': 2524, 'epoch': 1}
{'type': 'loss', 'content': 0.15002453327178955, 'timestamp': '2025-10-02 00:15:13.410044', 'step': 2525, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:15:13.464304', 'step': 2525, 'epoch': 1}
{'type': 'loss', 'content': 0.053643446415662766, 'timestamp': '2025-10-02 00:15:13.474084', 'step': 2526, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:13.530909', 'step': 2526, 'epoch': 1}
{'type': 'loss', 'content': 0.18237754702568054, 'timestamp': '2025-10-02 00:15:13.533636', 'step': 2527, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:15:13.591003', 'step': 2527, 'epoch': 1}
{'type': 'loss', 'content': 0.050430431962013245, 'timestamp': '2025-10-02 00:15:13.602232', 'step': 2528, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:13.655933', 'step': 2528, 'epoch': 1}
{'type': 'loss', 'content': 0.05613025650382042, 'timestamp': '2025-10-02 00:15:13.658849', 'step': 2529, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:13.713529', 'step': 2529, 'epoch': 1}
{'type': 'loss', 'content': 0.11928939074277878, 'timestamp': '2025-10-02 00:15:13.719534', 'step': 2530, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:15:13.774244', 'step': 2530, 'epoch': 1}
{'type': 'loss', 'content': 0.03516937419772148, 'timestamp': '2025-10-02 00:15:13.784059', 'step': 2531, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:13.837194', 'step': 2531, 'epoch': 1}
{'type': 'loss', 'content': 0.054539281874895096, 'timestamp': '2025-10-02 00:15:13.843075', 'step': 2532, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:15:13.895764', 'step': 2532, 'epoch': 1}
{'type': 'loss', 'content': 0.08044684678316116, 'timestamp': '2025-10-02 00:15:13.899022', 'step': 2533, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:15:13.952224', 'step': 2533, 'epoch': 1}
{'type': 'loss', 'content': 0.24676848948001862, 'timestamp': '2025-10-02 00:15:13.954820', 'step': 2534, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:14.010813', 'step': 2534, 'epoch': 1}
{'type': 'loss', 'content': 0.09165793657302856, 'timestamp': '2025-10-02 00:15:14.013390', 'step': 2535, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:14.067812', 'step': 2535, 'epoch': 1}
{'type': 'loss', 'content': 0.062427178025245667, 'timestamp': '2025-10-02 00:15:14.074116', 'step': 2536, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:14.126572', 'step': 2536, 'epoch': 1}
{'type': 'loss', 'content': 0.055469825863838196, 'timestamp': '2025-10-02 00:15:14.129122', 'step': 2537, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:14.183239', 'step': 2537, 'epoch': 1}
{'type': 'loss', 'content': 0.0285777784883976, 'timestamp': '2025-10-02 00:15:14.185973', 'step': 2538, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:15:14.248246', 'step': 2538, 'epoch': 1}
{'type': 'loss', 'content': 0.032628096640110016, 'timestamp': '2025-10-02 00:15:14.259344', 'step': 2539, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:14.313337', 'step': 2539, 'epoch': 1}
{'type': 'loss', 'content': 0.1291704773902893, 'timestamp': '2025-10-02 00:15:14.319474', 'step': 2540, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:15:14.373479', 'step': 2540, 'epoch': 1}
{'type': 'loss', 'content': 0.16148792207241058, 'timestamp': '2025-10-02 00:15:14.375875', 'step': 2541, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:15:14.429034', 'step': 2541, 'epoch': 1}
{'type': 'loss', 'content': 0.1925698071718216, 'timestamp': '2025-10-02 00:15:14.431329', 'step': 2542, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:14.485184', 'step': 2542, 'epoch': 1}
{'type': 'loss', 'content': 0.03248283639550209, 'timestamp': '2025-10-02 00:15:14.492963', 'step': 2543, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:15:14.545746', 'step': 2543, 'epoch': 1}
{'type': 'loss', 'content': 0.1778361201286316, 'timestamp': '2025-10-02 00:15:14.551556', 'step': 2544, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-10-02 00:15:14.630870', 'step': 2544, 'epoch': 1}
{'type': 'loss', 'content': 0.020714323967695236, 'timestamp': '2025-10-02 00:15:14.647125', 'step': 2545, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:14.700498', 'step': 2545, 'epoch': 1}
{'type': 'loss', 'content': 0.10093294829130173, 'timestamp': '2025-10-02 00:15:14.706481', 'step': 2546, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:14.760675', 'step': 2546, 'epoch': 1}
{'type': 'loss', 'content': 0.0719776526093483, 'timestamp': '2025-10-02 00:15:14.763294', 'step': 2547, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:14.817510', 'step': 2547, 'epoch': 1}
{'type': 'loss', 'content': 0.040926069021224976, 'timestamp': '2025-10-02 00:15:14.825884', 'step': 2548, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:14.880511', 'step': 2548, 'epoch': 1}
{'type': 'loss', 'content': 0.08717142045497894, 'timestamp': '2025-10-02 00:15:14.883025', 'step': 2549, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:14.939159', 'step': 2549, 'epoch': 1}
{'type': 'loss', 'content': 0.14459963142871857, 'timestamp': '2025-10-02 00:15:14.942489', 'step': 2550, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:14.996080', 'step': 2550, 'epoch': 1}
{'type': 'loss', 'content': 0.0903119370341301, 'timestamp': '2025-10-02 00:15:15.005679', 'step': 2551, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:15.059740', 'step': 2551, 'epoch': 1}
{'type': 'loss', 'content': 0.03249591961503029, 'timestamp': '2025-10-02 00:15:15.068102', 'step': 2552, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:15.123651', 'step': 2552, 'epoch': 1}
{'type': 'loss', 'content': 0.020422881469130516, 'timestamp': '2025-10-02 00:15:15.129587', 'step': 2553, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:15.184728', 'step': 2553, 'epoch': 1}
{'type': 'loss', 'content': 0.24850910902023315, 'timestamp': '2025-10-02 00:15:15.188227', 'step': 2554, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:15.244616', 'step': 2554, 'epoch': 1}
{'type': 'loss', 'content': 0.14735661447048187, 'timestamp': '2025-10-02 00:15:15.247820', 'step': 2555, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:15.303266', 'step': 2555, 'epoch': 1}
{'type': 'loss', 'content': 0.062206923961639404, 'timestamp': '2025-10-02 00:15:15.310311', 'step': 2556, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:15.365654', 'step': 2556, 'epoch': 1}
{'type': 'loss', 'content': 0.17652814090251923, 'timestamp': '2025-10-02 00:15:15.368841', 'step': 2557, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:15.425471', 'step': 2557, 'epoch': 1}
{'type': 'loss', 'content': 0.14407502114772797, 'timestamp': '2025-10-02 00:15:15.432797', 'step': 2558, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:15.487756', 'step': 2558, 'epoch': 1}
{'type': 'loss', 'content': 0.09689842164516449, 'timestamp': '2025-10-02 00:15:15.491052', 'step': 2559, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:15.547522', 'step': 2559, 'epoch': 1}
{'type': 'loss', 'content': 0.08859919756650925, 'timestamp': '2025-10-02 00:15:15.553857', 'step': 2560, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:15:15.608722', 'step': 2560, 'epoch': 1}
{'type': 'loss', 'content': 0.021214766427874565, 'timestamp': '2025-10-02 00:15:15.619210', 'step': 2561, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:15.675582', 'step': 2561, 'epoch': 1}
{'type': 'loss', 'content': 0.04187482222914696, 'timestamp': '2025-10-02 00:15:15.683109', 'step': 2562, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:15.738318', 'step': 2562, 'epoch': 1}
{'type': 'loss', 'content': 0.09320279955863953, 'timestamp': '2025-10-02 00:15:15.740651', 'step': 2563, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:15.796166', 'step': 2563, 'epoch': 1}
{'type': 'loss', 'content': 0.0974186360836029, 'timestamp': '2025-10-02 00:15:15.802459', 'step': 2564, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:15:15.864006', 'step': 2564, 'epoch': 1}
{'type': 'loss', 'content': 0.05537019297480583, 'timestamp': '2025-10-02 00:15:15.875703', 'step': 2565, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:15:15.930957', 'step': 2565, 'epoch': 1}
{'type': 'loss', 'content': 0.16057360172271729, 'timestamp': '2025-10-02 00:15:15.934317', 'step': 2566, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:15.990437', 'step': 2566, 'epoch': 1}
{'type': 'loss', 'content': 0.04245370253920555, 'timestamp': '2025-10-02 00:15:15.998165', 'step': 2567, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:16.055250', 'step': 2567, 'epoch': 1}
{'type': 'loss', 'content': 0.06384787708520889, 'timestamp': '2025-10-02 00:15:16.062866', 'step': 2568, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:15:16.118315', 'step': 2568, 'epoch': 1}
{'type': 'loss', 'content': 0.13072569668293, 'timestamp': '2025-10-02 00:15:16.121271', 'step': 2569, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:16.177084', 'step': 2569, 'epoch': 1}
{'type': 'loss', 'content': 0.09658856689929962, 'timestamp': '2025-10-02 00:15:16.180423', 'step': 2570, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:16.236504', 'step': 2570, 'epoch': 1}
{'type': 'loss', 'content': 0.1253117322921753, 'timestamp': '2025-10-02 00:15:16.239441', 'step': 2571, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:15:16.299514', 'step': 2571, 'epoch': 1}
{'type': 'loss', 'content': 0.08643344789743423, 'timestamp': '2025-10-02 00:15:16.310717', 'step': 2572, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:15:16.367350', 'step': 2572, 'epoch': 1}
{'type': 'loss', 'content': 0.052897755056619644, 'timestamp': '2025-10-02 00:15:16.377785', 'step': 2573, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:15:16.431477', 'step': 2573, 'epoch': 1}
{'type': 'loss', 'content': 0.21114498376846313, 'timestamp': '2025-10-02 00:15:16.434424', 'step': 2574, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:15:16.487414', 'step': 2574, 'epoch': 1}
{'type': 'loss', 'content': 0.09282880276441574, 'timestamp': '2025-10-02 00:15:16.490048', 'step': 2575, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:16.543935', 'step': 2575, 'epoch': 1}
{'type': 'loss', 'content': 0.1497589349746704, 'timestamp': '2025-10-02 00:15:16.549928', 'step': 2576, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:16.603125', 'step': 2576, 'epoch': 1}
{'type': 'loss', 'content': 0.11537841707468033, 'timestamp': '2025-10-02 00:15:16.609090', 'step': 2577, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:16.662668', 'step': 2577, 'epoch': 1}
{'type': 'loss', 'content': 0.18017958104610443, 'timestamp': '2025-10-02 00:15:16.665871', 'step': 2578, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:16.720845', 'step': 2578, 'epoch': 1}
{'type': 'loss', 'content': 0.043050263077020645, 'timestamp': '2025-10-02 00:15:16.726756', 'step': 2579, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:15:16.780216', 'step': 2579, 'epoch': 1}
{'type': 'loss', 'content': 0.12756024301052094, 'timestamp': '2025-10-02 00:15:16.786514', 'step': 2580, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:16.840407', 'step': 2580, 'epoch': 1}
{'type': 'loss', 'content': 0.11551203578710556, 'timestamp': '2025-10-02 00:15:16.842960', 'step': 2581, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:16.897655', 'step': 2581, 'epoch': 1}
{'type': 'loss', 'content': 0.029077621176838875, 'timestamp': '2025-10-02 00:15:16.903760', 'step': 2582, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:16.957565', 'step': 2582, 'epoch': 1}
{'type': 'loss', 'content': 0.16665767133235931, 'timestamp': '2025-10-02 00:15:16.960348', 'step': 2583, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:17.015091', 'step': 2583, 'epoch': 1}
{'type': 'loss', 'content': 0.0717499777674675, 'timestamp': '2025-10-02 00:15:17.021789', 'step': 2584, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:15:17.075865', 'step': 2584, 'epoch': 1}
{'type': 'loss', 'content': 0.16821587085723877, 'timestamp': '2025-10-02 00:15:17.078940', 'step': 2585, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:17.133238', 'step': 2585, 'epoch': 1}
{'type': 'loss', 'content': 0.05412815511226654, 'timestamp': '2025-10-02 00:15:17.142482', 'step': 2586, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:15:17.197289', 'step': 2586, 'epoch': 1}
{'type': 'loss', 'content': 0.09806925058364868, 'timestamp': '2025-10-02 00:15:17.207045', 'step': 2587, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:17.261503', 'step': 2587, 'epoch': 1}
{'type': 'loss', 'content': 0.1054006814956665, 'timestamp': '2025-10-02 00:15:17.269365', 'step': 2588, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:17.336022', 'step': 2588, 'epoch': 1}
{'type': 'loss', 'content': 0.053464505821466446, 'timestamp': '2025-10-02 00:15:17.338584', 'step': 2589, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:17.393414', 'step': 2589, 'epoch': 1}
{'type': 'loss', 'content': 0.025238001719117165, 'timestamp': '2025-10-02 00:15:17.402989', 'step': 2590, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 560], 'flops': 11200068058304.0}, 'timestamp': '2025-10-02 00:15:17.488858', 'step': 2590, 'epoch': 1}
{'type': 'loss', 'content': 0.020805319771170616, 'timestamp': '2025-10-02 00:15:17.503861', 'step': 2591, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:17.558517', 'step': 2591, 'epoch': 1}
{'type': 'loss', 'content': 0.05042514204978943, 'timestamp': '2025-10-02 00:15:17.565206', 'step': 2592, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:15:17.621361', 'step': 2592, 'epoch': 1}
{'type': 'loss', 'content': 0.1199503168463707, 'timestamp': '2025-10-02 00:15:17.632555', 'step': 2593, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:15:17.687181', 'step': 2593, 'epoch': 1}
{'type': 'loss', 'content': 0.08150948584079742, 'timestamp': '2025-10-02 00:15:17.696970', 'step': 2594, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:15:17.751356', 'step': 2594, 'epoch': 1}
{'type': 'loss', 'content': 0.025100992992520332, 'timestamp': '2025-10-02 00:15:17.761149', 'step': 2595, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:15:17.814611', 'step': 2595, 'epoch': 1}
{'type': 'loss', 'content': 0.18836361169815063, 'timestamp': '2025-10-02 00:15:17.820627', 'step': 2596, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:17.875852', 'step': 2596, 'epoch': 1}
{'type': 'loss', 'content': 0.0929332971572876, 'timestamp': '2025-10-02 00:15:17.878532', 'step': 2597, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:17.931565', 'step': 2597, 'epoch': 1}
{'type': 'loss', 'content': 0.1236574649810791, 'timestamp': '2025-10-02 00:15:17.937600', 'step': 2598, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:17.990878', 'step': 2598, 'epoch': 1}
{'type': 'loss', 'content': 0.15704010426998138, 'timestamp': '2025-10-02 00:15:17.993397', 'step': 2599, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:18.046682', 'step': 2599, 'epoch': 1}
{'type': 'loss', 'content': 0.07516627013683319, 'timestamp': '2025-10-02 00:15:18.052638', 'step': 2600, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:18.105843', 'step': 2600, 'epoch': 1}
{'type': 'loss', 'content': 0.05210260674357414, 'timestamp': '2025-10-02 00:15:18.108574', 'step': 2601, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:15:18.161893', 'step': 2601, 'epoch': 1}
{'type': 'loss', 'content': 0.16739365458488464, 'timestamp': '2025-10-02 00:15:18.164448', 'step': 2602, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:18.219050', 'step': 2602, 'epoch': 1}
{'type': 'loss', 'content': 0.05351828783750534, 'timestamp': '2025-10-02 00:15:18.221729', 'step': 2603, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:18.275528', 'step': 2603, 'epoch': 1}
{'type': 'loss', 'content': 0.03123534843325615, 'timestamp': '2025-10-02 00:15:18.285856', 'step': 2604, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:18.338621', 'step': 2604, 'epoch': 1}
{'type': 'loss', 'content': 0.04436337575316429, 'timestamp': '2025-10-02 00:15:18.341661', 'step': 2605, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:18.394998', 'step': 2605, 'epoch': 1}
{'type': 'loss', 'content': 0.14505793154239655, 'timestamp': '2025-10-02 00:15:18.397971', 'step': 2606, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:18.452988', 'step': 2606, 'epoch': 1}
{'type': 'loss', 'content': 0.10006571561098099, 'timestamp': '2025-10-02 00:15:18.458252', 'step': 2607, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:15:18.518914', 'step': 2607, 'epoch': 1}
{'type': 'loss', 'content': 0.04516296833753586, 'timestamp': '2025-10-02 00:15:18.530390', 'step': 2608, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:18.585270', 'step': 2608, 'epoch': 1}
{'type': 'loss', 'content': 0.030914440751075745, 'timestamp': '2025-10-02 00:15:18.594520', 'step': 2609, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:18.650348', 'step': 2609, 'epoch': 1}
{'type': 'loss', 'content': 0.17139966785907745, 'timestamp': '2025-10-02 00:15:18.657329', 'step': 2610, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:18.711317', 'step': 2610, 'epoch': 1}
{'type': 'loss', 'content': 0.1685050129890442, 'timestamp': '2025-10-02 00:15:18.713993', 'step': 2611, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:18.771029', 'step': 2611, 'epoch': 1}
{'type': 'loss', 'content': 0.063517726957798, 'timestamp': '2025-10-02 00:15:18.777176', 'step': 2612, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:15:18.831048', 'step': 2612, 'epoch': 1}
{'type': 'loss', 'content': 0.04172603413462639, 'timestamp': '2025-10-02 00:15:18.841066', 'step': 2613, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:18.895601', 'step': 2613, 'epoch': 1}
{'type': 'loss', 'content': 0.06098737195134163, 'timestamp': '2025-10-02 00:15:18.897784', 'step': 2614, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:15:18.951121', 'step': 2614, 'epoch': 1}
{'type': 'loss', 'content': 0.2150864154100418, 'timestamp': '2025-10-02 00:15:18.954350', 'step': 2615, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:15:19.023312', 'step': 2615, 'epoch': 1}
{'type': 'loss', 'content': 0.01906248927116394, 'timestamp': '2025-10-02 00:15:19.036537', 'step': 2616, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:15:19.093464', 'step': 2616, 'epoch': 1}
{'type': 'loss', 'content': 0.11211980134248734, 'timestamp': '2025-10-02 00:15:19.103620', 'step': 2617, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:19.157351', 'step': 2617, 'epoch': 1}
{'type': 'loss', 'content': 0.02805941179394722, 'timestamp': '2025-10-02 00:15:19.159443', 'step': 2618, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:19.212883', 'step': 2618, 'epoch': 1}
{'type': 'loss', 'content': 0.04854682832956314, 'timestamp': '2025-10-02 00:15:19.218280', 'step': 2619, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:19.272259', 'step': 2619, 'epoch': 1}
{'type': 'loss', 'content': 0.08289214223623276, 'timestamp': '2025-10-02 00:15:19.278033', 'step': 2620, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:15:19.332194', 'step': 2620, 'epoch': 1}
{'type': 'loss', 'content': 0.21615555882453918, 'timestamp': '2025-10-02 00:15:19.335492', 'step': 2621, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:19.389701', 'step': 2621, 'epoch': 1}
{'type': 'loss', 'content': 0.054929718375205994, 'timestamp': '2025-10-02 00:15:19.392124', 'step': 2622, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:15:19.446143', 'step': 2622, 'epoch': 1}
{'type': 'loss', 'content': 0.16614790260791779, 'timestamp': '2025-10-02 00:15:19.448441', 'step': 2623, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:19.502599', 'step': 2623, 'epoch': 1}
{'type': 'loss', 'content': 0.15301916003227234, 'timestamp': '2025-10-02 00:15:19.508547', 'step': 2624, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:15:19.574349', 'step': 2624, 'epoch': 1}
{'type': 'loss', 'content': 0.008028761483728886, 'timestamp': '2025-10-02 00:15:19.587361', 'step': 2625, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:19.642610', 'step': 2625, 'epoch': 1}
{'type': 'loss', 'content': 0.09075640141963959, 'timestamp': '2025-10-02 00:15:19.645412', 'step': 2626, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:19.701448', 'step': 2626, 'epoch': 1}
{'type': 'loss', 'content': 0.05554113909602165, 'timestamp': '2025-10-02 00:15:19.704525', 'step': 2627, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:15:19.762631', 'step': 2627, 'epoch': 1}
{'type': 'loss', 'content': 0.08163316547870636, 'timestamp': '2025-10-02 00:15:19.770180', 'step': 2628, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:15:19.824513', 'step': 2628, 'epoch': 1}
{'type': 'loss', 'content': 0.02639605849981308, 'timestamp': '2025-10-02 00:15:19.834795', 'step': 2629, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:19.888683', 'step': 2629, 'epoch': 1}
{'type': 'loss', 'content': 0.18489591777324677, 'timestamp': '2025-10-02 00:15:19.891113', 'step': 2630, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:19.945494', 'step': 2630, 'epoch': 1}
{'type': 'loss', 'content': 0.15716706216335297, 'timestamp': '2025-10-02 00:15:19.947475', 'step': 2631, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:20.002187', 'step': 2631, 'epoch': 1}
{'type': 'loss', 'content': 0.14750497043132782, 'timestamp': '2025-10-02 00:15:20.008208', 'step': 2632, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:20.062816', 'step': 2632, 'epoch': 1}
{'type': 'loss', 'content': 0.025050217285752296, 'timestamp': '2025-10-02 00:15:20.070166', 'step': 2633, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:20.124778', 'step': 2633, 'epoch': 1}
{'type': 'loss', 'content': 0.2652361989021301, 'timestamp': '2025-10-02 00:15:20.128229', 'step': 2634, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:20.182720', 'step': 2634, 'epoch': 1}
{'type': 'loss', 'content': 0.08548153936862946, 'timestamp': '2025-10-02 00:15:20.186261', 'step': 2635, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:20.240104', 'step': 2635, 'epoch': 1}
{'type': 'loss', 'content': 0.06775043159723282, 'timestamp': '2025-10-02 00:15:20.245833', 'step': 2636, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:20.299281', 'step': 2636, 'epoch': 1}
{'type': 'loss', 'content': 0.1545548439025879, 'timestamp': '2025-10-02 00:15:20.301591', 'step': 2637, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:20.355313', 'step': 2637, 'epoch': 1}
{'type': 'loss', 'content': 0.22750243544578552, 'timestamp': '2025-10-02 00:15:20.358322', 'step': 2638, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:15:20.412277', 'step': 2638, 'epoch': 1}
{'type': 'loss', 'content': 0.11979050934314728, 'timestamp': '2025-10-02 00:15:20.414523', 'step': 2639, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:15:20.483902', 'step': 2639, 'epoch': 1}
{'type': 'loss', 'content': 0.058872148394584656, 'timestamp': '2025-10-02 00:15:20.497148', 'step': 2640, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:20.550820', 'step': 2640, 'epoch': 1}
{'type': 'loss', 'content': 0.04895011708140373, 'timestamp': '2025-10-02 00:15:20.560288', 'step': 2641, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:20.614082', 'step': 2641, 'epoch': 1}
{'type': 'loss', 'content': 0.04307621717453003, 'timestamp': '2025-10-02 00:15:20.619837', 'step': 2642, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:20.674029', 'step': 2642, 'epoch': 1}
{'type': 'loss', 'content': 0.09469152241945267, 'timestamp': '2025-10-02 00:15:20.676294', 'step': 2643, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:15:20.734583', 'step': 2643, 'epoch': 1}
{'type': 'loss', 'content': 0.047435909509658813, 'timestamp': '2025-10-02 00:15:20.745523', 'step': 2644, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:20.799070', 'step': 2644, 'epoch': 1}
{'type': 'loss', 'content': 0.03662075847387314, 'timestamp': '2025-10-02 00:15:20.806365', 'step': 2645, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:20.860545', 'step': 2645, 'epoch': 1}
{'type': 'loss', 'content': 0.04765130206942558, 'timestamp': '2025-10-02 00:15:20.867802', 'step': 2646, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:20.923060', 'step': 2646, 'epoch': 1}
{'type': 'loss', 'content': 0.08466921001672745, 'timestamp': '2025-10-02 00:15:20.925683', 'step': 2647, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:20.979627', 'step': 2647, 'epoch': 1}
{'type': 'loss', 'content': 0.03295021876692772, 'timestamp': '2025-10-02 00:15:20.985820', 'step': 2648, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:21.039320', 'step': 2648, 'epoch': 1}
{'type': 'loss', 'content': 0.06496285647153854, 'timestamp': '2025-10-02 00:15:21.046634', 'step': 2649, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:15:21.101627', 'step': 2649, 'epoch': 1}
{'type': 'loss', 'content': 0.1829976588487625, 'timestamp': '2025-10-02 00:15:21.104297', 'step': 2650, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:21.159113', 'step': 2650, 'epoch': 1}
{'type': 'loss', 'content': 0.060000941157341, 'timestamp': '2025-10-02 00:15:21.161285', 'step': 2651, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:21.214542', 'step': 2651, 'epoch': 1}
{'type': 'loss', 'content': 0.21706454455852509, 'timestamp': '2025-10-02 00:15:21.220423', 'step': 2652, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:15:21.273893', 'step': 2652, 'epoch': 1}
{'type': 'loss', 'content': 0.06708785891532898, 'timestamp': '2025-10-02 00:15:21.276142', 'step': 2653, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 592], 'flops': 11840071943488.0}, 'timestamp': '2025-10-02 00:15:21.364218', 'step': 2653, 'epoch': 1}
{'type': 'loss', 'content': 0.03722698986530304, 'timestamp': '2025-10-02 00:15:21.380643', 'step': 2654, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:21.435011', 'step': 2654, 'epoch': 1}
{'type': 'loss', 'content': 0.038928866386413574, 'timestamp': '2025-10-02 00:15:21.437519', 'step': 2655, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:15:21.499339', 'step': 2655, 'epoch': 1}
{'type': 'loss', 'content': 0.08653860539197922, 'timestamp': '2025-10-02 00:15:21.510937', 'step': 2656, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:21.565754', 'step': 2656, 'epoch': 1}
{'type': 'loss', 'content': 0.07544420659542084, 'timestamp': '2025-10-02 00:15:21.575112', 'step': 2657, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:21.629265', 'step': 2657, 'epoch': 1}
{'type': 'loss', 'content': 0.17274034023284912, 'timestamp': '2025-10-02 00:15:21.631817', 'step': 2658, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:15:21.685898', 'step': 2658, 'epoch': 1}
{'type': 'loss', 'content': 0.1709306389093399, 'timestamp': '2025-10-02 00:15:21.688234', 'step': 2659, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:21.741850', 'step': 2659, 'epoch': 1}
{'type': 'loss', 'content': 0.14056150615215302, 'timestamp': '2025-10-02 00:15:21.748403', 'step': 2660, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:15:21.805476', 'step': 2660, 'epoch': 1}
{'type': 'loss', 'content': 0.055829260498285294, 'timestamp': '2025-10-02 00:15:21.816450', 'step': 2661, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:21.870868', 'step': 2661, 'epoch': 1}
{'type': 'loss', 'content': 0.0179474838078022, 'timestamp': '2025-10-02 00:15:21.878246', 'step': 2662, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:21.932504', 'step': 2662, 'epoch': 1}
{'type': 'loss', 'content': 0.14126834273338318, 'timestamp': '2025-10-02 00:15:21.934939', 'step': 2663, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:21.989809', 'step': 2663, 'epoch': 1}
{'type': 'loss', 'content': 0.08247263729572296, 'timestamp': '2025-10-02 00:15:21.998535', 'step': 2664, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:22.053864', 'step': 2664, 'epoch': 1}
{'type': 'loss', 'content': 0.22253955900669098, 'timestamp': '2025-10-02 00:15:22.056071', 'step': 2665, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:15:22.110212', 'step': 2665, 'epoch': 1}
{'type': 'loss', 'content': 0.06814935058355331, 'timestamp': '2025-10-02 00:15:22.112744', 'step': 2666, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 64], 'flops': 1280007837952.0}, 'timestamp': '2025-10-02 00:15:22.180903', 'step': 2666, 'epoch': 1}
{'type': 'loss', 'content': 0.16603945195674896, 'timestamp': '2025-10-02 00:15:22.183319', 'step': 2667, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:22.239581', 'step': 2667, 'epoch': 1}
{'type': 'loss', 'content': 0.09190529584884644, 'timestamp': '2025-10-02 00:15:22.245486', 'step': 2668, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:22.298360', 'step': 2668, 'epoch': 1}
{'type': 'loss', 'content': 0.15384672582149506, 'timestamp': '2025-10-02 00:15:22.301625', 'step': 2669, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:15:22.363312', 'step': 2669, 'epoch': 1}
{'type': 'loss', 'content': 0.04465217888355255, 'timestamp': '2025-10-02 00:15:22.373800', 'step': 2670, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:22.427672', 'step': 2670, 'epoch': 1}
{'type': 'loss', 'content': 0.07465412467718124, 'timestamp': '2025-10-02 00:15:22.430519', 'step': 2671, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:22.484479', 'step': 2671, 'epoch': 1}
{'type': 'loss', 'content': 0.1281675398349762, 'timestamp': '2025-10-02 00:15:22.490532', 'step': 2672, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:22.544493', 'step': 2672, 'epoch': 1}
{'type': 'loss', 'content': 0.06985577195882797, 'timestamp': '2025-10-02 00:15:22.551851', 'step': 2673, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:22.605611', 'step': 2673, 'epoch': 1}
{'type': 'loss', 'content': 0.08143708854913712, 'timestamp': '2025-10-02 00:15:22.608056', 'step': 2674, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:22.664106', 'step': 2674, 'epoch': 1}
{'type': 'loss', 'content': 0.06277691572904587, 'timestamp': '2025-10-02 00:15:22.671438', 'step': 2675, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:15:22.725197', 'step': 2675, 'epoch': 1}
{'type': 'loss', 'content': 0.2268649786710739, 'timestamp': '2025-10-02 00:15:22.733393', 'step': 2676, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:15:22.793386', 'step': 2676, 'epoch': 1}
{'type': 'loss', 'content': 0.10685133934020996, 'timestamp': '2025-10-02 00:15:22.804750', 'step': 2677, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:22.859873', 'step': 2677, 'epoch': 1}
{'type': 'loss', 'content': 0.02928931638598442, 'timestamp': '2025-10-02 00:15:22.862394', 'step': 2678, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:22.916958', 'step': 2678, 'epoch': 1}
{'type': 'loss', 'content': 0.015692580491304398, 'timestamp': '2025-10-02 00:15:22.922697', 'step': 2679, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:22.977632', 'step': 2679, 'epoch': 1}
{'type': 'loss', 'content': 0.015793627128005028, 'timestamp': '2025-10-02 00:15:22.994683', 'step': 2680, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:23.049769', 'step': 2680, 'epoch': 1}
{'type': 'loss', 'content': 0.12415317445993423, 'timestamp': '2025-10-02 00:15:23.052231', 'step': 2681, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:23.106785', 'step': 2681, 'epoch': 1}
{'type': 'loss', 'content': 0.20585274696350098, 'timestamp': '2025-10-02 00:15:23.109184', 'step': 2682, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-10-02 00:15:23.183613', 'step': 2682, 'epoch': 1}
{'type': 'loss', 'content': 0.021771328523755074, 'timestamp': '2025-10-02 00:15:23.197258', 'step': 2683, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:23.251468', 'step': 2683, 'epoch': 1}
{'type': 'loss', 'content': 0.052735019475221634, 'timestamp': '2025-10-02 00:15:23.258118', 'step': 2684, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:23.311511', 'step': 2684, 'epoch': 1}
{'type': 'loss', 'content': 0.10440859198570251, 'timestamp': '2025-10-02 00:15:23.313718', 'step': 2685, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:23.367782', 'step': 2685, 'epoch': 1}
{'type': 'loss', 'content': 0.15237492322921753, 'timestamp': '2025-10-02 00:15:23.369958', 'step': 2686, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:23.424102', 'step': 2686, 'epoch': 1}
{'type': 'loss', 'content': 0.09287649393081665, 'timestamp': '2025-10-02 00:15:23.426773', 'step': 2687, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:15:23.482280', 'step': 2687, 'epoch': 1}
{'type': 'loss', 'content': 0.22248685359954834, 'timestamp': '2025-10-02 00:15:23.488324', 'step': 2688, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:23.541353', 'step': 2688, 'epoch': 1}
{'type': 'loss', 'content': 0.120550237596035, 'timestamp': '2025-10-02 00:15:23.543842', 'step': 2689, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:23.598313', 'step': 2689, 'epoch': 1}
{'type': 'loss', 'content': 0.0863451138138771, 'timestamp': '2025-10-02 00:15:23.600473', 'step': 2690, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:23.654373', 'step': 2690, 'epoch': 1}
{'type': 'loss', 'content': 0.14004617929458618, 'timestamp': '2025-10-02 00:15:23.657051', 'step': 2691, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:23.711211', 'step': 2691, 'epoch': 1}
{'type': 'loss', 'content': 0.13710780441761017, 'timestamp': '2025-10-02 00:15:23.717408', 'step': 2692, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:15:23.774564', 'step': 2692, 'epoch': 1}
{'type': 'loss', 'content': 0.03168435022234917, 'timestamp': '2025-10-02 00:15:23.785325', 'step': 2693, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:23.845110', 'step': 2693, 'epoch': 1}
{'type': 'loss', 'content': 0.19717052578926086, 'timestamp': '2025-10-02 00:15:23.849083', 'step': 2694, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:15:23.909312', 'step': 2694, 'epoch': 1}
{'type': 'loss', 'content': 0.08870553970336914, 'timestamp': '2025-10-02 00:15:23.911879', 'step': 2695, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:15:23.980737', 'step': 2695, 'epoch': 1}
{'type': 'loss', 'content': 0.06238899379968643, 'timestamp': '2025-10-02 00:15:23.993653', 'step': 2696, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:24.053472', 'step': 2696, 'epoch': 1}
{'type': 'loss', 'content': 0.059235040098428726, 'timestamp': '2025-10-02 00:15:24.056017', 'step': 2697, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:15:24.115451', 'step': 2697, 'epoch': 1}
{'type': 'loss', 'content': 0.17796021699905396, 'timestamp': '2025-10-02 00:15:24.118271', 'step': 2698, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:24.178277', 'step': 2698, 'epoch': 1}
{'type': 'loss', 'content': 0.12381981313228607, 'timestamp': '2025-10-02 00:15:24.180816', 'step': 2699, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:15:24.242740', 'step': 2699, 'epoch': 1}
{'type': 'loss', 'content': 0.07137313485145569, 'timestamp': '2025-10-02 00:15:24.253918', 'step': 2700, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:24.310621', 'step': 2700, 'epoch': 1}
{'type': 'loss', 'content': 0.11174661666154861, 'timestamp': '2025-10-02 00:15:24.313496', 'step': 2701, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:24.368339', 'step': 2701, 'epoch': 1}
{'type': 'loss', 'content': 0.1354360729455948, 'timestamp': '2025-10-02 00:15:24.377423', 'step': 2702, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:15:24.432299', 'step': 2702, 'epoch': 1}
{'type': 'loss', 'content': 0.062142666429281235, 'timestamp': '2025-10-02 00:15:24.436673', 'step': 2703, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:24.493377', 'step': 2703, 'epoch': 1}
{'type': 'loss', 'content': 0.18959194421768188, 'timestamp': '2025-10-02 00:15:24.499236', 'step': 2704, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:24.553126', 'step': 2704, 'epoch': 1}
{'type': 'loss', 'content': 0.1378660947084427, 'timestamp': '2025-10-02 00:15:24.555424', 'step': 2705, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:24.612291', 'step': 2705, 'epoch': 1}
{'type': 'loss', 'content': 0.05174136906862259, 'timestamp': '2025-10-02 00:15:24.619529', 'step': 2706, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:15:24.686776', 'step': 2706, 'epoch': 1}
{'type': 'loss', 'content': 0.09127873927354813, 'timestamp': '2025-10-02 00:15:24.698718', 'step': 2707, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:15:24.760806', 'step': 2707, 'epoch': 1}
{'type': 'loss', 'content': 0.03322447091341019, 'timestamp': '2025-10-02 00:15:24.772228', 'step': 2708, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:24.827882', 'step': 2708, 'epoch': 1}
{'type': 'loss', 'content': 0.1212608590722084, 'timestamp': '2025-10-02 00:15:24.833466', 'step': 2709, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:15:24.896348', 'step': 2709, 'epoch': 1}
{'type': 'loss', 'content': 0.0760960653424263, 'timestamp': '2025-10-02 00:15:24.898794', 'step': 2710, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:24.952501', 'step': 2710, 'epoch': 1}
{'type': 'loss', 'content': 0.0862085223197937, 'timestamp': '2025-10-02 00:15:24.959613', 'step': 2711, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:25.012989', 'step': 2711, 'epoch': 1}
{'type': 'loss', 'content': 0.09886559098958969, 'timestamp': '2025-10-02 00:15:25.019990', 'step': 2712, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:25.075023', 'step': 2712, 'epoch': 1}
{'type': 'loss', 'content': 0.09889021515846252, 'timestamp': '2025-10-02 00:15:25.080550', 'step': 2713, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:15:25.138434', 'step': 2713, 'epoch': 1}
{'type': 'loss', 'content': 0.213158518075943, 'timestamp': '2025-10-02 00:15:25.141759', 'step': 2714, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:25.198909', 'step': 2714, 'epoch': 1}
{'type': 'loss', 'content': 0.09170723706483841, 'timestamp': '2025-10-02 00:15:25.205978', 'step': 2715, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:15:25.273633', 'step': 2715, 'epoch': 1}
{'type': 'loss', 'content': 0.021607443690299988, 'timestamp': '2025-10-02 00:15:25.285070', 'step': 2716, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:25.341658', 'step': 2716, 'epoch': 1}
{'type': 'loss', 'content': 0.11678429692983627, 'timestamp': '2025-10-02 00:15:25.348075', 'step': 2717, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:15:25.406078', 'step': 2717, 'epoch': 1}
{'type': 'loss', 'content': 0.08257371187210083, 'timestamp': '2025-10-02 00:15:25.410044', 'step': 2718, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:25.468460', 'step': 2718, 'epoch': 1}
{'type': 'loss', 'content': 0.10627452284097672, 'timestamp': '2025-10-02 00:15:25.470802', 'step': 2719, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:25.525849', 'step': 2719, 'epoch': 1}
{'type': 'loss', 'content': 0.2505335509777069, 'timestamp': '2025-10-02 00:15:25.532355', 'step': 2720, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:25.586389', 'step': 2720, 'epoch': 1}
{'type': 'loss', 'content': 0.11622630059719086, 'timestamp': '2025-10-02 00:15:25.593348', 'step': 2721, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:15:25.649158', 'step': 2721, 'epoch': 1}
{'type': 'loss', 'content': 0.18968522548675537, 'timestamp': '2025-10-02 00:15:25.653263', 'step': 2722, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:15:25.713775', 'step': 2722, 'epoch': 1}
{'type': 'loss', 'content': 0.027925152331590652, 'timestamp': '2025-10-02 00:15:25.723961', 'step': 2723, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:25.782609', 'step': 2723, 'epoch': 1}
{'type': 'loss', 'content': 0.06014268472790718, 'timestamp': '2025-10-02 00:15:25.789909', 'step': 2724, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:15:25.845607', 'step': 2724, 'epoch': 1}
{'type': 'loss', 'content': 0.12179384380578995, 'timestamp': '2025-10-02 00:15:25.848237', 'step': 2725, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:25.908826', 'step': 2725, 'epoch': 1}
{'type': 'loss', 'content': 0.08609005063772202, 'timestamp': '2025-10-02 00:15:25.911767', 'step': 2726, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:25.967584', 'step': 2726, 'epoch': 1}
{'type': 'loss', 'content': 0.06888947635889053, 'timestamp': '2025-10-02 00:15:25.970923', 'step': 2727, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:26.027435', 'step': 2727, 'epoch': 1}
{'type': 'loss', 'content': 0.08846796303987503, 'timestamp': '2025-10-02 00:15:26.033897', 'step': 2728, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:26.090362', 'step': 2728, 'epoch': 1}
{'type': 'loss', 'content': 0.1573830097913742, 'timestamp': '2025-10-02 00:15:26.093772', 'step': 2729, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:26.150470', 'step': 2729, 'epoch': 1}
{'type': 'loss', 'content': 0.22689969837665558, 'timestamp': '2025-10-02 00:15:26.153585', 'step': 2730, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:26.209777', 'step': 2730, 'epoch': 1}
{'type': 'loss', 'content': 0.19110283255577087, 'timestamp': '2025-10-02 00:15:26.213085', 'step': 2731, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:26.269077', 'step': 2731, 'epoch': 1}
{'type': 'loss', 'content': 0.044282156974077225, 'timestamp': '2025-10-02 00:15:26.275621', 'step': 2732, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:26.329410', 'step': 2732, 'epoch': 1}
{'type': 'loss', 'content': 0.0338779054582119, 'timestamp': '2025-10-02 00:15:26.336682', 'step': 2733, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:26.391216', 'step': 2733, 'epoch': 1}
{'type': 'loss', 'content': 0.13341102004051208, 'timestamp': '2025-10-02 00:15:26.394118', 'step': 2734, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:26.450517', 'step': 2734, 'epoch': 1}
{'type': 'loss', 'content': 0.0851946771144867, 'timestamp': '2025-10-02 00:15:26.452809', 'step': 2735, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:26.507485', 'step': 2735, 'epoch': 1}
{'type': 'loss', 'content': 0.047647736966609955, 'timestamp': '2025-10-02 00:15:26.517613', 'step': 2736, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:26.572604', 'step': 2736, 'epoch': 1}
{'type': 'loss', 'content': 0.1816377341747284, 'timestamp': '2025-10-02 00:15:26.575499', 'step': 2737, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:26.632488', 'step': 2737, 'epoch': 1}
{'type': 'loss', 'content': 0.14782699942588806, 'timestamp': '2025-10-02 00:15:26.635051', 'step': 2738, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:15:26.690709', 'step': 2738, 'epoch': 1}
{'type': 'loss', 'content': 0.14987924695014954, 'timestamp': '2025-10-02 00:15:26.693860', 'step': 2739, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:15:26.765883', 'step': 2739, 'epoch': 1}
{'type': 'loss', 'content': 0.015529093332588673, 'timestamp': '2025-10-02 00:15:26.778998', 'step': 2740, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:26.837078', 'step': 2740, 'epoch': 1}
{'type': 'loss', 'content': 0.07527583837509155, 'timestamp': '2025-10-02 00:15:26.842698', 'step': 2741, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:26.897897', 'step': 2741, 'epoch': 1}
{'type': 'loss', 'content': 0.1719653308391571, 'timestamp': '2025-10-02 00:15:26.902744', 'step': 2742, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:26.959469', 'step': 2742, 'epoch': 1}
{'type': 'loss', 'content': 0.05192933976650238, 'timestamp': '2025-10-02 00:15:26.962405', 'step': 2743, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:27.019365', 'step': 2743, 'epoch': 1}
{'type': 'loss', 'content': 0.02462933585047722, 'timestamp': '2025-10-02 00:15:27.028855', 'step': 2744, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:27.084732', 'step': 2744, 'epoch': 1}
{'type': 'loss', 'content': 0.07974962145090103, 'timestamp': '2025-10-02 00:15:27.087385', 'step': 2745, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:27.142505', 'step': 2745, 'epoch': 1}
{'type': 'loss', 'content': 0.1708991825580597, 'timestamp': '2025-10-02 00:15:27.145853', 'step': 2746, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:27.203681', 'step': 2746, 'epoch': 1}
{'type': 'loss', 'content': 0.1144411712884903, 'timestamp': '2025-10-02 00:15:27.205842', 'step': 2747, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:27.262684', 'step': 2747, 'epoch': 1}
{'type': 'loss', 'content': 0.1377624273300171, 'timestamp': '2025-10-02 00:15:27.271116', 'step': 2748, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:27.327442', 'step': 2748, 'epoch': 1}
{'type': 'loss', 'content': 0.04042603075504303, 'timestamp': '2025-10-02 00:15:27.329666', 'step': 2749, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:27.385550', 'step': 2749, 'epoch': 1}
{'type': 'loss', 'content': 0.18909679353237152, 'timestamp': '2025-10-02 00:15:27.390682', 'step': 2750, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:27.447274', 'step': 2750, 'epoch': 1}
{'type': 'loss', 'content': 0.08492130041122437, 'timestamp': '2025-10-02 00:15:27.452676', 'step': 2751, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:27.507856', 'step': 2751, 'epoch': 1}
{'type': 'loss', 'content': 0.06809122115373611, 'timestamp': '2025-10-02 00:15:27.514866', 'step': 2752, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:27.569768', 'step': 2752, 'epoch': 1}
{'type': 'loss', 'content': 0.1179286539554596, 'timestamp': '2025-10-02 00:15:27.575539', 'step': 2753, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:15:27.638270', 'step': 2753, 'epoch': 1}
{'type': 'loss', 'content': 0.024046165868639946, 'timestamp': '2025-10-02 00:15:27.648907', 'step': 2754, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:15:27.709868', 'step': 2754, 'epoch': 1}
{'type': 'loss', 'content': 0.10064958781003952, 'timestamp': '2025-10-02 00:15:27.720300', 'step': 2755, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:15:27.776671', 'step': 2755, 'epoch': 1}
{'type': 'loss', 'content': 0.07139059156179428, 'timestamp': '2025-10-02 00:15:27.786959', 'step': 2756, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:15:27.849256', 'step': 2756, 'epoch': 1}
{'type': 'loss', 'content': 0.06307795643806458, 'timestamp': '2025-10-02 00:15:27.860548', 'step': 2757, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:27.915774', 'step': 2757, 'epoch': 1}
{'type': 'loss', 'content': 0.11364791542291641, 'timestamp': '2025-10-02 00:15:27.918542', 'step': 2758, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:15:27.973807', 'step': 2758, 'epoch': 1}
{'type': 'loss', 'content': 0.25317129492759705, 'timestamp': '2025-10-02 00:15:27.976201', 'step': 2759, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:28.030094', 'step': 2759, 'epoch': 1}
{'type': 'loss', 'content': 0.12204482406377792, 'timestamp': '2025-10-02 00:15:28.037620', 'step': 2760, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:28.091844', 'step': 2760, 'epoch': 1}
{'type': 'loss', 'content': 0.10419665277004242, 'timestamp': '2025-10-02 00:15:28.097517', 'step': 2761, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:15:28.154610', 'step': 2761, 'epoch': 1}
{'type': 'loss', 'content': 0.13869133591651917, 'timestamp': '2025-10-02 00:15:28.164179', 'step': 2762, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:28.235649', 'step': 2762, 'epoch': 1}
{'type': 'loss', 'content': 0.045510586351156235, 'timestamp': '2025-10-02 00:15:28.248205', 'step': 2763, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:28.310265', 'step': 2763, 'epoch': 1}
{'type': 'loss', 'content': 0.04341378062963486, 'timestamp': '2025-10-02 00:15:28.318415', 'step': 2764, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:28.386758', 'step': 2764, 'epoch': 1}
{'type': 'loss', 'content': 0.032424986362457275, 'timestamp': '2025-10-02 00:15:28.407432', 'step': 2765, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:28.481679', 'step': 2765, 'epoch': 1}
{'type': 'loss', 'content': 0.06741531193256378, 'timestamp': '2025-10-02 00:15:28.487466', 'step': 2766, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:28.549210', 'step': 2766, 'epoch': 1}
{'type': 'loss', 'content': 0.059439994394779205, 'timestamp': '2025-10-02 00:15:28.555877', 'step': 2767, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:28.612571', 'step': 2767, 'epoch': 1}
{'type': 'loss', 'content': 0.08237045258283615, 'timestamp': '2025-10-02 00:15:28.619341', 'step': 2768, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:28.681013', 'step': 2768, 'epoch': 1}
{'type': 'loss', 'content': 0.1240631639957428, 'timestamp': '2025-10-02 00:15:28.688445', 'step': 2769, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 544], 'flops': 10880066115712.0}, 'timestamp': '2025-10-02 00:15:28.775753', 'step': 2769, 'epoch': 1}
{'type': 'loss', 'content': 0.00728088803589344, 'timestamp': '2025-10-02 00:15:28.790665', 'step': 2770, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:15:28.848778', 'step': 2770, 'epoch': 1}
{'type': 'loss', 'content': 0.01839323714375496, 'timestamp': '2025-10-02 00:15:28.858395', 'step': 2771, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:15:28.920285', 'step': 2771, 'epoch': 1}
{'type': 'loss', 'content': 0.09187054634094238, 'timestamp': '2025-10-02 00:15:28.937942', 'step': 2772, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:29.015186', 'step': 2772, 'epoch': 1}
{'type': 'loss', 'content': 0.09069815278053284, 'timestamp': '2025-10-02 00:15:29.018997', 'step': 2773, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:29.085239', 'step': 2773, 'epoch': 1}
{'type': 'loss', 'content': 0.054671742022037506, 'timestamp': '2025-10-02 00:15:29.091112', 'step': 2774, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:29.146160', 'step': 2774, 'epoch': 1}
{'type': 'loss', 'content': 0.10608532279729843, 'timestamp': '2025-10-02 00:15:29.155506', 'step': 2775, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:29.210087', 'step': 2775, 'epoch': 1}
{'type': 'loss', 'content': 0.09168612957000732, 'timestamp': '2025-10-02 00:15:29.216479', 'step': 2776, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:29.270224', 'step': 2776, 'epoch': 1}
{'type': 'loss', 'content': 0.08817467838525772, 'timestamp': '2025-10-02 00:15:29.272530', 'step': 2777, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:29.326759', 'step': 2777, 'epoch': 1}
{'type': 'loss', 'content': 0.23244218528270721, 'timestamp': '2025-10-02 00:15:29.329129', 'step': 2778, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:15:29.398355', 'step': 2778, 'epoch': 1}
{'type': 'loss', 'content': 0.021183334290981293, 'timestamp': '2025-10-02 00:15:29.410681', 'step': 2779, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:15:29.465515', 'step': 2779, 'epoch': 1}
{'type': 'loss', 'content': 0.14829741418361664, 'timestamp': '2025-10-02 00:15:29.471537', 'step': 2780, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:29.524217', 'step': 2780, 'epoch': 1}
{'type': 'loss', 'content': 0.07204744964838028, 'timestamp': '2025-10-02 00:15:29.530183', 'step': 2781, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:29.585909', 'step': 2781, 'epoch': 1}
{'type': 'loss', 'content': 0.022680625319480896, 'timestamp': '2025-10-02 00:15:29.591772', 'step': 2782, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:29.645481', 'step': 2782, 'epoch': 1}
{'type': 'loss', 'content': 0.11684751510620117, 'timestamp': '2025-10-02 00:15:29.647855', 'step': 2783, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:29.702543', 'step': 2783, 'epoch': 1}
{'type': 'loss', 'content': 0.1629418581724167, 'timestamp': '2025-10-02 00:15:29.709268', 'step': 2784, 'epoch': 1}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:15:56.191124', 'step': 2784, 'epoch': 1}
{'type': 'pplx', 'content': 91.33360960516808, 'timestamp': '2025-10-02 00:15:56.194932', 'step': 2784, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:56.250879', 'step': 2784, 'epoch': 1}
{'type': 'loss', 'content': 0.06947892159223557, 'timestamp': '2025-10-02 00:15:56.254920', 'step': 2785, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:56.311394', 'step': 2785, 'epoch': 1}
{'type': 'loss', 'content': 0.04259856045246124, 'timestamp': '2025-10-02 00:15:56.317719', 'step': 2786, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:15:56.373285', 'step': 2786, 'epoch': 1}
{'type': 'loss', 'content': 0.10281717777252197, 'timestamp': '2025-10-02 00:15:56.376662', 'step': 2787, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:56.430741', 'step': 2787, 'epoch': 1}
{'type': 'loss', 'content': 0.03593897074460983, 'timestamp': '2025-10-02 00:15:56.437167', 'step': 2788, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:15:56.494535', 'step': 2788, 'epoch': 1}
{'type': 'loss', 'content': 0.05765954405069351, 'timestamp': '2025-10-02 00:15:56.505538', 'step': 2789, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:56.560116', 'step': 2789, 'epoch': 1}
{'type': 'loss', 'content': 0.016362067312002182, 'timestamp': '2025-10-02 00:15:56.567559', 'step': 2790, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:15:56.623829', 'step': 2790, 'epoch': 1}
{'type': 'loss', 'content': 0.2677896022796631, 'timestamp': '2025-10-02 00:15:56.625979', 'step': 2791, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:15:56.679234', 'step': 2791, 'epoch': 1}
{'type': 'loss', 'content': 0.17960095405578613, 'timestamp': '2025-10-02 00:15:56.685556', 'step': 2792, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:15:56.754349', 'step': 2792, 'epoch': 1}
{'type': 'loss', 'content': 0.06446593254804611, 'timestamp': '2025-10-02 00:15:56.768138', 'step': 2793, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:56.824156', 'step': 2793, 'epoch': 1}
{'type': 'loss', 'content': 0.05067722126841545, 'timestamp': '2025-10-02 00:15:56.826489', 'step': 2794, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:15:56.881632', 'step': 2794, 'epoch': 1}
{'type': 'loss', 'content': 0.16025958955287933, 'timestamp': '2025-10-02 00:15:56.884323', 'step': 2795, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:56.938125', 'step': 2795, 'epoch': 1}
{'type': 'loss', 'content': 0.13900478184223175, 'timestamp': '2025-10-02 00:15:56.943945', 'step': 2796, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:15:57.005739', 'step': 2796, 'epoch': 1}
{'type': 'loss', 'content': 0.047833435237407684, 'timestamp': '2025-10-02 00:15:57.017269', 'step': 2797, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:15:57.071640', 'step': 2797, 'epoch': 1}
{'type': 'loss', 'content': 0.10155113786458969, 'timestamp': '2025-10-02 00:15:57.074136', 'step': 2798, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:57.129081', 'step': 2798, 'epoch': 1}
{'type': 'loss', 'content': 0.03588242828845978, 'timestamp': '2025-10-02 00:15:57.134582', 'step': 2799, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:57.188940', 'step': 2799, 'epoch': 1}
{'type': 'loss', 'content': 0.12522275745868683, 'timestamp': '2025-10-02 00:15:57.195299', 'step': 2800, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:15:57.249362', 'step': 2800, 'epoch': 1}
{'type': 'loss', 'content': 0.19365127384662628, 'timestamp': '2025-10-02 00:15:57.251710', 'step': 2801, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:57.305853', 'step': 2801, 'epoch': 1}
{'type': 'loss', 'content': 0.08676441013813019, 'timestamp': '2025-10-02 00:15:57.308332', 'step': 2802, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:57.363676', 'step': 2802, 'epoch': 1}
{'type': 'loss', 'content': 0.05952703580260277, 'timestamp': '2025-10-02 00:15:57.366430', 'step': 2803, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:57.421529', 'step': 2803, 'epoch': 1}
{'type': 'loss', 'content': 0.051345065236091614, 'timestamp': '2025-10-02 00:15:57.427976', 'step': 2804, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:15:57.484120', 'step': 2804, 'epoch': 1}
{'type': 'loss', 'content': 0.019824206829071045, 'timestamp': '2025-10-02 00:15:57.494396', 'step': 2805, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:57.549496', 'step': 2805, 'epoch': 1}
{'type': 'loss', 'content': 0.0881071463227272, 'timestamp': '2025-10-02 00:15:57.556943', 'step': 2806, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:57.612094', 'step': 2806, 'epoch': 1}
{'type': 'loss', 'content': 0.07792294770479202, 'timestamp': '2025-10-02 00:15:57.617818', 'step': 2807, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:57.672764', 'step': 2807, 'epoch': 1}
{'type': 'loss', 'content': 0.16891542077064514, 'timestamp': '2025-10-02 00:15:57.678728', 'step': 2808, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:57.732763', 'step': 2808, 'epoch': 1}
{'type': 'loss', 'content': 0.11866893619298935, 'timestamp': '2025-10-02 00:15:57.735391', 'step': 2809, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:57.790309', 'step': 2809, 'epoch': 1}
{'type': 'loss', 'content': 0.16902890801429749, 'timestamp': '2025-10-02 00:15:57.792613', 'step': 2810, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:15:57.854358', 'step': 2810, 'epoch': 1}
{'type': 'loss', 'content': 0.03250139206647873, 'timestamp': '2025-10-02 00:15:57.864806', 'step': 2811, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:57.919699', 'step': 2811, 'epoch': 1}
{'type': 'loss', 'content': 0.0875614732503891, 'timestamp': '2025-10-02 00:15:57.925757', 'step': 2812, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:57.979659', 'step': 2812, 'epoch': 1}
{'type': 'loss', 'content': 0.1039707362651825, 'timestamp': '2025-10-02 00:15:57.982046', 'step': 2813, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:15:58.036115', 'step': 2813, 'epoch': 1}
{'type': 'loss', 'content': 0.10608773678541183, 'timestamp': '2025-10-02 00:15:58.038484', 'step': 2814, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:58.092499', 'step': 2814, 'epoch': 1}
{'type': 'loss', 'content': 0.08234129846096039, 'timestamp': '2025-10-02 00:15:58.094794', 'step': 2815, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:15:58.149241', 'step': 2815, 'epoch': 1}
{'type': 'loss', 'content': 0.1990993320941925, 'timestamp': '2025-10-02 00:15:58.155039', 'step': 2816, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:58.209434', 'step': 2816, 'epoch': 1}
{'type': 'loss', 'content': 0.11174766719341278, 'timestamp': '2025-10-02 00:15:58.211664', 'step': 2817, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:58.266295', 'step': 2817, 'epoch': 1}
{'type': 'loss', 'content': 0.035290781408548355, 'timestamp': '2025-10-02 00:15:58.268792', 'step': 2818, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:58.323225', 'step': 2818, 'epoch': 1}
{'type': 'loss', 'content': 0.07727332413196564, 'timestamp': '2025-10-02 00:15:58.329025', 'step': 2819, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:58.384762', 'step': 2819, 'epoch': 1}
{'type': 'loss', 'content': 0.11040320247411728, 'timestamp': '2025-10-02 00:15:58.390589', 'step': 2820, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:15:58.449772', 'step': 2820, 'epoch': 1}
{'type': 'loss', 'content': 0.054213471710681915, 'timestamp': '2025-10-02 00:15:58.460700', 'step': 2821, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:58.515327', 'step': 2821, 'epoch': 1}
{'type': 'loss', 'content': 0.05958397686481476, 'timestamp': '2025-10-02 00:15:58.517330', 'step': 2822, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:15:58.576554', 'step': 2822, 'epoch': 1}
{'type': 'loss', 'content': 0.0693998858332634, 'timestamp': '2025-10-02 00:15:58.586735', 'step': 2823, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:15:58.661131', 'step': 2823, 'epoch': 1}
{'type': 'loss', 'content': 0.013818571344017982, 'timestamp': '2025-10-02 00:15:58.675081', 'step': 2824, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:58.728832', 'step': 2824, 'epoch': 1}
{'type': 'loss', 'content': 0.10303793847560883, 'timestamp': '2025-10-02 00:15:58.731316', 'step': 2825, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:58.786254', 'step': 2825, 'epoch': 1}
{'type': 'loss', 'content': 0.08162432909011841, 'timestamp': '2025-10-02 00:15:58.788584', 'step': 2826, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:58.843586', 'step': 2826, 'epoch': 1}
{'type': 'loss', 'content': 0.09503024071455002, 'timestamp': '2025-10-02 00:15:58.852945', 'step': 2827, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:58.907639', 'step': 2827, 'epoch': 1}
{'type': 'loss', 'content': 0.07883401960134506, 'timestamp': '2025-10-02 00:15:58.917781', 'step': 2828, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:15:58.972482', 'step': 2828, 'epoch': 1}
{'type': 'loss', 'content': 0.10943044722080231, 'timestamp': '2025-10-02 00:15:58.981959', 'step': 2829, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:59.037874', 'step': 2829, 'epoch': 1}
{'type': 'loss', 'content': 0.20392899215221405, 'timestamp': '2025-10-02 00:15:59.040404', 'step': 2830, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:15:59.095241', 'step': 2830, 'epoch': 1}
{'type': 'loss', 'content': 0.059098728001117706, 'timestamp': '2025-10-02 00:15:59.097764', 'step': 2831, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:15:59.152500', 'step': 2831, 'epoch': 1}
{'type': 'loss', 'content': 0.07048499584197998, 'timestamp': '2025-10-02 00:15:59.159029', 'step': 2832, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:59.213044', 'step': 2832, 'epoch': 1}
{'type': 'loss', 'content': 0.13137322664260864, 'timestamp': '2025-10-02 00:15:59.215283', 'step': 2833, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:15:59.269687', 'step': 2833, 'epoch': 1}
{'type': 'loss', 'content': 0.0819256603717804, 'timestamp': '2025-10-02 00:15:59.277033', 'step': 2834, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:15:59.332812', 'step': 2834, 'epoch': 1}
{'type': 'loss', 'content': 0.1304958164691925, 'timestamp': '2025-10-02 00:15:59.335231', 'step': 2835, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:15:59.390216', 'step': 2835, 'epoch': 1}
{'type': 'loss', 'content': 0.18673905730247498, 'timestamp': '2025-10-02 00:15:59.396323', 'step': 2836, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:15:59.451085', 'step': 2836, 'epoch': 1}
{'type': 'loss', 'content': 0.18387152254581451, 'timestamp': '2025-10-02 00:15:59.453190', 'step': 2837, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:59.508271', 'step': 2837, 'epoch': 1}
{'type': 'loss', 'content': 0.08045554906129837, 'timestamp': '2025-10-02 00:15:59.510688', 'step': 2838, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:15:59.565268', 'step': 2838, 'epoch': 1}
{'type': 'loss', 'content': 0.1809832602739334, 'timestamp': '2025-10-02 00:15:59.567354', 'step': 2839, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:59.622227', 'step': 2839, 'epoch': 1}
{'type': 'loss', 'content': 0.0637044683098793, 'timestamp': '2025-10-02 00:15:59.628203', 'step': 2840, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:15:59.682993', 'step': 2840, 'epoch': 1}
{'type': 'loss', 'content': 0.08839460462331772, 'timestamp': '2025-10-02 00:15:59.685502', 'step': 2841, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:15:59.739761', 'step': 2841, 'epoch': 1}
{'type': 'loss', 'content': 0.1744900643825531, 'timestamp': '2025-10-02 00:15:59.742164', 'step': 2842, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:59.796626', 'step': 2842, 'epoch': 1}
{'type': 'loss', 'content': 0.08277218043804169, 'timestamp': '2025-10-02 00:15:59.799899', 'step': 2843, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:15:59.853868', 'step': 2843, 'epoch': 1}
{'type': 'loss', 'content': 0.08128079026937485, 'timestamp': '2025-10-02 00:15:59.859605', 'step': 2844, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:59.913886', 'step': 2844, 'epoch': 1}
{'type': 'loss', 'content': 0.07093874365091324, 'timestamp': '2025-10-02 00:15:59.916658', 'step': 2845, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:15:59.972323', 'step': 2845, 'epoch': 1}
{'type': 'loss', 'content': 0.0550607405602932, 'timestamp': '2025-10-02 00:15:59.975788', 'step': 2846, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:00.030434', 'step': 2846, 'epoch': 1}
{'type': 'loss', 'content': 0.10667997598648071, 'timestamp': '2025-10-02 00:16:00.035913', 'step': 2847, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:00.091502', 'step': 2847, 'epoch': 1}
{'type': 'loss', 'content': 0.06046079844236374, 'timestamp': '2025-10-02 00:16:00.097754', 'step': 2848, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:16:00.158638', 'step': 2848, 'epoch': 1}
{'type': 'loss', 'content': 0.017884278669953346, 'timestamp': '2025-10-02 00:16:00.170004', 'step': 2849, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:00.225308', 'step': 2849, 'epoch': 1}
{'type': 'loss', 'content': 0.16391272842884064, 'timestamp': '2025-10-02 00:16:00.227878', 'step': 2850, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:16:00.296299', 'step': 2850, 'epoch': 1}
{'type': 'loss', 'content': 0.035171955823898315, 'timestamp': '2025-10-02 00:16:00.308274', 'step': 2851, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:00.363517', 'step': 2851, 'epoch': 1}
{'type': 'loss', 'content': 0.10865068435668945, 'timestamp': '2025-10-02 00:16:00.373304', 'step': 2852, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:00.427055', 'step': 2852, 'epoch': 1}
{'type': 'loss', 'content': 0.16346809267997742, 'timestamp': '2025-10-02 00:16:00.429465', 'step': 2853, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:00.490106', 'step': 2853, 'epoch': 1}
{'type': 'loss', 'content': 0.050424810498952866, 'timestamp': '2025-10-02 00:16:00.495139', 'step': 2854, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:00.550513', 'step': 2854, 'epoch': 1}
{'type': 'loss', 'content': 0.03470393270254135, 'timestamp': '2025-10-02 00:16:00.559762', 'step': 2855, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:00.616395', 'step': 2855, 'epoch': 1}
{'type': 'loss', 'content': 0.06487123668193817, 'timestamp': '2025-10-02 00:16:00.622331', 'step': 2856, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:00.676368', 'step': 2856, 'epoch': 1}
{'type': 'loss', 'content': 0.2439216524362564, 'timestamp': '2025-10-02 00:16:00.678417', 'step': 2857, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:00.732840', 'step': 2857, 'epoch': 1}
{'type': 'loss', 'content': 0.02158770151436329, 'timestamp': '2025-10-02 00:16:00.735045', 'step': 2858, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:00.790330', 'step': 2858, 'epoch': 1}
{'type': 'loss', 'content': 0.055068228393793106, 'timestamp': '2025-10-02 00:16:00.797176', 'step': 2859, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:00.852509', 'step': 2859, 'epoch': 1}
{'type': 'loss', 'content': 0.14355573058128357, 'timestamp': '2025-10-02 00:16:00.858435', 'step': 2860, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:00.913486', 'step': 2860, 'epoch': 1}
{'type': 'loss', 'content': 0.07027330249547958, 'timestamp': '2025-10-02 00:16:00.915930', 'step': 2861, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:00.971437', 'step': 2861, 'epoch': 1}
{'type': 'loss', 'content': 0.05371113494038582, 'timestamp': '2025-10-02 00:16:00.973913', 'step': 2862, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:01.031282', 'step': 2862, 'epoch': 1}
{'type': 'loss', 'content': 0.07604657858610153, 'timestamp': '2025-10-02 00:16:01.033792', 'step': 2863, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:01.089185', 'step': 2863, 'epoch': 1}
{'type': 'loss', 'content': 0.168395534157753, 'timestamp': '2025-10-02 00:16:01.095362', 'step': 2864, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:01.150511', 'step': 2864, 'epoch': 1}
{'type': 'loss', 'content': 0.057905856519937515, 'timestamp': '2025-10-02 00:16:01.159705', 'step': 2865, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:16:01.221827', 'step': 2865, 'epoch': 1}
{'type': 'loss', 'content': 0.05019165575504303, 'timestamp': '2025-10-02 00:16:01.232595', 'step': 2866, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:01.287545', 'step': 2866, 'epoch': 1}
{'type': 'loss', 'content': 0.05038860812783241, 'timestamp': '2025-10-02 00:16:01.296584', 'step': 2867, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:01.351748', 'step': 2867, 'epoch': 1}
{'type': 'loss', 'content': 0.16888566315174103, 'timestamp': '2025-10-02 00:16:01.357470', 'step': 2868, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:01.410487', 'step': 2868, 'epoch': 1}
{'type': 'loss', 'content': 0.17623800039291382, 'timestamp': '2025-10-02 00:16:01.412975', 'step': 2869, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:01.468513', 'step': 2869, 'epoch': 1}
{'type': 'loss', 'content': 0.09040851145982742, 'timestamp': '2025-10-02 00:16:01.474256', 'step': 2870, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:01.528767', 'step': 2870, 'epoch': 1}
{'type': 'loss', 'content': 0.05340906232595444, 'timestamp': '2025-10-02 00:16:01.532501', 'step': 2871, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:01.587387', 'step': 2871, 'epoch': 1}
{'type': 'loss', 'content': 0.09759694337844849, 'timestamp': '2025-10-02 00:16:01.593388', 'step': 2872, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:16:01.647506', 'step': 2872, 'epoch': 1}
{'type': 'loss', 'content': 0.10233912616968155, 'timestamp': '2025-10-02 00:16:01.649893', 'step': 2873, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:01.705532', 'step': 2873, 'epoch': 1}
{'type': 'loss', 'content': 0.05886884406208992, 'timestamp': '2025-10-02 00:16:01.708023', 'step': 2874, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:01.762989', 'step': 2874, 'epoch': 1}
{'type': 'loss', 'content': 0.05545181408524513, 'timestamp': '2025-10-02 00:16:01.765572', 'step': 2875, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:16:01.824450', 'step': 2875, 'epoch': 1}
{'type': 'loss', 'content': 0.06803484261035919, 'timestamp': '2025-10-02 00:16:01.835407', 'step': 2876, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:01.889432', 'step': 2876, 'epoch': 1}
{'type': 'loss', 'content': 0.1230897307395935, 'timestamp': '2025-10-02 00:16:01.891714', 'step': 2877, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:16:01.946227', 'step': 2877, 'epoch': 1}
{'type': 'loss', 'content': 0.1744983196258545, 'timestamp': '2025-10-02 00:16:01.948466', 'step': 2878, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:02.002788', 'step': 2878, 'epoch': 1}
{'type': 'loss', 'content': 0.04594198986887932, 'timestamp': '2025-10-02 00:16:02.011824', 'step': 2879, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:02.065543', 'step': 2879, 'epoch': 1}
{'type': 'loss', 'content': 0.06071048602461815, 'timestamp': '2025-10-02 00:16:02.071587', 'step': 2880, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:02.126478', 'step': 2880, 'epoch': 1}
{'type': 'loss', 'content': 0.027340522035956383, 'timestamp': '2025-10-02 00:16:02.132301', 'step': 2881, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:02.187085', 'step': 2881, 'epoch': 1}
{'type': 'loss', 'content': 0.013563213869929314, 'timestamp': '2025-10-02 00:16:02.192889', 'step': 2882, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:02.248860', 'step': 2882, 'epoch': 1}
{'type': 'loss', 'content': 0.06474345177412033, 'timestamp': '2025-10-02 00:16:02.254432', 'step': 2883, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:02.309543', 'step': 2883, 'epoch': 1}
{'type': 'loss', 'content': 0.05793329328298569, 'timestamp': '2025-10-02 00:16:02.315748', 'step': 2884, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:02.370746', 'step': 2884, 'epoch': 1}
{'type': 'loss', 'content': 0.06843698769807816, 'timestamp': '2025-10-02 00:16:02.373093', 'step': 2885, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:02.426946', 'step': 2885, 'epoch': 1}
{'type': 'loss', 'content': 0.09041167050600052, 'timestamp': '2025-10-02 00:16:02.429369', 'step': 2886, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:02.484038', 'step': 2886, 'epoch': 1}
{'type': 'loss', 'content': 0.06437403708696365, 'timestamp': '2025-10-02 00:16:02.486894', 'step': 2887, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:02.541683', 'step': 2887, 'epoch': 1}
{'type': 'loss', 'content': 0.07308300584554672, 'timestamp': '2025-10-02 00:16:02.547659', 'step': 2888, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:02.602308', 'step': 2888, 'epoch': 1}
{'type': 'loss', 'content': 0.043997786939144135, 'timestamp': '2025-10-02 00:16:02.608273', 'step': 2889, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:02.661993', 'step': 2889, 'epoch': 1}
{'type': 'loss', 'content': 0.1091606393456459, 'timestamp': '2025-10-02 00:16:02.664516', 'step': 2890, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:02.718159', 'step': 2890, 'epoch': 1}
{'type': 'loss', 'content': 0.18239179253578186, 'timestamp': '2025-10-02 00:16:02.720503', 'step': 2891, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:02.775305', 'step': 2891, 'epoch': 1}
{'type': 'loss', 'content': 0.03828670084476471, 'timestamp': '2025-10-02 00:16:02.785443', 'step': 2892, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:16:02.839866', 'step': 2892, 'epoch': 1}
{'type': 'loss', 'content': 0.15442144870758057, 'timestamp': '2025-10-02 00:16:02.842489', 'step': 2893, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:02.897419', 'step': 2893, 'epoch': 1}
{'type': 'loss', 'content': 0.06492169946432114, 'timestamp': '2025-10-02 00:16:02.899294', 'step': 2894, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:02.953016', 'step': 2894, 'epoch': 1}
{'type': 'loss', 'content': 0.03177913650870323, 'timestamp': '2025-10-02 00:16:02.955510', 'step': 2895, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:03.010779', 'step': 2895, 'epoch': 1}
{'type': 'loss', 'content': 0.07650194317102432, 'timestamp': '2025-10-02 00:16:03.017437', 'step': 2896, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:03.072264', 'step': 2896, 'epoch': 1}
{'type': 'loss', 'content': 0.05578944832086563, 'timestamp': '2025-10-02 00:16:03.081780', 'step': 2897, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:03.137392', 'step': 2897, 'epoch': 1}
{'type': 'loss', 'content': 0.016710398718714714, 'timestamp': '2025-10-02 00:16:03.140759', 'step': 2898, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:03.197517', 'step': 2898, 'epoch': 1}
{'type': 'loss', 'content': 0.04365245997905731, 'timestamp': '2025-10-02 00:16:03.206466', 'step': 2899, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:16:03.266078', 'step': 2899, 'epoch': 1}
{'type': 'loss', 'content': 0.07233259081840515, 'timestamp': '2025-10-02 00:16:03.277085', 'step': 2900, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:03.334620', 'step': 2900, 'epoch': 1}
{'type': 'loss', 'content': 0.10894448310136795, 'timestamp': '2025-10-02 00:16:03.337546', 'step': 2901, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:03.394265', 'step': 2901, 'epoch': 1}
{'type': 'loss', 'content': 0.13177567720413208, 'timestamp': '2025-10-02 00:16:03.397452', 'step': 2902, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:16:03.470055', 'step': 2902, 'epoch': 1}
{'type': 'loss', 'content': 0.070425845682621, 'timestamp': '2025-10-02 00:16:03.482370', 'step': 2903, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:03.539335', 'step': 2903, 'epoch': 1}
{'type': 'loss', 'content': 0.035275109112262726, 'timestamp': '2025-10-02 00:16:03.545550', 'step': 2904, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:03.601201', 'step': 2904, 'epoch': 1}
{'type': 'loss', 'content': 0.06117641180753708, 'timestamp': '2025-10-02 00:16:03.607708', 'step': 2905, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:03.666329', 'step': 2905, 'epoch': 1}
{'type': 'loss', 'content': 0.0532769076526165, 'timestamp': '2025-10-02 00:16:03.669280', 'step': 2906, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:03.725984', 'step': 2906, 'epoch': 1}
{'type': 'loss', 'content': 0.158453568816185, 'timestamp': '2025-10-02 00:16:03.728461', 'step': 2907, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:03.783664', 'step': 2907, 'epoch': 1}
{'type': 'loss', 'content': 0.115597203373909, 'timestamp': '2025-10-02 00:16:03.791659', 'step': 2908, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:03.846351', 'step': 2908, 'epoch': 1}
{'type': 'loss', 'content': 0.06543895602226257, 'timestamp': '2025-10-02 00:16:03.849120', 'step': 2909, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:03.904221', 'step': 2909, 'epoch': 1}
{'type': 'loss', 'content': 0.13150936365127563, 'timestamp': '2025-10-02 00:16:03.906521', 'step': 2910, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:03.961318', 'step': 2910, 'epoch': 1}
{'type': 'loss', 'content': 0.070986308157444, 'timestamp': '2025-10-02 00:16:03.963870', 'step': 2911, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:04.019342', 'step': 2911, 'epoch': 1}
{'type': 'loss', 'content': 0.03911760821938515, 'timestamp': '2025-10-02 00:16:04.027556', 'step': 2912, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:16:04.081794', 'step': 2912, 'epoch': 1}
{'type': 'loss', 'content': 0.21708008646965027, 'timestamp': '2025-10-02 00:16:04.084786', 'step': 2913, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:04.140937', 'step': 2913, 'epoch': 1}
{'type': 'loss', 'content': 0.12981770932674408, 'timestamp': '2025-10-02 00:16:04.144343', 'step': 2914, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:04.202128', 'step': 2914, 'epoch': 1}
{'type': 'loss', 'content': 0.09217037260532379, 'timestamp': '2025-10-02 00:16:04.209610', 'step': 2915, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:04.265792', 'step': 2915, 'epoch': 1}
{'type': 'loss', 'content': 0.14562439918518066, 'timestamp': '2025-10-02 00:16:04.272493', 'step': 2916, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:04.327408', 'step': 2916, 'epoch': 1}
{'type': 'loss', 'content': 0.21270786225795746, 'timestamp': '2025-10-02 00:16:04.330628', 'step': 2917, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:04.389388', 'step': 2917, 'epoch': 1}
{'type': 'loss', 'content': 0.10756402462720871, 'timestamp': '2025-10-02 00:16:04.391890', 'step': 2918, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:04.449442', 'step': 2918, 'epoch': 1}
{'type': 'loss', 'content': 0.07346726208925247, 'timestamp': '2025-10-02 00:16:04.457000', 'step': 2919, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:04.514369', 'step': 2919, 'epoch': 1}
{'type': 'loss', 'content': 0.1319914013147354, 'timestamp': '2025-10-02 00:16:04.521252', 'step': 2920, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:16:04.583253', 'step': 2920, 'epoch': 1}
{'type': 'loss', 'content': 0.1617768108844757, 'timestamp': '2025-10-02 00:16:04.594547', 'step': 2921, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:04.651741', 'step': 2921, 'epoch': 1}
{'type': 'loss', 'content': 0.06929302215576172, 'timestamp': '2025-10-02 00:16:04.654432', 'step': 2922, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:04.710933', 'step': 2922, 'epoch': 1}
{'type': 'loss', 'content': 0.03705280274152756, 'timestamp': '2025-10-02 00:16:04.713318', 'step': 2923, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:16:04.768826', 'step': 2923, 'epoch': 1}
{'type': 'loss', 'content': 0.15175753831863403, 'timestamp': '2025-10-02 00:16:04.774982', 'step': 2924, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:04.831584', 'step': 2924, 'epoch': 1}
{'type': 'loss', 'content': 0.08253535628318787, 'timestamp': '2025-10-02 00:16:04.834600', 'step': 2925, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:04.891267', 'step': 2925, 'epoch': 1}
{'type': 'loss', 'content': 0.06979881972074509, 'timestamp': '2025-10-02 00:16:04.900830', 'step': 2926, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:16:04.960596', 'step': 2926, 'epoch': 1}
{'type': 'loss', 'content': 0.09935412555932999, 'timestamp': '2025-10-02 00:16:04.970789', 'step': 2927, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:05.025250', 'step': 2927, 'epoch': 1}
{'type': 'loss', 'content': 0.10988351702690125, 'timestamp': '2025-10-02 00:16:05.031699', 'step': 2928, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:16:05.088228', 'step': 2928, 'epoch': 1}
{'type': 'loss', 'content': 0.10998983681201935, 'timestamp': '2025-10-02 00:16:05.090484', 'step': 2929, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:05.146775', 'step': 2929, 'epoch': 1}
{'type': 'loss', 'content': 0.04202680662274361, 'timestamp': '2025-10-02 00:16:05.154312', 'step': 2930, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:05.211226', 'step': 2930, 'epoch': 1}
{'type': 'loss', 'content': 0.2902534604072571, 'timestamp': '2025-10-02 00:16:05.213629', 'step': 2931, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:16:05.272620', 'step': 2931, 'epoch': 1}
{'type': 'loss', 'content': 0.06267248839139938, 'timestamp': '2025-10-02 00:16:05.283646', 'step': 2932, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:05.338831', 'step': 2932, 'epoch': 1}
{'type': 'loss', 'content': 0.08976190537214279, 'timestamp': '2025-10-02 00:16:05.346270', 'step': 2933, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:05.401963', 'step': 2933, 'epoch': 1}
{'type': 'loss', 'content': 0.07625357061624527, 'timestamp': '2025-10-02 00:16:05.407694', 'step': 2934, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:16:05.472565', 'step': 2934, 'epoch': 1}
{'type': 'loss', 'content': 0.023520687595009804, 'timestamp': '2025-10-02 00:16:05.483425', 'step': 2935, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:05.542884', 'step': 2935, 'epoch': 1}
{'type': 'loss', 'content': 0.15292194485664368, 'timestamp': '2025-10-02 00:16:05.550339', 'step': 2936, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:05.605207', 'step': 2936, 'epoch': 1}
{'type': 'loss', 'content': 0.16194400191307068, 'timestamp': '2025-10-02 00:16:05.607747', 'step': 2937, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:16:05.670478', 'step': 2937, 'epoch': 1}
{'type': 'loss', 'content': 0.0705418810248375, 'timestamp': '2025-10-02 00:16:05.680944', 'step': 2938, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:05.735582', 'step': 2938, 'epoch': 1}
{'type': 'loss', 'content': 0.06689881533384323, 'timestamp': '2025-10-02 00:16:05.737881', 'step': 2939, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:05.792200', 'step': 2939, 'epoch': 1}
{'type': 'loss', 'content': 0.10634920001029968, 'timestamp': '2025-10-02 00:16:05.798184', 'step': 2940, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:05.852305', 'step': 2940, 'epoch': 1}
{'type': 'loss', 'content': 0.08415310829877853, 'timestamp': '2025-10-02 00:16:05.854583', 'step': 2941, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:05.909342', 'step': 2941, 'epoch': 1}
{'type': 'loss', 'content': 0.06660744547843933, 'timestamp': '2025-10-02 00:16:05.915036', 'step': 2942, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:05.971159', 'step': 2942, 'epoch': 1}
{'type': 'loss', 'content': 0.02345023863017559, 'timestamp': '2025-10-02 00:16:05.980686', 'step': 2943, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:06.035447', 'step': 2943, 'epoch': 1}
{'type': 'loss', 'content': 0.18421031534671783, 'timestamp': '2025-10-02 00:16:06.040947', 'step': 2944, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:16:06.095338', 'step': 2944, 'epoch': 1}
{'type': 'loss', 'content': 0.17564089596271515, 'timestamp': '2025-10-02 00:16:06.097787', 'step': 2945, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:06.153039', 'step': 2945, 'epoch': 1}
{'type': 'loss', 'content': 0.07096009701490402, 'timestamp': '2025-10-02 00:16:06.155291', 'step': 2946, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:16:06.209611', 'step': 2946, 'epoch': 1}
{'type': 'loss', 'content': 0.0925607979297638, 'timestamp': '2025-10-02 00:16:06.212532', 'step': 2947, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:06.266784', 'step': 2947, 'epoch': 1}
{'type': 'loss', 'content': 0.08527538925409317, 'timestamp': '2025-10-02 00:16:06.272800', 'step': 2948, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:16:06.340597', 'step': 2948, 'epoch': 1}
{'type': 'loss', 'content': 0.07892502099275589, 'timestamp': '2025-10-02 00:16:06.354075', 'step': 2949, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:16:06.409400', 'step': 2949, 'epoch': 1}
{'type': 'loss', 'content': 0.03841570392251015, 'timestamp': '2025-10-02 00:16:06.411554', 'step': 2950, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:16:06.465640', 'step': 2950, 'epoch': 1}
{'type': 'loss', 'content': 0.1575693041086197, 'timestamp': '2025-10-02 00:16:06.467767', 'step': 2951, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:06.524723', 'step': 2951, 'epoch': 1}
{'type': 'loss', 'content': 0.1954485923051834, 'timestamp': '2025-10-02 00:16:06.530388', 'step': 2952, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:06.585054', 'step': 2952, 'epoch': 1}
{'type': 'loss', 'content': 0.024273378774523735, 'timestamp': '2025-10-02 00:16:06.587313', 'step': 2953, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:16:06.643287', 'step': 2953, 'epoch': 1}
{'type': 'loss', 'content': 0.15336619317531586, 'timestamp': '2025-10-02 00:16:06.647866', 'step': 2954, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:06.705369', 'step': 2954, 'epoch': 1}
{'type': 'loss', 'content': 0.06312165409326553, 'timestamp': '2025-10-02 00:16:06.707840', 'step': 2955, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:06.770230', 'step': 2955, 'epoch': 1}
{'type': 'loss', 'content': 0.18215711414813995, 'timestamp': '2025-10-02 00:16:06.776180', 'step': 2956, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:06.829911', 'step': 2956, 'epoch': 1}
{'type': 'loss', 'content': 0.12344207614660263, 'timestamp': '2025-10-02 00:16:06.832361', 'step': 2957, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:16:06.895622', 'step': 2957, 'epoch': 1}
{'type': 'loss', 'content': 0.032771818339824677, 'timestamp': '2025-10-02 00:16:06.906488', 'step': 2958, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:06.962334', 'step': 2958, 'epoch': 1}
{'type': 'loss', 'content': 0.09974852204322815, 'timestamp': '2025-10-02 00:16:06.971942', 'step': 2959, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:16:07.031096', 'step': 2959, 'epoch': 1}
{'type': 'loss', 'content': 0.03923266753554344, 'timestamp': '2025-10-02 00:16:07.042050', 'step': 2960, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:16:07.110145', 'step': 2960, 'epoch': 1}
{'type': 'loss', 'content': 0.11951850354671478, 'timestamp': '2025-10-02 00:16:07.112492', 'step': 2961, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:07.167753', 'step': 2961, 'epoch': 1}
{'type': 'loss', 'content': 0.16510160267353058, 'timestamp': '2025-10-02 00:16:07.170095', 'step': 2962, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:07.224448', 'step': 2962, 'epoch': 1}
{'type': 'loss', 'content': 0.17535006999969482, 'timestamp': '2025-10-02 00:16:07.227354', 'step': 2963, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:07.281424', 'step': 2963, 'epoch': 1}
{'type': 'loss', 'content': 0.13833415508270264, 'timestamp': '2025-10-02 00:16:07.287648', 'step': 2964, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:07.342048', 'step': 2964, 'epoch': 1}
{'type': 'loss', 'content': 0.03737149015069008, 'timestamp': '2025-10-02 00:16:07.347709', 'step': 2965, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:07.401916', 'step': 2965, 'epoch': 1}
{'type': 'loss', 'content': 0.27125900983810425, 'timestamp': '2025-10-02 00:16:07.403881', 'step': 2966, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:07.458767', 'step': 2966, 'epoch': 1}
{'type': 'loss', 'content': 0.05459992587566376, 'timestamp': '2025-10-02 00:16:07.460678', 'step': 2967, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:07.514701', 'step': 2967, 'epoch': 1}
{'type': 'loss', 'content': 0.2511412799358368, 'timestamp': '2025-10-02 00:16:07.520870', 'step': 2968, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:07.574612', 'step': 2968, 'epoch': 1}
{'type': 'loss', 'content': 0.17101500928401947, 'timestamp': '2025-10-02 00:16:07.576961', 'step': 2969, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:07.632604', 'step': 2969, 'epoch': 1}
{'type': 'loss', 'content': 0.12331854552030563, 'timestamp': '2025-10-02 00:16:07.634893', 'step': 2970, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:07.688784', 'step': 2970, 'epoch': 1}
{'type': 'loss', 'content': 0.20183509588241577, 'timestamp': '2025-10-02 00:16:07.691351', 'step': 2971, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:07.745403', 'step': 2971, 'epoch': 1}
{'type': 'loss', 'content': 0.1672033667564392, 'timestamp': '2025-10-02 00:16:07.751433', 'step': 2972, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:07.806612', 'step': 2972, 'epoch': 1}
{'type': 'loss', 'content': 0.025563200935721397, 'timestamp': '2025-10-02 00:16:07.813935', 'step': 2973, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:16:07.875952', 'step': 2973, 'epoch': 1}
{'type': 'loss', 'content': 0.12446723133325577, 'timestamp': '2025-10-02 00:16:07.877881', 'step': 2974, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:07.932695', 'step': 2974, 'epoch': 1}
{'type': 'loss', 'content': 0.08140478283166885, 'timestamp': '2025-10-02 00:16:07.934786', 'step': 2975, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:07.989640', 'step': 2975, 'epoch': 1}
{'type': 'loss', 'content': 0.07033515721559525, 'timestamp': '2025-10-02 00:16:07.995211', 'step': 2976, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:16:08.060784', 'step': 2976, 'epoch': 1}
{'type': 'loss', 'content': 0.05685223639011383, 'timestamp': '2025-10-02 00:16:08.072081', 'step': 2977, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:08.126679', 'step': 2977, 'epoch': 1}
{'type': 'loss', 'content': 0.11181145161390305, 'timestamp': '2025-10-02 00:16:08.129102', 'step': 2978, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:16:08.191588', 'step': 2978, 'epoch': 1}
{'type': 'loss', 'content': 0.03440620377659798, 'timestamp': '2025-10-02 00:16:08.201702', 'step': 2979, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:08.255899', 'step': 2979, 'epoch': 1}
{'type': 'loss', 'content': 0.08322347700595856, 'timestamp': '2025-10-02 00:16:08.263871', 'step': 2980, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:08.318657', 'step': 2980, 'epoch': 1}
{'type': 'loss', 'content': 0.07418470829725266, 'timestamp': '2025-10-02 00:16:08.325410', 'step': 2981, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:08.380732', 'step': 2981, 'epoch': 1}
{'type': 'loss', 'content': 0.02415868639945984, 'timestamp': '2025-10-02 00:16:08.387926', 'step': 2982, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:08.443082', 'step': 2982, 'epoch': 1}
{'type': 'loss', 'content': 0.1698126196861267, 'timestamp': '2025-10-02 00:16:08.445107', 'step': 2983, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:16:08.503998', 'step': 2983, 'epoch': 1}
{'type': 'loss', 'content': 0.041915394365787506, 'timestamp': '2025-10-02 00:16:08.514903', 'step': 2984, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:08.569946', 'step': 2984, 'epoch': 1}
{'type': 'loss', 'content': 0.06856182217597961, 'timestamp': '2025-10-02 00:16:08.579856', 'step': 2985, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:08.635179', 'step': 2985, 'epoch': 1}
{'type': 'loss', 'content': 0.0290568545460701, 'timestamp': '2025-10-02 00:16:08.644548', 'step': 2986, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:08.699035', 'step': 2986, 'epoch': 1}
{'type': 'loss', 'content': 0.06981584429740906, 'timestamp': '2025-10-02 00:16:08.704742', 'step': 2987, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:08.759745', 'step': 2987, 'epoch': 1}
{'type': 'loss', 'content': 0.0543297678232193, 'timestamp': '2025-10-02 00:16:08.766203', 'step': 2988, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:08.820419', 'step': 2988, 'epoch': 1}
{'type': 'loss', 'content': 0.04902319982647896, 'timestamp': '2025-10-02 00:16:08.825734', 'step': 2989, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:08.884200', 'step': 2989, 'epoch': 1}
{'type': 'loss', 'content': 0.05688970163464546, 'timestamp': '2025-10-02 00:16:08.893706', 'step': 2990, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:08.949647', 'step': 2990, 'epoch': 1}
{'type': 'loss', 'content': 0.03351902589201927, 'timestamp': '2025-10-02 00:16:08.956484', 'step': 2991, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:09.011004', 'step': 2991, 'epoch': 1}
{'type': 'loss', 'content': 0.08233325183391571, 'timestamp': '2025-10-02 00:16:09.016625', 'step': 2992, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:16:09.083113', 'step': 2992, 'epoch': 1}
{'type': 'loss', 'content': 0.0471593514084816, 'timestamp': '2025-10-02 00:16:09.096557', 'step': 2993, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:16:09.158631', 'step': 2993, 'epoch': 1}
{'type': 'loss', 'content': 0.040093619376420975, 'timestamp': '2025-10-02 00:16:09.169269', 'step': 2994, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:09.223640', 'step': 2994, 'epoch': 1}
{'type': 'loss', 'content': 0.09702198952436447, 'timestamp': '2025-10-02 00:16:09.226144', 'step': 2995, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:16:09.287882', 'step': 2995, 'epoch': 1}
{'type': 'loss', 'content': 0.03190746530890465, 'timestamp': '2025-10-02 00:16:09.299317', 'step': 2996, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:09.353197', 'step': 2996, 'epoch': 1}
{'type': 'loss', 'content': 0.25216835737228394, 'timestamp': '2025-10-02 00:16:09.355123', 'step': 2997, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:09.409430', 'step': 2997, 'epoch': 1}
{'type': 'loss', 'content': 0.26090770959854126, 'timestamp': '2025-10-02 00:16:09.411479', 'step': 2998, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:09.466065', 'step': 2998, 'epoch': 1}
{'type': 'loss', 'content': 0.13798588514328003, 'timestamp': '2025-10-02 00:16:09.471574', 'step': 2999, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:09.527019', 'step': 2999, 'epoch': 1}
{'type': 'loss', 'content': 0.057200174778699875, 'timestamp': '2025-10-02 00:16:09.532728', 'step': 3000, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 3000', 'timestamp': '2025-10-02 00:16:09.944634', 'step': 3000, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:09.998607', 'step': 3000, 'epoch': 1}
{'type': 'loss', 'content': 0.1040005013346672, 'timestamp': '2025-10-02 00:16:10.000729', 'step': 3001, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:10.056937', 'step': 3001, 'epoch': 1}
{'type': 'loss', 'content': 0.10262850672006607, 'timestamp': '2025-10-02 00:16:10.059371', 'step': 3002, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:16:10.121227', 'step': 3002, 'epoch': 1}
{'type': 'loss', 'content': 0.04669847711920738, 'timestamp': '2025-10-02 00:16:10.131986', 'step': 3003, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:16:10.193669', 'step': 3003, 'epoch': 1}
{'type': 'loss', 'content': 0.13002881407737732, 'timestamp': '2025-10-02 00:16:10.205059', 'step': 3004, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:10.259415', 'step': 3004, 'epoch': 1}
{'type': 'loss', 'content': 0.0831068754196167, 'timestamp': '2025-10-02 00:16:10.261346', 'step': 3005, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:10.315409', 'step': 3005, 'epoch': 1}
{'type': 'loss', 'content': 0.06807687878608704, 'timestamp': '2025-10-02 00:16:10.317286', 'step': 3006, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:16:10.371455', 'step': 3006, 'epoch': 1}
{'type': 'loss', 'content': 0.12433168292045593, 'timestamp': '2025-10-02 00:16:10.373584', 'step': 3007, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:10.428452', 'step': 3007, 'epoch': 1}
{'type': 'loss', 'content': 0.050311945378780365, 'timestamp': '2025-10-02 00:16:10.434149', 'step': 3008, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:16:10.495166', 'step': 3008, 'epoch': 1}
{'type': 'loss', 'content': 0.05203996226191521, 'timestamp': '2025-10-02 00:16:10.506809', 'step': 3009, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:10.563478', 'step': 3009, 'epoch': 1}
{'type': 'loss', 'content': 0.11243616789579391, 'timestamp': '2025-10-02 00:16:10.565759', 'step': 3010, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:16:10.621943', 'step': 3010, 'epoch': 1}
{'type': 'loss', 'content': 0.1128564178943634, 'timestamp': '2025-10-02 00:16:10.624359', 'step': 3011, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:10.681222', 'step': 3011, 'epoch': 1}
{'type': 'loss', 'content': 0.08696582913398743, 'timestamp': '2025-10-02 00:16:10.688787', 'step': 3012, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:10.745520', 'step': 3012, 'epoch': 1}
{'type': 'loss', 'content': 0.11493954807519913, 'timestamp': '2025-10-02 00:16:10.748111', 'step': 3013, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:10.804591', 'step': 3013, 'epoch': 1}
{'type': 'loss', 'content': 0.2768687605857849, 'timestamp': '2025-10-02 00:16:10.807048', 'step': 3014, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:16:10.866638', 'step': 3014, 'epoch': 1}
{'type': 'loss', 'content': 0.02936781570315361, 'timestamp': '2025-10-02 00:16:10.876801', 'step': 3015, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:10.933288', 'step': 3015, 'epoch': 1}
{'type': 'loss', 'content': 0.07291058450937271, 'timestamp': '2025-10-02 00:16:10.940975', 'step': 3016, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:10.994738', 'step': 3016, 'epoch': 1}
{'type': 'loss', 'content': 0.13188569247722626, 'timestamp': '2025-10-02 00:16:10.997936', 'step': 3017, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:11.052600', 'step': 3017, 'epoch': 1}
{'type': 'loss', 'content': 0.060142189264297485, 'timestamp': '2025-10-02 00:16:11.055082', 'step': 3018, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:11.110229', 'step': 3018, 'epoch': 1}
{'type': 'loss', 'content': 0.12415623664855957, 'timestamp': '2025-10-02 00:16:11.112614', 'step': 3019, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:11.166903', 'step': 3019, 'epoch': 1}
{'type': 'loss', 'content': 0.17422117292881012, 'timestamp': '2025-10-02 00:16:11.173031', 'step': 3020, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:11.227193', 'step': 3020, 'epoch': 1}
{'type': 'loss', 'content': 0.030868245288729668, 'timestamp': '2025-10-02 00:16:11.237414', 'step': 3021, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:16:11.299193', 'step': 3021, 'epoch': 1}
{'type': 'loss', 'content': 0.06717648357152939, 'timestamp': '2025-10-02 00:16:11.309848', 'step': 3022, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:11.365376', 'step': 3022, 'epoch': 1}
{'type': 'loss', 'content': 0.1221836730837822, 'timestamp': '2025-10-02 00:16:11.367798', 'step': 3023, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:11.422256', 'step': 3023, 'epoch': 1}
{'type': 'loss', 'content': 0.16571110486984253, 'timestamp': '2025-10-02 00:16:11.428318', 'step': 3024, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:11.482471', 'step': 3024, 'epoch': 1}
{'type': 'loss', 'content': 0.053300634026527405, 'timestamp': '2025-10-02 00:16:11.484838', 'step': 3025, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:11.538964', 'step': 3025, 'epoch': 1}
{'type': 'loss', 'content': 0.06757482141256332, 'timestamp': '2025-10-02 00:16:11.546559', 'step': 3026, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:16:11.608910', 'step': 3026, 'epoch': 1}
{'type': 'loss', 'content': 0.06149495020508766, 'timestamp': '2025-10-02 00:16:11.619768', 'step': 3027, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:11.674316', 'step': 3027, 'epoch': 1}
{'type': 'loss', 'content': 0.17909006774425507, 'timestamp': '2025-10-02 00:16:11.680427', 'step': 3028, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:11.734091', 'step': 3028, 'epoch': 1}
{'type': 'loss', 'content': 0.1354319006204605, 'timestamp': '2025-10-02 00:16:11.741670', 'step': 3029, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:11.795763', 'step': 3029, 'epoch': 1}
{'type': 'loss', 'content': 0.06561551243066788, 'timestamp': '2025-10-02 00:16:11.798233', 'step': 3030, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:16:11.873013', 'step': 3030, 'epoch': 1}
{'type': 'loss', 'content': 0.028229305520653725, 'timestamp': '2025-10-02 00:16:11.886301', 'step': 3031, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:11.942759', 'step': 3031, 'epoch': 1}
{'type': 'loss', 'content': 0.03804695978760719, 'timestamp': '2025-10-02 00:16:11.948695', 'step': 3032, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:16:12.010675', 'step': 3032, 'epoch': 1}
{'type': 'loss', 'content': 0.01984252594411373, 'timestamp': '2025-10-02 00:16:12.022452', 'step': 3033, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:12.077197', 'step': 3033, 'epoch': 1}
{'type': 'loss', 'content': 0.08792834728956223, 'timestamp': '2025-10-02 00:16:12.086566', 'step': 3034, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:12.140758', 'step': 3034, 'epoch': 1}
{'type': 'loss', 'content': 0.11828944087028503, 'timestamp': '2025-10-02 00:16:12.146471', 'step': 3035, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:12.200273', 'step': 3035, 'epoch': 1}
{'type': 'loss', 'content': 0.07355137169361115, 'timestamp': '2025-10-02 00:16:12.206085', 'step': 3036, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:12.259471', 'step': 3036, 'epoch': 1}
{'type': 'loss', 'content': 0.033187106251716614, 'timestamp': '2025-10-02 00:16:12.265476', 'step': 3037, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:12.319170', 'step': 3037, 'epoch': 1}
{'type': 'loss', 'content': 0.049635306000709534, 'timestamp': '2025-10-02 00:16:12.325186', 'step': 3038, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:12.379831', 'step': 3038, 'epoch': 1}
{'type': 'loss', 'content': 0.03727918490767479, 'timestamp': '2025-10-02 00:16:12.382210', 'step': 3039, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:16:12.443491', 'step': 3039, 'epoch': 1}
{'type': 'loss', 'content': 0.06484612077474594, 'timestamp': '2025-10-02 00:16:12.454762', 'step': 3040, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:12.508174', 'step': 3040, 'epoch': 1}
{'type': 'loss', 'content': 0.18971571326255798, 'timestamp': '2025-10-02 00:16:12.510569', 'step': 3041, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:12.565227', 'step': 3041, 'epoch': 1}
{'type': 'loss', 'content': 0.2126697450876236, 'timestamp': '2025-10-02 00:16:12.567650', 'step': 3042, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:12.622245', 'step': 3042, 'epoch': 1}
{'type': 'loss', 'content': 0.1293739527463913, 'timestamp': '2025-10-02 00:16:12.625708', 'step': 3043, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:16:12.696771', 'step': 3043, 'epoch': 1}
{'type': 'loss', 'content': 0.015220475383102894, 'timestamp': '2025-10-02 00:16:12.709869', 'step': 3044, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:12.767441', 'step': 3044, 'epoch': 1}
{'type': 'loss', 'content': 0.02631218545138836, 'timestamp': '2025-10-02 00:16:12.776837', 'step': 3045, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:12.832666', 'step': 3045, 'epoch': 1}
{'type': 'loss', 'content': 0.10715481638908386, 'timestamp': '2025-10-02 00:16:12.835583', 'step': 3046, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:12.891080', 'step': 3046, 'epoch': 1}
{'type': 'loss', 'content': 0.10773530602455139, 'timestamp': '2025-10-02 00:16:12.894094', 'step': 3047, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:12.951836', 'step': 3047, 'epoch': 1}
{'type': 'loss', 'content': 0.07544796913862228, 'timestamp': '2025-10-02 00:16:12.957998', 'step': 3048, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:13.011783', 'step': 3048, 'epoch': 1}
{'type': 'loss', 'content': 0.18151476979255676, 'timestamp': '2025-10-02 00:16:13.014539', 'step': 3049, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:16:13.075776', 'step': 3049, 'epoch': 1}
{'type': 'loss', 'content': 0.02777133882045746, 'timestamp': '2025-10-02 00:16:13.085946', 'step': 3050, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:13.140412', 'step': 3050, 'epoch': 1}
{'type': 'loss', 'content': 0.16484665870666504, 'timestamp': '2025-10-02 00:16:13.142765', 'step': 3051, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:16:13.206160', 'step': 3051, 'epoch': 1}
{'type': 'loss', 'content': 0.04455098137259483, 'timestamp': '2025-10-02 00:16:13.217406', 'step': 3052, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:13.273149', 'step': 3052, 'epoch': 1}
{'type': 'loss', 'content': 0.030921675264835358, 'timestamp': '2025-10-02 00:16:13.280709', 'step': 3053, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:13.336447', 'step': 3053, 'epoch': 1}
{'type': 'loss', 'content': 0.11648286879062653, 'timestamp': '2025-10-02 00:16:13.339758', 'step': 3054, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:13.396713', 'step': 3054, 'epoch': 1}
{'type': 'loss', 'content': 0.09738747030496597, 'timestamp': '2025-10-02 00:16:13.399671', 'step': 3055, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:13.455120', 'step': 3055, 'epoch': 1}
{'type': 'loss', 'content': 0.0852692723274231, 'timestamp': '2025-10-02 00:16:13.461464', 'step': 3056, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:13.516180', 'step': 3056, 'epoch': 1}
{'type': 'loss', 'content': 0.04601992666721344, 'timestamp': '2025-10-02 00:16:13.526456', 'step': 3057, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:13.585372', 'step': 3057, 'epoch': 1}
{'type': 'loss', 'content': 0.04825561121106148, 'timestamp': '2025-10-02 00:16:13.594689', 'step': 3058, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:13.651277', 'step': 3058, 'epoch': 1}
{'type': 'loss', 'content': 0.049413807690143585, 'timestamp': '2025-10-02 00:16:13.654336', 'step': 3059, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:13.709449', 'step': 3059, 'epoch': 1}
{'type': 'loss', 'content': 0.05470629781484604, 'timestamp': '2025-10-02 00:16:13.716100', 'step': 3060, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:13.772476', 'step': 3060, 'epoch': 1}
{'type': 'loss', 'content': 0.10308106988668442, 'timestamp': '2025-10-02 00:16:13.775893', 'step': 3061, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:13.832308', 'step': 3061, 'epoch': 1}
{'type': 'loss', 'content': 0.1587400585412979, 'timestamp': '2025-10-02 00:16:13.834402', 'step': 3062, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:13.890591', 'step': 3062, 'epoch': 1}
{'type': 'loss', 'content': 0.028394833207130432, 'timestamp': '2025-10-02 00:16:13.900105', 'step': 3063, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:16:13.968454', 'step': 3063, 'epoch': 1}
{'type': 'loss', 'content': 0.11846840381622314, 'timestamp': '2025-10-02 00:16:13.981194', 'step': 3064, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:14.036197', 'step': 3064, 'epoch': 1}
{'type': 'loss', 'content': 0.07249579578638077, 'timestamp': '2025-10-02 00:16:14.038564', 'step': 3065, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:16:14.109032', 'step': 3065, 'epoch': 1}
{'type': 'loss', 'content': 0.060765013098716736, 'timestamp': '2025-10-02 00:16:14.121651', 'step': 3066, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:16:14.183221', 'step': 3066, 'epoch': 1}
{'type': 'loss', 'content': 0.03393620625138283, 'timestamp': '2025-10-02 00:16:14.193703', 'step': 3067, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:14.248336', 'step': 3067, 'epoch': 1}
{'type': 'loss', 'content': 0.06400184333324432, 'timestamp': '2025-10-02 00:16:14.254047', 'step': 3068, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:16:14.306989', 'step': 3068, 'epoch': 1}
{'type': 'loss', 'content': 0.10008466243743896, 'timestamp': '2025-10-02 00:16:14.309374', 'step': 3069, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:14.363287', 'step': 3069, 'epoch': 1}
{'type': 'loss', 'content': 0.07849784195423126, 'timestamp': '2025-10-02 00:16:14.372589', 'step': 3070, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:14.427345', 'step': 3070, 'epoch': 1}
{'type': 'loss', 'content': 0.10742629319429398, 'timestamp': '2025-10-02 00:16:14.433189', 'step': 3071, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:14.487263', 'step': 3071, 'epoch': 1}
{'type': 'loss', 'content': 0.05275304988026619, 'timestamp': '2025-10-02 00:16:14.493601', 'step': 3072, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:16:14.559651', 'step': 3072, 'epoch': 1}
{'type': 'loss', 'content': 0.04573412612080574, 'timestamp': '2025-10-02 00:16:14.572576', 'step': 3073, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:16:14.634280', 'step': 3073, 'epoch': 1}
{'type': 'loss', 'content': 0.042469821870326996, 'timestamp': '2025-10-02 00:16:14.644812', 'step': 3074, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:14.699029', 'step': 3074, 'epoch': 1}
{'type': 'loss', 'content': 0.10533444583415985, 'timestamp': '2025-10-02 00:16:14.706422', 'step': 3075, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:14.760809', 'step': 3075, 'epoch': 1}
{'type': 'loss', 'content': 0.14413075149059296, 'timestamp': '2025-10-02 00:16:14.766790', 'step': 3076, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:14.821573', 'step': 3076, 'epoch': 1}
{'type': 'loss', 'content': 0.038541290909051895, 'timestamp': '2025-10-02 00:16:14.827177', 'step': 3077, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:16:14.881828', 'step': 3077, 'epoch': 1}
{'type': 'loss', 'content': 0.11870193481445312, 'timestamp': '2025-10-02 00:16:14.884419', 'step': 3078, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:16:14.939263', 'step': 3078, 'epoch': 1}
{'type': 'loss', 'content': 0.1590922772884369, 'timestamp': '2025-10-02 00:16:14.941618', 'step': 3079, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:14.995822', 'step': 3079, 'epoch': 1}
{'type': 'loss', 'content': 0.04015597701072693, 'timestamp': '2025-10-02 00:16:15.003101', 'step': 3080, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:15.056888', 'step': 3080, 'epoch': 1}
{'type': 'loss', 'content': 0.1587592363357544, 'timestamp': '2025-10-02 00:16:15.064313', 'step': 3081, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:15.119616', 'step': 3081, 'epoch': 1}
{'type': 'loss', 'content': 0.07905201613903046, 'timestamp': '2025-10-02 00:16:15.125540', 'step': 3082, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:15.179935', 'step': 3082, 'epoch': 1}
{'type': 'loss', 'content': 0.0667860135436058, 'timestamp': '2025-10-02 00:16:15.182214', 'step': 3083, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:15.236712', 'step': 3083, 'epoch': 1}
{'type': 'loss', 'content': 0.051358602941036224, 'timestamp': '2025-10-02 00:16:15.242558', 'step': 3084, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:15.295698', 'step': 3084, 'epoch': 1}
{'type': 'loss', 'content': 0.0794830396771431, 'timestamp': '2025-10-02 00:16:15.298238', 'step': 3085, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:15.352486', 'step': 3085, 'epoch': 1}
{'type': 'loss', 'content': 0.07419571280479431, 'timestamp': '2025-10-02 00:16:15.355150', 'step': 3086, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:15.409950', 'step': 3086, 'epoch': 1}
{'type': 'loss', 'content': 0.10962604731321335, 'timestamp': '2025-10-02 00:16:15.412493', 'step': 3087, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:15.466415', 'step': 3087, 'epoch': 1}
{'type': 'loss', 'content': 0.18314215540885925, 'timestamp': '2025-10-02 00:16:15.472859', 'step': 3088, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:15.526776', 'step': 3088, 'epoch': 1}
{'type': 'loss', 'content': 0.09665677696466446, 'timestamp': '2025-10-02 00:16:15.532679', 'step': 3089, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:15.587607', 'step': 3089, 'epoch': 1}
{'type': 'loss', 'content': 0.2944673001766205, 'timestamp': '2025-10-02 00:16:15.590044', 'step': 3090, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:15.644664', 'step': 3090, 'epoch': 1}
{'type': 'loss', 'content': 0.13800328969955444, 'timestamp': '2025-10-02 00:16:15.647235', 'step': 3091, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:16:15.709547', 'step': 3091, 'epoch': 1}
{'type': 'loss', 'content': 0.04643743485212326, 'timestamp': '2025-10-02 00:16:15.720730', 'step': 3092, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:16:15.777200', 'step': 3092, 'epoch': 1}
{'type': 'loss', 'content': 0.14875325560569763, 'timestamp': '2025-10-02 00:16:15.780028', 'step': 3093, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:16:15.839037', 'step': 3093, 'epoch': 1}
{'type': 'loss', 'content': 0.053710129112005234, 'timestamp': '2025-10-02 00:16:15.849255', 'step': 3094, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:15.913880', 'step': 3094, 'epoch': 1}
{'type': 'loss', 'content': 0.09973764419555664, 'timestamp': '2025-10-02 00:16:15.923428', 'step': 3095, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:15.977346', 'step': 3095, 'epoch': 1}
{'type': 'loss', 'content': 0.08263048529624939, 'timestamp': '2025-10-02 00:16:15.983149', 'step': 3096, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:16:16.040265', 'step': 3096, 'epoch': 1}
{'type': 'loss', 'content': 0.18620441854000092, 'timestamp': '2025-10-02 00:16:16.042568', 'step': 3097, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:16.096358', 'step': 3097, 'epoch': 1}
{'type': 'loss', 'content': 0.2418982982635498, 'timestamp': '2025-10-02 00:16:16.099632', 'step': 3098, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:16.154842', 'step': 3098, 'epoch': 1}
{'type': 'loss', 'content': 0.09737524390220642, 'timestamp': '2025-10-02 00:16:16.162200', 'step': 3099, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:16.217959', 'step': 3099, 'epoch': 1}
{'type': 'loss', 'content': 0.049362920224666595, 'timestamp': '2025-10-02 00:16:16.228298', 'step': 3100, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:16:16.301763', 'step': 3100, 'epoch': 1}
{'type': 'loss', 'content': 0.016462022438645363, 'timestamp': '2025-10-02 00:16:16.316221', 'step': 3101, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:16.374199', 'step': 3101, 'epoch': 1}
{'type': 'loss', 'content': 0.07703443616628647, 'timestamp': '2025-10-02 00:16:16.376605', 'step': 3102, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:16.430549', 'step': 3102, 'epoch': 1}
{'type': 'loss', 'content': 0.10392865538597107, 'timestamp': '2025-10-02 00:16:16.433307', 'step': 3103, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:16.490032', 'step': 3103, 'epoch': 1}
{'type': 'loss', 'content': 0.024890344589948654, 'timestamp': '2025-10-02 00:16:16.500327', 'step': 3104, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:16.554301', 'step': 3104, 'epoch': 1}
{'type': 'loss', 'content': 0.12120012193918228, 'timestamp': '2025-10-02 00:16:16.558226', 'step': 3105, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:16.611807', 'step': 3105, 'epoch': 1}
{'type': 'loss', 'content': 0.17872639000415802, 'timestamp': '2025-10-02 00:16:16.614356', 'step': 3106, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:16.670901', 'step': 3106, 'epoch': 1}
{'type': 'loss', 'content': 0.08970125019550323, 'timestamp': '2025-10-02 00:16:16.680454', 'step': 3107, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:16:16.743158', 'step': 3107, 'epoch': 1}
{'type': 'loss', 'content': 0.042563166469335556, 'timestamp': '2025-10-02 00:16:16.754763', 'step': 3108, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:16.808652', 'step': 3108, 'epoch': 1}
{'type': 'loss', 'content': 0.12093957513570786, 'timestamp': '2025-10-02 00:16:16.811047', 'step': 3109, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:16.865411', 'step': 3109, 'epoch': 1}
{'type': 'loss', 'content': 0.03133174031972885, 'timestamp': '2025-10-02 00:16:16.867775', 'step': 3110, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:16.921186', 'step': 3110, 'epoch': 1}
{'type': 'loss', 'content': 0.2330128252506256, 'timestamp': '2025-10-02 00:16:16.923528', 'step': 3111, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:16:16.983491', 'step': 3111, 'epoch': 1}
{'type': 'loss', 'content': 0.027178378775715828, 'timestamp': '2025-10-02 00:16:16.994466', 'step': 3112, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:17.048614', 'step': 3112, 'epoch': 1}
{'type': 'loss', 'content': 0.059644151479005814, 'timestamp': '2025-10-02 00:16:17.050863', 'step': 3113, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:17.105130', 'step': 3113, 'epoch': 1}
{'type': 'loss', 'content': 0.11442919820547104, 'timestamp': '2025-10-02 00:16:17.107636', 'step': 3114, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:17.162677', 'step': 3114, 'epoch': 1}
{'type': 'loss', 'content': 0.10791061818599701, 'timestamp': '2025-10-02 00:16:17.165346', 'step': 3115, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:17.220482', 'step': 3115, 'epoch': 1}
{'type': 'loss', 'content': 0.038539476692676544, 'timestamp': '2025-10-02 00:16:17.226415', 'step': 3116, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:17.280745', 'step': 3116, 'epoch': 1}
{'type': 'loss', 'content': 0.10506878793239594, 'timestamp': '2025-10-02 00:16:17.283280', 'step': 3117, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:17.338182', 'step': 3117, 'epoch': 1}
{'type': 'loss', 'content': 0.06325865536928177, 'timestamp': '2025-10-02 00:16:17.344200', 'step': 3118, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:16:17.401570', 'step': 3118, 'epoch': 1}
{'type': 'loss', 'content': 0.17760850489139557, 'timestamp': '2025-10-02 00:16:17.403888', 'step': 3119, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:17.458389', 'step': 3119, 'epoch': 1}
{'type': 'loss', 'content': 0.03817782551050186, 'timestamp': '2025-10-02 00:16:17.468506', 'step': 3120, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:16:17.522152', 'step': 3120, 'epoch': 1}
{'type': 'loss', 'content': 0.16088324785232544, 'timestamp': '2025-10-02 00:16:17.524792', 'step': 3121, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:17.581421', 'step': 3121, 'epoch': 1}
{'type': 'loss', 'content': 0.09437038004398346, 'timestamp': '2025-10-02 00:16:17.590790', 'step': 3122, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:17.645688', 'step': 3122, 'epoch': 1}
{'type': 'loss', 'content': 0.041040245443582535, 'timestamp': '2025-10-02 00:16:17.648183', 'step': 3123, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:17.702273', 'step': 3123, 'epoch': 1}
{'type': 'loss', 'content': 0.14229156076908112, 'timestamp': '2025-10-02 00:16:17.708251', 'step': 3124, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:17.762129', 'step': 3124, 'epoch': 1}
{'type': 'loss', 'content': 0.07709560543298721, 'timestamp': '2025-10-02 00:16:17.764542', 'step': 3125, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:17.819073', 'step': 3125, 'epoch': 1}
{'type': 'loss', 'content': 0.040417201817035675, 'timestamp': '2025-10-02 00:16:17.825025', 'step': 3126, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:17.879243', 'step': 3126, 'epoch': 1}
{'type': 'loss', 'content': 0.07017461210489273, 'timestamp': '2025-10-02 00:16:17.885056', 'step': 3127, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:17.939017', 'step': 3127, 'epoch': 1}
{'type': 'loss', 'content': 0.11364137381315231, 'timestamp': '2025-10-02 00:16:17.947261', 'step': 3128, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:18.001725', 'step': 3128, 'epoch': 1}
{'type': 'loss', 'content': 0.037808388471603394, 'timestamp': '2025-10-02 00:16:18.007702', 'step': 3129, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:16:18.069584', 'step': 3129, 'epoch': 1}
{'type': 'loss', 'content': 0.08481909334659576, 'timestamp': '2025-10-02 00:16:18.080239', 'step': 3130, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:18.134323', 'step': 3130, 'epoch': 1}
{'type': 'loss', 'content': 0.2421068549156189, 'timestamp': '2025-10-02 00:16:18.136364', 'step': 3131, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:18.190655', 'step': 3131, 'epoch': 1}
{'type': 'loss', 'content': 0.08557222783565521, 'timestamp': '2025-10-02 00:16:18.196551', 'step': 3132, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:18.250018', 'step': 3132, 'epoch': 1}
{'type': 'loss', 'content': 0.04685213416814804, 'timestamp': '2025-10-02 00:16:18.257760', 'step': 3133, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:18.320475', 'step': 3133, 'epoch': 1}
{'type': 'loss', 'content': 0.03513054549694061, 'timestamp': '2025-10-02 00:16:18.329845', 'step': 3134, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:18.385058', 'step': 3134, 'epoch': 1}
{'type': 'loss', 'content': 0.0781111791729927, 'timestamp': '2025-10-02 00:16:18.394434', 'step': 3135, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:18.447902', 'step': 3135, 'epoch': 1}
{'type': 'loss', 'content': 0.272482693195343, 'timestamp': '2025-10-02 00:16:18.453580', 'step': 3136, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:18.507488', 'step': 3136, 'epoch': 1}
{'type': 'loss', 'content': 0.02623465657234192, 'timestamp': '2025-10-02 00:16:18.515179', 'step': 3137, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:18.569242', 'step': 3137, 'epoch': 1}
{'type': 'loss', 'content': 0.056577399373054504, 'timestamp': '2025-10-02 00:16:18.572305', 'step': 3138, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:18.626233', 'step': 3138, 'epoch': 1}
{'type': 'loss', 'content': 0.11553564667701721, 'timestamp': '2025-10-02 00:16:18.631952', 'step': 3139, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:18.687762', 'step': 3139, 'epoch': 1}
{'type': 'loss', 'content': 0.08609672635793686, 'timestamp': '2025-10-02 00:16:18.693653', 'step': 3140, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:18.747590', 'step': 3140, 'epoch': 1}
{'type': 'loss', 'content': 0.04049886763095856, 'timestamp': '2025-10-02 00:16:18.757234', 'step': 3141, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:18.811645', 'step': 3141, 'epoch': 1}
{'type': 'loss', 'content': 0.11904564499855042, 'timestamp': '2025-10-02 00:16:18.814227', 'step': 3142, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:18.868534', 'step': 3142, 'epoch': 1}
{'type': 'loss', 'content': 0.0592324361205101, 'timestamp': '2025-10-02 00:16:18.874495', 'step': 3143, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:18.928960', 'step': 3143, 'epoch': 1}
{'type': 'loss', 'content': 0.08426058292388916, 'timestamp': '2025-10-02 00:16:18.935385', 'step': 3144, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:18.990251', 'step': 3144, 'epoch': 1}
{'type': 'loss', 'content': 0.12801004946231842, 'timestamp': '2025-10-02 00:16:18.992520', 'step': 3145, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:19.045597', 'step': 3145, 'epoch': 1}
{'type': 'loss', 'content': 0.12902438640594482, 'timestamp': '2025-10-02 00:16:19.048489', 'step': 3146, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:16:19.102926', 'step': 3146, 'epoch': 1}
{'type': 'loss', 'content': 0.15774130821228027, 'timestamp': '2025-10-02 00:16:19.105224', 'step': 3147, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:19.159208', 'step': 3147, 'epoch': 1}
{'type': 'loss', 'content': 0.07560324668884277, 'timestamp': '2025-10-02 00:16:19.165890', 'step': 3148, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:16:19.219484', 'step': 3148, 'epoch': 1}
{'type': 'loss', 'content': 0.09273846447467804, 'timestamp': '2025-10-02 00:16:19.222025', 'step': 3149, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:16:19.276334', 'step': 3149, 'epoch': 1}
{'type': 'loss', 'content': 0.08654417842626572, 'timestamp': '2025-10-02 00:16:19.278839', 'step': 3150, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:19.332918', 'step': 3150, 'epoch': 1}
{'type': 'loss', 'content': 0.09335847198963165, 'timestamp': '2025-10-02 00:16:19.338881', 'step': 3151, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:19.393004', 'step': 3151, 'epoch': 1}
{'type': 'loss', 'content': 0.1168828159570694, 'timestamp': '2025-10-02 00:16:19.403170', 'step': 3152, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:19.456838', 'step': 3152, 'epoch': 1}
{'type': 'loss', 'content': 0.21153104305267334, 'timestamp': '2025-10-02 00:16:19.459651', 'step': 3153, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:19.513950', 'step': 3153, 'epoch': 1}
{'type': 'loss', 'content': 0.1105097159743309, 'timestamp': '2025-10-02 00:16:19.516492', 'step': 3154, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:19.570265', 'step': 3154, 'epoch': 1}
{'type': 'loss', 'content': 0.12451706826686859, 'timestamp': '2025-10-02 00:16:19.572960', 'step': 3155, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:16:19.632705', 'step': 3155, 'epoch': 1}
{'type': 'loss', 'content': 0.08237862586975098, 'timestamp': '2025-10-02 00:16:19.643613', 'step': 3156, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:19.697169', 'step': 3156, 'epoch': 1}
{'type': 'loss', 'content': 0.2749229669570923, 'timestamp': '2025-10-02 00:16:19.699534', 'step': 3157, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:19.753292', 'step': 3157, 'epoch': 1}
{'type': 'loss', 'content': 0.06713218241930008, 'timestamp': '2025-10-02 00:16:19.755759', 'step': 3158, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:16:19.824132', 'step': 3158, 'epoch': 1}
{'type': 'loss', 'content': 0.08526282757520676, 'timestamp': '2025-10-02 00:16:19.836441', 'step': 3159, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:16:19.890044', 'step': 3159, 'epoch': 1}
{'type': 'loss', 'content': 0.08230192959308624, 'timestamp': '2025-10-02 00:16:19.896285', 'step': 3160, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:19.950441', 'step': 3160, 'epoch': 1}
{'type': 'loss', 'content': 0.13887465000152588, 'timestamp': '2025-10-02 00:16:19.952851', 'step': 3161, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:20.006745', 'step': 3161, 'epoch': 1}
{'type': 'loss', 'content': 0.128458172082901, 'timestamp': '2025-10-02 00:16:20.009432', 'step': 3162, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:20.065150', 'step': 3162, 'epoch': 1}
{'type': 'loss', 'content': 0.251944363117218, 'timestamp': '2025-10-02 00:16:20.067562', 'step': 3163, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:20.122770', 'step': 3163, 'epoch': 1}
{'type': 'loss', 'content': 0.07484081387519836, 'timestamp': '2025-10-02 00:16:20.129106', 'step': 3164, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:20.182569', 'step': 3164, 'epoch': 1}
{'type': 'loss', 'content': 0.03443704918026924, 'timestamp': '2025-10-02 00:16:20.188622', 'step': 3165, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:20.245208', 'step': 3165, 'epoch': 1}
{'type': 'loss', 'content': 0.2035466730594635, 'timestamp': '2025-10-02 00:16:20.249961', 'step': 3166, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:20.309946', 'step': 3166, 'epoch': 1}
{'type': 'loss', 'content': 0.07978943735361099, 'timestamp': '2025-10-02 00:16:20.315687', 'step': 3167, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:20.376757', 'step': 3167, 'epoch': 1}
{'type': 'loss', 'content': 0.19790369272232056, 'timestamp': '2025-10-02 00:16:20.384648', 'step': 3168, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:20.445889', 'step': 3168, 'epoch': 1}
{'type': 'loss', 'content': 0.17419132590293884, 'timestamp': '2025-10-02 00:16:20.450108', 'step': 3169, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:20.511409', 'step': 3169, 'epoch': 1}
{'type': 'loss', 'content': 0.056383028626441956, 'timestamp': '2025-10-02 00:16:20.518987', 'step': 3170, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:20.579427', 'step': 3170, 'epoch': 1}
{'type': 'loss', 'content': 0.02852349542081356, 'timestamp': '2025-10-02 00:16:20.583285', 'step': 3171, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:20.641437', 'step': 3171, 'epoch': 1}
{'type': 'loss', 'content': 0.05048573017120361, 'timestamp': '2025-10-02 00:16:20.653268', 'step': 3172, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:16:20.729070', 'step': 3172, 'epoch': 1}
{'type': 'loss', 'content': 0.04253264516592026, 'timestamp': '2025-10-02 00:16:20.740609', 'step': 3173, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:20.805991', 'step': 3173, 'epoch': 1}
{'type': 'loss', 'content': 0.15457385778427124, 'timestamp': '2025-10-02 00:16:20.809003', 'step': 3174, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:20.878129', 'step': 3174, 'epoch': 1}
{'type': 'loss', 'content': 0.05450230836868286, 'timestamp': '2025-10-02 00:16:20.887129', 'step': 3175, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:20.944393', 'step': 3175, 'epoch': 1}
{'type': 'loss', 'content': 0.09409718960523605, 'timestamp': '2025-10-02 00:16:20.950789', 'step': 3176, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:21.010608', 'step': 3176, 'epoch': 1}
{'type': 'loss', 'content': 0.09169688820838928, 'timestamp': '2025-10-02 00:16:21.013914', 'step': 3177, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:21.069432', 'step': 3177, 'epoch': 1}
{'type': 'loss', 'content': 0.09701599925756454, 'timestamp': '2025-10-02 00:16:21.073507', 'step': 3178, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:21.129945', 'step': 3178, 'epoch': 1}
{'type': 'loss', 'content': 0.23441877961158752, 'timestamp': '2025-10-02 00:16:21.140066', 'step': 3179, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:21.202979', 'step': 3179, 'epoch': 1}
{'type': 'loss', 'content': 0.07884036749601364, 'timestamp': '2025-10-02 00:16:21.209783', 'step': 3180, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:16:21.265144', 'step': 3180, 'epoch': 1}
{'type': 'loss', 'content': 0.06762172281742096, 'timestamp': '2025-10-02 00:16:21.267874', 'step': 3181, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:21.328207', 'step': 3181, 'epoch': 1}
{'type': 'loss', 'content': 0.15491333603858948, 'timestamp': '2025-10-02 00:16:21.332333', 'step': 3182, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:21.402400', 'step': 3182, 'epoch': 1}
{'type': 'loss', 'content': 0.04589135944843292, 'timestamp': '2025-10-02 00:16:21.411693', 'step': 3183, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:21.476427', 'step': 3183, 'epoch': 1}
{'type': 'loss', 'content': 0.12287548184394836, 'timestamp': '2025-10-02 00:16:21.487485', 'step': 3184, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:21.551809', 'step': 3184, 'epoch': 1}
{'type': 'loss', 'content': 0.12134642899036407, 'timestamp': '2025-10-02 00:16:21.554779', 'step': 3185, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:21.617147', 'step': 3185, 'epoch': 1}
{'type': 'loss', 'content': 0.06627597659826279, 'timestamp': '2025-10-02 00:16:21.626544', 'step': 3186, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:21.686199', 'step': 3186, 'epoch': 1}
{'type': 'loss', 'content': 0.17369940876960754, 'timestamp': '2025-10-02 00:16:21.696160', 'step': 3187, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:21.782680', 'step': 3187, 'epoch': 1}
{'type': 'loss', 'content': 0.05659983679652214, 'timestamp': '2025-10-02 00:16:21.792047', 'step': 3188, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:21.874216', 'step': 3188, 'epoch': 1}
{'type': 'loss', 'content': 0.05482758954167366, 'timestamp': '2025-10-02 00:16:21.882943', 'step': 3189, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:21.961557', 'step': 3189, 'epoch': 1}
{'type': 'loss', 'content': 0.2653847336769104, 'timestamp': '2025-10-02 00:16:21.965830', 'step': 3190, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:22.041062', 'step': 3190, 'epoch': 1}
{'type': 'loss', 'content': 0.13806898891925812, 'timestamp': '2025-10-02 00:16:22.046989', 'step': 3191, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:22.116591', 'step': 3191, 'epoch': 1}
{'type': 'loss', 'content': 0.012884330935776234, 'timestamp': '2025-10-02 00:16:22.124874', 'step': 3192, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:22.185165', 'step': 3192, 'epoch': 1}
{'type': 'loss', 'content': 0.17754434049129486, 'timestamp': '2025-10-02 00:16:22.189004', 'step': 3193, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:22.261916', 'step': 3193, 'epoch': 1}
{'type': 'loss', 'content': 0.04062853008508682, 'timestamp': '2025-10-02 00:16:22.271442', 'step': 3194, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:22.334078', 'step': 3194, 'epoch': 1}
{'type': 'loss', 'content': 0.0864148661494255, 'timestamp': '2025-10-02 00:16:22.350471', 'step': 3195, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:16:22.421683', 'step': 3195, 'epoch': 1}
{'type': 'loss', 'content': 0.12191609293222427, 'timestamp': '2025-10-02 00:16:22.432631', 'step': 3196, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:22.489538', 'step': 3196, 'epoch': 1}
{'type': 'loss', 'content': 0.060053516179323196, 'timestamp': '2025-10-02 00:16:22.493656', 'step': 3197, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:16:22.551821', 'step': 3197, 'epoch': 1}
{'type': 'loss', 'content': 0.1797037124633789, 'timestamp': '2025-10-02 00:16:22.557202', 'step': 3198, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:22.617337', 'step': 3198, 'epoch': 1}
{'type': 'loss', 'content': 0.17572851479053497, 'timestamp': '2025-10-02 00:16:22.620345', 'step': 3199, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:16:22.692273', 'step': 3199, 'epoch': 1}
{'type': 'loss', 'content': 0.025226512923836708, 'timestamp': '2025-10-02 00:16:22.703544', 'step': 3200, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:22.774442', 'step': 3200, 'epoch': 1}
{'type': 'loss', 'content': 0.221133753657341, 'timestamp': '2025-10-02 00:16:22.778868', 'step': 3201, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:22.844389', 'step': 3201, 'epoch': 1}
{'type': 'loss', 'content': 0.06330420076847076, 'timestamp': '2025-10-02 00:16:22.847956', 'step': 3202, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:22.909258', 'step': 3202, 'epoch': 1}
{'type': 'loss', 'content': 0.23843646049499512, 'timestamp': '2025-10-02 00:16:22.918462', 'step': 3203, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:22.980586', 'step': 3203, 'epoch': 1}
{'type': 'loss', 'content': 0.08963292837142944, 'timestamp': '2025-10-02 00:16:22.988873', 'step': 3204, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:23.050872', 'step': 3204, 'epoch': 1}
{'type': 'loss', 'content': 0.05383108928799629, 'timestamp': '2025-10-02 00:16:23.061925', 'step': 3205, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:16:23.125753', 'step': 3205, 'epoch': 1}
{'type': 'loss', 'content': 0.193266823887825, 'timestamp': '2025-10-02 00:16:23.128800', 'step': 3206, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:23.193486', 'step': 3206, 'epoch': 1}
{'type': 'loss', 'content': 0.05073026195168495, 'timestamp': '2025-10-02 00:16:23.196834', 'step': 3207, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:23.260360', 'step': 3207, 'epoch': 1}
{'type': 'loss', 'content': 0.03026726096868515, 'timestamp': '2025-10-02 00:16:23.274115', 'step': 3208, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:16:23.341576', 'step': 3208, 'epoch': 1}
{'type': 'loss', 'content': 0.12562671303749084, 'timestamp': '2025-10-02 00:16:23.349295', 'step': 3209, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:16:23.418240', 'step': 3209, 'epoch': 1}
{'type': 'loss', 'content': 0.06335180252790451, 'timestamp': '2025-10-02 00:16:23.421494', 'step': 3210, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:23.483484', 'step': 3210, 'epoch': 1}
{'type': 'loss', 'content': 0.10922358185052872, 'timestamp': '2025-10-02 00:16:23.487456', 'step': 3211, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:23.553048', 'step': 3211, 'epoch': 1}
{'type': 'loss', 'content': 0.06395647674798965, 'timestamp': '2025-10-02 00:16:23.567385', 'step': 3212, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:16:23.630223', 'step': 3212, 'epoch': 1}
{'type': 'loss', 'content': 0.18972431123256683, 'timestamp': '2025-10-02 00:16:23.633169', 'step': 3213, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:23.690013', 'step': 3213, 'epoch': 1}
{'type': 'loss', 'content': 0.04552528262138367, 'timestamp': '2025-10-02 00:16:23.694471', 'step': 3214, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:23.754507', 'step': 3214, 'epoch': 1}
{'type': 'loss', 'content': 0.11293283849954605, 'timestamp': '2025-10-02 00:16:23.758098', 'step': 3215, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:23.815497', 'step': 3215, 'epoch': 1}
{'type': 'loss', 'content': 0.09313473105430603, 'timestamp': '2025-10-02 00:16:23.824197', 'step': 3216, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:23.884936', 'step': 3216, 'epoch': 1}
{'type': 'loss', 'content': 0.20169493556022644, 'timestamp': '2025-10-02 00:16:23.888144', 'step': 3217, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:16:23.950514', 'step': 3217, 'epoch': 1}
{'type': 'loss', 'content': 0.17265473306179047, 'timestamp': '2025-10-02 00:16:23.953919', 'step': 3218, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:24.009642', 'step': 3218, 'epoch': 1}
{'type': 'loss', 'content': 0.17060275375843048, 'timestamp': '2025-10-02 00:16:24.019688', 'step': 3219, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:24.092099', 'step': 3219, 'epoch': 1}
{'type': 'loss', 'content': 0.23810561001300812, 'timestamp': '2025-10-02 00:16:24.099159', 'step': 3220, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:24.163650', 'step': 3220, 'epoch': 1}
{'type': 'loss', 'content': 0.10156666487455368, 'timestamp': '2025-10-02 00:16:24.166948', 'step': 3221, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:24.227132', 'step': 3221, 'epoch': 1}
{'type': 'loss', 'content': 0.040663089603185654, 'timestamp': '2025-10-02 00:16:24.236490', 'step': 3222, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:24.298673', 'step': 3222, 'epoch': 1}
{'type': 'loss', 'content': 0.2458723932504654, 'timestamp': '2025-10-02 00:16:24.307675', 'step': 3223, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:24.377443', 'step': 3223, 'epoch': 1}
{'type': 'loss', 'content': 0.07686233520507812, 'timestamp': '2025-10-02 00:16:24.390854', 'step': 3224, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:16:24.451368', 'step': 3224, 'epoch': 1}
{'type': 'loss', 'content': 0.08715939521789551, 'timestamp': '2025-10-02 00:16:24.462385', 'step': 3225, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:24.531512', 'step': 3225, 'epoch': 1}
{'type': 'loss', 'content': 0.051294710487127304, 'timestamp': '2025-10-02 00:16:24.534851', 'step': 3226, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:24.601295', 'step': 3226, 'epoch': 1}
{'type': 'loss', 'content': 0.023445121943950653, 'timestamp': '2025-10-02 00:16:24.604231', 'step': 3227, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:16:24.682587', 'step': 3227, 'epoch': 1}
{'type': 'loss', 'content': 0.024252736940979958, 'timestamp': '2025-10-02 00:16:24.694008', 'step': 3228, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:24.761152', 'step': 3228, 'epoch': 1}
{'type': 'loss', 'content': 0.23641350865364075, 'timestamp': '2025-10-02 00:16:24.765160', 'step': 3229, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:24.833493', 'step': 3229, 'epoch': 1}
{'type': 'loss', 'content': 0.03288678824901581, 'timestamp': '2025-10-02 00:16:24.843018', 'step': 3230, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:24.907141', 'step': 3230, 'epoch': 1}
{'type': 'loss', 'content': 0.09498583525419235, 'timestamp': '2025-10-02 00:16:24.917168', 'step': 3231, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:16:24.991804', 'step': 3231, 'epoch': 1}
{'type': 'loss', 'content': 0.050189271569252014, 'timestamp': '2025-10-02 00:16:25.004755', 'step': 3232, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:16:25.095142', 'step': 3232, 'epoch': 1}
{'type': 'loss', 'content': 0.009920136071741581, 'timestamp': '2025-10-02 00:16:25.110304', 'step': 3233, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:25.175127', 'step': 3233, 'epoch': 1}
{'type': 'loss', 'content': 0.06888124346733093, 'timestamp': '2025-10-02 00:16:25.182504', 'step': 3234, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:25.244229', 'step': 3234, 'epoch': 1}
{'type': 'loss', 'content': 0.20761674642562866, 'timestamp': '2025-10-02 00:16:25.247626', 'step': 3235, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:16:25.317020', 'step': 3235, 'epoch': 1}
{'type': 'loss', 'content': 0.1049831286072731, 'timestamp': '2025-10-02 00:16:25.324633', 'step': 3236, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:25.395320', 'step': 3236, 'epoch': 1}
{'type': 'loss', 'content': 0.15576155483722687, 'timestamp': '2025-10-02 00:16:25.399006', 'step': 3237, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:25.467188', 'step': 3237, 'epoch': 1}
{'type': 'loss', 'content': 0.05206795409321785, 'timestamp': '2025-10-02 00:16:25.473183', 'step': 3238, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:25.531493', 'step': 3238, 'epoch': 1}
{'type': 'loss', 'content': 0.04654458165168762, 'timestamp': '2025-10-02 00:16:25.538791', 'step': 3239, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:25.602689', 'step': 3239, 'epoch': 1}
{'type': 'loss', 'content': 0.19777381420135498, 'timestamp': '2025-10-02 00:16:25.610032', 'step': 3240, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:25.678482', 'step': 3240, 'epoch': 1}
{'type': 'loss', 'content': 0.05217771977186203, 'timestamp': '2025-10-02 00:16:25.688045', 'step': 3241, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:25.758849', 'step': 3241, 'epoch': 1}
{'type': 'loss', 'content': 0.061964474618434906, 'timestamp': '2025-10-02 00:16:25.768167', 'step': 3242, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:25.828106', 'step': 3242, 'epoch': 1}
{'type': 'loss', 'content': 0.07085569202899933, 'timestamp': '2025-10-02 00:16:25.834071', 'step': 3243, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:16:25.906490', 'step': 3243, 'epoch': 1}
{'type': 'loss', 'content': 0.0454215444624424, 'timestamp': '2025-10-02 00:16:25.917430', 'step': 3244, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:25.974005', 'step': 3244, 'epoch': 1}
{'type': 'loss', 'content': 0.08374835550785065, 'timestamp': '2025-10-02 00:16:25.977913', 'step': 3245, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:26.047905', 'step': 3245, 'epoch': 1}
{'type': 'loss', 'content': 0.1284724920988083, 'timestamp': '2025-10-02 00:16:26.051144', 'step': 3246, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:26.113987', 'step': 3246, 'epoch': 1}
{'type': 'loss', 'content': 0.07675955444574356, 'timestamp': '2025-10-02 00:16:26.123557', 'step': 3247, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:26.202818', 'step': 3247, 'epoch': 1}
{'type': 'loss', 'content': 0.09183729439973831, 'timestamp': '2025-10-02 00:16:26.211142', 'step': 3248, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:16:26.275447', 'step': 3248, 'epoch': 1}
{'type': 'loss', 'content': 0.04889379069209099, 'timestamp': '2025-10-02 00:16:26.286807', 'step': 3249, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:26.355281', 'step': 3249, 'epoch': 1}
{'type': 'loss', 'content': 0.09514402598142624, 'timestamp': '2025-10-02 00:16:26.363913', 'step': 3250, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:16:26.456656', 'step': 3250, 'epoch': 1}
{'type': 'loss', 'content': 0.032791055738925934, 'timestamp': '2025-10-02 00:16:26.469118', 'step': 3251, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:26.526512', 'step': 3251, 'epoch': 1}
{'type': 'loss', 'content': 0.10013554990291595, 'timestamp': '2025-10-02 00:16:26.533433', 'step': 3252, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:26.602813', 'step': 3252, 'epoch': 1}
{'type': 'loss', 'content': 0.13790762424468994, 'timestamp': '2025-10-02 00:16:26.610588', 'step': 3253, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:26.672717', 'step': 3253, 'epoch': 1}
{'type': 'loss', 'content': 0.11913052201271057, 'timestamp': '2025-10-02 00:16:26.675426', 'step': 3254, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:26.734930', 'step': 3254, 'epoch': 1}
{'type': 'loss', 'content': 0.028963245451450348, 'timestamp': '2025-10-02 00:16:26.742285', 'step': 3255, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:26.815225', 'step': 3255, 'epoch': 1}
{'type': 'loss', 'content': 0.09103704243898392, 'timestamp': '2025-10-02 00:16:26.832608', 'step': 3256, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:16:26.899243', 'step': 3256, 'epoch': 1}
{'type': 'loss', 'content': 0.03354820981621742, 'timestamp': '2025-10-02 00:16:26.910804', 'step': 3257, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:26.981057', 'step': 3257, 'epoch': 1}
{'type': 'loss', 'content': 0.044087447226047516, 'timestamp': '2025-10-02 00:16:26.988585', 'step': 3258, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:27.046489', 'step': 3258, 'epoch': 1}
{'type': 'loss', 'content': 0.11209124326705933, 'timestamp': '2025-10-02 00:16:27.050382', 'step': 3259, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:27.112879', 'step': 3259, 'epoch': 1}
{'type': 'loss', 'content': 0.10453023761510849, 'timestamp': '2025-10-02 00:16:27.123240', 'step': 3260, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:27.178831', 'step': 3260, 'epoch': 1}
{'type': 'loss', 'content': 0.03012738563120365, 'timestamp': '2025-10-02 00:16:27.181772', 'step': 3261, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:16:27.263877', 'step': 3261, 'epoch': 1}
{'type': 'loss', 'content': 0.06451787799596786, 'timestamp': '2025-10-02 00:16:27.276385', 'step': 3262, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:27.333148', 'step': 3262, 'epoch': 1}
{'type': 'loss', 'content': 0.023798029869794846, 'timestamp': '2025-10-02 00:16:27.340602', 'step': 3263, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:27.402701', 'step': 3263, 'epoch': 1}
{'type': 'loss', 'content': 0.22268874943256378, 'timestamp': '2025-10-02 00:16:27.409350', 'step': 3264, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:27.464953', 'step': 3264, 'epoch': 1}
{'type': 'loss', 'content': 0.12752006947994232, 'timestamp': '2025-10-02 00:16:27.468375', 'step': 3265, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:27.532931', 'step': 3265, 'epoch': 1}
{'type': 'loss', 'content': 0.07142588496208191, 'timestamp': '2025-10-02 00:16:27.536067', 'step': 3266, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:27.603450', 'step': 3266, 'epoch': 1}
{'type': 'loss', 'content': 0.10700857639312744, 'timestamp': '2025-10-02 00:16:27.606125', 'step': 3267, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:16:27.675181', 'step': 3267, 'epoch': 1}
{'type': 'loss', 'content': 0.047946274280548096, 'timestamp': '2025-10-02 00:16:27.686149', 'step': 3268, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:27.748164', 'step': 3268, 'epoch': 1}
{'type': 'loss', 'content': 0.11100776493549347, 'timestamp': '2025-10-02 00:16:27.754512', 'step': 3269, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:27.814294', 'step': 3269, 'epoch': 1}
{'type': 'loss', 'content': 0.020900005474686623, 'timestamp': '2025-10-02 00:16:27.831638', 'step': 3270, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:16:27.926214', 'step': 3270, 'epoch': 1}
{'type': 'loss', 'content': 0.056262362748384476, 'timestamp': '2025-10-02 00:16:27.936693', 'step': 3271, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:28.000586', 'step': 3271, 'epoch': 1}
{'type': 'loss', 'content': 0.07657552510499954, 'timestamp': '2025-10-02 00:16:28.013086', 'step': 3272, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:28.081898', 'step': 3272, 'epoch': 1}
{'type': 'loss', 'content': 0.04830339178442955, 'timestamp': '2025-10-02 00:16:28.089645', 'step': 3273, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:28.166234', 'step': 3273, 'epoch': 1}
{'type': 'loss', 'content': 0.08675441145896912, 'timestamp': '2025-10-02 00:16:28.172051', 'step': 3274, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:28.245308', 'step': 3274, 'epoch': 1}
{'type': 'loss', 'content': 0.15939144790172577, 'timestamp': '2025-10-02 00:16:28.253965', 'step': 3275, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:28.319410', 'step': 3275, 'epoch': 1}
{'type': 'loss', 'content': 0.16077958047389984, 'timestamp': '2025-10-02 00:16:28.345150', 'step': 3276, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:28.422957', 'step': 3276, 'epoch': 1}
{'type': 'loss', 'content': 0.038612768054008484, 'timestamp': '2025-10-02 00:16:28.443974', 'step': 3277, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:28.519943', 'step': 3277, 'epoch': 1}
{'type': 'loss', 'content': 0.09084003418684006, 'timestamp': '2025-10-02 00:16:28.526211', 'step': 3278, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:28.589735', 'step': 3278, 'epoch': 1}
{'type': 'loss', 'content': 0.139791801571846, 'timestamp': '2025-10-02 00:16:28.610400', 'step': 3279, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:16:28.714304', 'step': 3279, 'epoch': 1}
{'type': 'loss', 'content': 0.16343426704406738, 'timestamp': '2025-10-02 00:16:28.725256', 'step': 3280, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:28.781047', 'step': 3280, 'epoch': 1}
{'type': 'loss', 'content': 0.12090911716222763, 'timestamp': '2025-10-02 00:16:28.786074', 'step': 3281, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:28.847488', 'step': 3281, 'epoch': 1}
{'type': 'loss', 'content': 0.16206300258636475, 'timestamp': '2025-10-02 00:16:28.850343', 'step': 3282, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:28.914667', 'step': 3282, 'epoch': 1}
{'type': 'loss', 'content': 0.1816808432340622, 'timestamp': '2025-10-02 00:16:28.917026', 'step': 3283, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:28.974030', 'step': 3283, 'epoch': 1}
{'type': 'loss', 'content': 0.17485541105270386, 'timestamp': '2025-10-02 00:16:28.980008', 'step': 3284, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:29.035159', 'step': 3284, 'epoch': 1}
{'type': 'loss', 'content': 0.1353181004524231, 'timestamp': '2025-10-02 00:16:29.038080', 'step': 3285, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:16:29.102064', 'step': 3285, 'epoch': 1}
{'type': 'loss', 'content': 0.032321032136678696, 'timestamp': '2025-10-02 00:16:29.112568', 'step': 3286, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:29.168349', 'step': 3286, 'epoch': 1}
{'type': 'loss', 'content': 0.11189167946577072, 'timestamp': '2025-10-02 00:16:29.174092', 'step': 3287, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:16:29.235749', 'step': 3287, 'epoch': 1}
{'type': 'loss', 'content': 0.036147620528936386, 'timestamp': '2025-10-02 00:16:29.247049', 'step': 3288, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:29.302214', 'step': 3288, 'epoch': 1}
{'type': 'loss', 'content': 0.1318899691104889, 'timestamp': '2025-10-02 00:16:29.304809', 'step': 3289, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:16:29.359322', 'step': 3289, 'epoch': 1}
{'type': 'loss', 'content': 0.15975293517112732, 'timestamp': '2025-10-02 00:16:29.362210', 'step': 3290, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:29.418261', 'step': 3290, 'epoch': 1}
{'type': 'loss', 'content': 0.050677571445703506, 'timestamp': '2025-10-02 00:16:29.426346', 'step': 3291, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:29.481957', 'step': 3291, 'epoch': 1}
{'type': 'loss', 'content': 0.02505640685558319, 'timestamp': '2025-10-02 00:16:29.487913', 'step': 3292, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:29.542855', 'step': 3292, 'epoch': 1}
{'type': 'loss', 'content': 0.05145549029111862, 'timestamp': '2025-10-02 00:16:29.545490', 'step': 3293, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:16:29.608816', 'step': 3293, 'epoch': 1}
{'type': 'loss', 'content': 0.08436483144760132, 'timestamp': '2025-10-02 00:16:29.619670', 'step': 3294, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:29.675384', 'step': 3294, 'epoch': 1}
{'type': 'loss', 'content': 0.03478206321597099, 'timestamp': '2025-10-02 00:16:29.681125', 'step': 3295, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:29.738065', 'step': 3295, 'epoch': 1}
{'type': 'loss', 'content': 0.07549624890089035, 'timestamp': '2025-10-02 00:16:29.746269', 'step': 3296, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:29.800819', 'step': 3296, 'epoch': 1}
{'type': 'loss', 'content': 0.08867033571004868, 'timestamp': '2025-10-02 00:16:29.810573', 'step': 3297, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:16:29.878486', 'step': 3297, 'epoch': 1}
{'type': 'loss', 'content': 0.15239174664020538, 'timestamp': '2025-10-02 00:16:29.881231', 'step': 3298, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:29.943014', 'step': 3298, 'epoch': 1}
{'type': 'loss', 'content': 0.08430057764053345, 'timestamp': '2025-10-02 00:16:29.955559', 'step': 3299, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:30.019758', 'step': 3299, 'epoch': 1}
{'type': 'loss', 'content': 0.19669322669506073, 'timestamp': '2025-10-02 00:16:30.030545', 'step': 3300, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:30.089058', 'step': 3300, 'epoch': 1}
{'type': 'loss', 'content': 0.05542071536183357, 'timestamp': '2025-10-02 00:16:30.102383', 'step': 3301, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:30.168485', 'step': 3301, 'epoch': 1}
{'type': 'loss', 'content': 0.1492169350385666, 'timestamp': '2025-10-02 00:16:30.171874', 'step': 3302, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:30.228318', 'step': 3302, 'epoch': 1}
{'type': 'loss', 'content': 0.06351093202829361, 'timestamp': '2025-10-02 00:16:30.237575', 'step': 3303, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:30.296778', 'step': 3303, 'epoch': 1}
{'type': 'loss', 'content': 0.06027602404356003, 'timestamp': '2025-10-02 00:16:30.303055', 'step': 3304, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:30.370797', 'step': 3304, 'epoch': 1}
{'type': 'loss', 'content': 0.21131767332553864, 'timestamp': '2025-10-02 00:16:30.378696', 'step': 3305, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:30.445099', 'step': 3305, 'epoch': 1}
{'type': 'loss', 'content': 0.11200606822967529, 'timestamp': '2025-10-02 00:16:30.448685', 'step': 3306, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:30.506421', 'step': 3306, 'epoch': 1}
{'type': 'loss', 'content': 0.14899134635925293, 'timestamp': '2025-10-02 00:16:30.509507', 'step': 3307, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:30.572839', 'step': 3307, 'epoch': 1}
{'type': 'loss', 'content': 0.046606481075286865, 'timestamp': '2025-10-02 00:16:30.581488', 'step': 3308, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:16:30.645605', 'step': 3308, 'epoch': 1}
{'type': 'loss', 'content': 0.03656366840004921, 'timestamp': '2025-10-02 00:16:30.657635', 'step': 3309, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:30.714722', 'step': 3309, 'epoch': 1}
{'type': 'loss', 'content': 0.09629138559103012, 'timestamp': '2025-10-02 00:16:30.724555', 'step': 3310, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:30.791766', 'step': 3310, 'epoch': 1}
{'type': 'loss', 'content': 0.08172976970672607, 'timestamp': '2025-10-02 00:16:30.796060', 'step': 3311, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:30.858370', 'step': 3311, 'epoch': 1}
{'type': 'loss', 'content': 0.04156043007969856, 'timestamp': '2025-10-02 00:16:30.868750', 'step': 3312, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:30.927445', 'step': 3312, 'epoch': 1}
{'type': 'loss', 'content': 0.06536725908517838, 'timestamp': '2025-10-02 00:16:30.933560', 'step': 3313, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:30.998100', 'step': 3313, 'epoch': 1}
{'type': 'loss', 'content': 0.061014898121356964, 'timestamp': '2025-10-02 00:16:31.007726', 'step': 3314, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:16:31.073485', 'step': 3314, 'epoch': 1}
{'type': 'loss', 'content': 0.08478789776563644, 'timestamp': '2025-10-02 00:16:31.083458', 'step': 3315, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:31.148004', 'step': 3315, 'epoch': 1}
{'type': 'loss', 'content': 0.03381863608956337, 'timestamp': '2025-10-02 00:16:31.162080', 'step': 3316, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:31.222341', 'step': 3316, 'epoch': 1}
{'type': 'loss', 'content': 0.08840856701135635, 'timestamp': '2025-10-02 00:16:31.230352', 'step': 3317, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:31.287461', 'step': 3317, 'epoch': 1}
{'type': 'loss', 'content': 0.035115256905555725, 'timestamp': '2025-10-02 00:16:31.297264', 'step': 3318, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:31.354375', 'step': 3318, 'epoch': 1}
{'type': 'loss', 'content': 0.09285124391317368, 'timestamp': '2025-10-02 00:16:31.363931', 'step': 3319, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:31.426099', 'step': 3319, 'epoch': 1}
{'type': 'loss', 'content': 0.014451994560658932, 'timestamp': '2025-10-02 00:16:31.436516', 'step': 3320, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:31.495901', 'step': 3320, 'epoch': 1}
{'type': 'loss', 'content': 0.13680846989154816, 'timestamp': '2025-10-02 00:16:31.498981', 'step': 3321, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:31.555657', 'step': 3321, 'epoch': 1}
{'type': 'loss', 'content': 0.20840245485305786, 'timestamp': '2025-10-02 00:16:31.558701', 'step': 3322, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:31.613829', 'step': 3322, 'epoch': 1}
{'type': 'loss', 'content': 0.08493027836084366, 'timestamp': '2025-10-02 00:16:31.617390', 'step': 3323, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:31.677364', 'step': 3323, 'epoch': 1}
{'type': 'loss', 'content': 0.13885584473609924, 'timestamp': '2025-10-02 00:16:31.683894', 'step': 3324, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:16:31.744731', 'step': 3324, 'epoch': 1}
{'type': 'loss', 'content': 0.1835612952709198, 'timestamp': '2025-10-02 00:16:31.751891', 'step': 3325, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:16:31.819755', 'step': 3325, 'epoch': 1}
{'type': 'loss', 'content': 0.2114746868610382, 'timestamp': '2025-10-02 00:16:31.827012', 'step': 3326, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:31.894346', 'step': 3326, 'epoch': 1}
{'type': 'loss', 'content': 0.09685677289962769, 'timestamp': '2025-10-02 00:16:31.897534', 'step': 3327, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:16:31.951918', 'step': 3327, 'epoch': 1}
{'type': 'loss', 'content': 0.12298233062028885, 'timestamp': '2025-10-02 00:16:31.962987', 'step': 3328, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:32.023196', 'step': 3328, 'epoch': 1}
{'type': 'loss', 'content': 0.04980278015136719, 'timestamp': '2025-10-02 00:16:32.031332', 'step': 3329, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:16:32.102750', 'step': 3329, 'epoch': 1}
{'type': 'loss', 'content': 0.0175505131483078, 'timestamp': '2025-10-02 00:16:32.115030', 'step': 3330, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:32.176588', 'step': 3330, 'epoch': 1}
{'type': 'loss', 'content': 0.04394356906414032, 'timestamp': '2025-10-02 00:16:32.179768', 'step': 3331, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:32.240757', 'step': 3331, 'epoch': 1}
{'type': 'loss', 'content': 0.09571880102157593, 'timestamp': '2025-10-02 00:16:32.247804', 'step': 3332, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:16:32.309847', 'step': 3332, 'epoch': 1}
{'type': 'loss', 'content': 0.024914627894759178, 'timestamp': '2025-10-02 00:16:32.321360', 'step': 3333, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:32.378177', 'step': 3333, 'epoch': 1}
{'type': 'loss', 'content': 0.15764722228050232, 'timestamp': '2025-10-02 00:16:32.380894', 'step': 3334, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:32.437503', 'step': 3334, 'epoch': 1}
{'type': 'loss', 'content': 0.08018159121274948, 'timestamp': '2025-10-02 00:16:32.440713', 'step': 3335, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:32.502129', 'step': 3335, 'epoch': 1}
{'type': 'loss', 'content': 0.04542016610503197, 'timestamp': '2025-10-02 00:16:32.508858', 'step': 3336, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:32.565515', 'step': 3336, 'epoch': 1}
{'type': 'loss', 'content': 0.14627866446971893, 'timestamp': '2025-10-02 00:16:32.568677', 'step': 3337, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:32.628270', 'step': 3337, 'epoch': 1}
{'type': 'loss', 'content': 0.09203272312879562, 'timestamp': '2025-10-02 00:16:32.630833', 'step': 3338, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:32.692036', 'step': 3338, 'epoch': 1}
{'type': 'loss', 'content': 0.10825269669294357, 'timestamp': '2025-10-02 00:16:32.697986', 'step': 3339, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:32.755653', 'step': 3339, 'epoch': 1}
{'type': 'loss', 'content': 0.021570397540926933, 'timestamp': '2025-10-02 00:16:32.763905', 'step': 3340, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:32.825432', 'step': 3340, 'epoch': 1}
{'type': 'loss', 'content': 0.043530095368623734, 'timestamp': '2025-10-02 00:16:32.828765', 'step': 3341, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:16:32.907430', 'step': 3341, 'epoch': 1}
{'type': 'loss', 'content': 0.034877147525548935, 'timestamp': '2025-10-02 00:16:32.919676', 'step': 3342, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:32.973954', 'step': 3342, 'epoch': 1}
{'type': 'loss', 'content': 0.07234896719455719, 'timestamp': '2025-10-02 00:16:32.981655', 'step': 3343, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:33.035408', 'step': 3343, 'epoch': 1}
{'type': 'loss', 'content': 0.04850541427731514, 'timestamp': '2025-10-02 00:16:33.041546', 'step': 3344, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:33.094197', 'step': 3344, 'epoch': 1}
{'type': 'loss', 'content': 0.2169206440448761, 'timestamp': '2025-10-02 00:16:33.097049', 'step': 3345, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:33.151445', 'step': 3345, 'epoch': 1}
{'type': 'loss', 'content': 0.04241948947310448, 'timestamp': '2025-10-02 00:16:33.154134', 'step': 3346, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:33.208448', 'step': 3346, 'epoch': 1}
{'type': 'loss', 'content': 0.026039322838187218, 'timestamp': '2025-10-02 00:16:33.211524', 'step': 3347, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:33.266464', 'step': 3347, 'epoch': 1}
{'type': 'loss', 'content': 0.1897670030593872, 'timestamp': '2025-10-02 00:16:33.273023', 'step': 3348, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:33.325492', 'step': 3348, 'epoch': 1}
{'type': 'loss', 'content': 0.13780449330806732, 'timestamp': '2025-10-02 00:16:33.328005', 'step': 3349, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:33.381653', 'step': 3349, 'epoch': 1}
{'type': 'loss', 'content': 0.06723484396934509, 'timestamp': '2025-10-02 00:16:33.384334', 'step': 3350, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:33.438412', 'step': 3350, 'epoch': 1}
{'type': 'loss', 'content': 0.16584093868732452, 'timestamp': '2025-10-02 00:16:33.440848', 'step': 3351, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:16:33.494918', 'step': 3351, 'epoch': 1}
{'type': 'loss', 'content': 0.06400594115257263, 'timestamp': '2025-10-02 00:16:33.501128', 'step': 3352, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:33.554578', 'step': 3352, 'epoch': 1}
{'type': 'loss', 'content': 0.03698129579424858, 'timestamp': '2025-10-02 00:16:33.559152', 'step': 3353, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:33.614462', 'step': 3353, 'epoch': 1}
{'type': 'loss', 'content': 0.13891921937465668, 'timestamp': '2025-10-02 00:16:33.617047', 'step': 3354, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:33.670902', 'step': 3354, 'epoch': 1}
{'type': 'loss', 'content': 0.21484604477882385, 'timestamp': '2025-10-02 00:16:33.674833', 'step': 3355, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:33.729651', 'step': 3355, 'epoch': 1}
{'type': 'loss', 'content': 0.07955854386091232, 'timestamp': '2025-10-02 00:16:33.736562', 'step': 3356, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:33.790564', 'step': 3356, 'epoch': 1}
{'type': 'loss', 'content': 0.0913255512714386, 'timestamp': '2025-10-02 00:16:33.800179', 'step': 3357, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:33.856243', 'step': 3357, 'epoch': 1}
{'type': 'loss', 'content': 0.11888182163238525, 'timestamp': '2025-10-02 00:16:33.865762', 'step': 3358, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:33.920903', 'step': 3358, 'epoch': 1}
{'type': 'loss', 'content': 0.25152334570884705, 'timestamp': '2025-10-02 00:16:33.924783', 'step': 3359, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:16:33.980925', 'step': 3359, 'epoch': 1}
{'type': 'loss', 'content': 0.11044470220804214, 'timestamp': '2025-10-02 00:16:33.987723', 'step': 3360, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:34.044710', 'step': 3360, 'epoch': 1}
{'type': 'loss', 'content': 0.05029069632291794, 'timestamp': '2025-10-02 00:16:34.050744', 'step': 3361, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:34.104743', 'step': 3361, 'epoch': 1}
{'type': 'loss', 'content': 0.14321506023406982, 'timestamp': '2025-10-02 00:16:34.108091', 'step': 3362, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:34.164195', 'step': 3362, 'epoch': 1}
{'type': 'loss', 'content': 0.06298617273569107, 'timestamp': '2025-10-02 00:16:34.166842', 'step': 3363, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:34.222135', 'step': 3363, 'epoch': 1}
{'type': 'loss', 'content': 0.0772046223282814, 'timestamp': '2025-10-02 00:16:34.229402', 'step': 3364, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:34.287410', 'step': 3364, 'epoch': 1}
{'type': 'loss', 'content': 0.17651775479316711, 'timestamp': '2025-10-02 00:16:34.290722', 'step': 3365, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:34.344358', 'step': 3365, 'epoch': 1}
{'type': 'loss', 'content': 0.0940762534737587, 'timestamp': '2025-10-02 00:16:34.349123', 'step': 3366, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:34.403502', 'step': 3366, 'epoch': 1}
{'type': 'loss', 'content': 0.0976867824792862, 'timestamp': '2025-10-02 00:16:34.406568', 'step': 3367, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:34.461599', 'step': 3367, 'epoch': 1}
{'type': 'loss', 'content': 0.11278791725635529, 'timestamp': '2025-10-02 00:16:34.474557', 'step': 3368, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:34.536506', 'step': 3368, 'epoch': 1}
{'type': 'loss', 'content': 0.2623804807662964, 'timestamp': '2025-10-02 00:16:34.541944', 'step': 3369, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:34.611583', 'step': 3369, 'epoch': 1}
{'type': 'loss', 'content': 0.044868312776088715, 'timestamp': '2025-10-02 00:16:34.621392', 'step': 3370, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:34.676105', 'step': 3370, 'epoch': 1}
{'type': 'loss', 'content': 0.22093108296394348, 'timestamp': '2025-10-02 00:16:34.679106', 'step': 3371, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:34.735062', 'step': 3371, 'epoch': 1}
{'type': 'loss', 'content': 0.09264977276325226, 'timestamp': '2025-10-02 00:16:34.742972', 'step': 3372, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:34.801242', 'step': 3372, 'epoch': 1}
{'type': 'loss', 'content': 0.1037052571773529, 'timestamp': '2025-10-02 00:16:34.804398', 'step': 3373, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:34.862684', 'step': 3373, 'epoch': 1}
{'type': 'loss', 'content': 0.06012518331408501, 'timestamp': '2025-10-02 00:16:34.868813', 'step': 3374, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:34.932075', 'step': 3374, 'epoch': 1}
{'type': 'loss', 'content': 0.13268017768859863, 'timestamp': '2025-10-02 00:16:34.941555', 'step': 3375, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:34.998886', 'step': 3375, 'epoch': 1}
{'type': 'loss', 'content': 0.16111735999584198, 'timestamp': '2025-10-02 00:16:35.005853', 'step': 3376, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:35.059276', 'step': 3376, 'epoch': 1}
{'type': 'loss', 'content': 0.15672606229782104, 'timestamp': '2025-10-02 00:16:35.062066', 'step': 3377, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:35.122441', 'step': 3377, 'epoch': 1}
{'type': 'loss', 'content': 0.24855414032936096, 'timestamp': '2025-10-02 00:16:35.125298', 'step': 3378, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:35.180057', 'step': 3378, 'epoch': 1}
{'type': 'loss', 'content': 0.2775211036205292, 'timestamp': '2025-10-02 00:16:35.184623', 'step': 3379, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:35.245995', 'step': 3379, 'epoch': 1}
{'type': 'loss', 'content': 0.19469822943210602, 'timestamp': '2025-10-02 00:16:35.252853', 'step': 3380, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:35.308950', 'step': 3380, 'epoch': 1}
{'type': 'loss', 'content': 0.016364391893148422, 'timestamp': '2025-10-02 00:16:35.311609', 'step': 3381, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:35.366923', 'step': 3381, 'epoch': 1}
{'type': 'loss', 'content': 0.12519647181034088, 'timestamp': '2025-10-02 00:16:35.369491', 'step': 3382, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:35.423811', 'step': 3382, 'epoch': 1}
{'type': 'loss', 'content': 0.045022670179605484, 'timestamp': '2025-10-02 00:16:35.429922', 'step': 3383, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:35.483606', 'step': 3383, 'epoch': 1}
{'type': 'loss', 'content': 0.11324120312929153, 'timestamp': '2025-10-02 00:16:35.489711', 'step': 3384, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:35.543354', 'step': 3384, 'epoch': 1}
{'type': 'loss', 'content': 0.017407411709427834, 'timestamp': '2025-10-02 00:16:35.553812', 'step': 3385, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:35.607919', 'step': 3385, 'epoch': 1}
{'type': 'loss', 'content': 0.1857532560825348, 'timestamp': '2025-10-02 00:16:35.610582', 'step': 3386, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:35.666177', 'step': 3386, 'epoch': 1}
{'type': 'loss', 'content': 0.1716051548719406, 'timestamp': '2025-10-02 00:16:35.675966', 'step': 3387, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:35.729551', 'step': 3387, 'epoch': 1}
{'type': 'loss', 'content': 0.25640350580215454, 'timestamp': '2025-10-02 00:16:35.736687', 'step': 3388, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:16:35.790271', 'step': 3388, 'epoch': 1}
{'type': 'loss', 'content': 0.23026596009731293, 'timestamp': '2025-10-02 00:16:35.792965', 'step': 3389, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:35.848091', 'step': 3389, 'epoch': 1}
{'type': 'loss', 'content': 0.09472615271806717, 'timestamp': '2025-10-02 00:16:35.850735', 'step': 3390, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:35.904856', 'step': 3390, 'epoch': 1}
{'type': 'loss', 'content': 0.23631541430950165, 'timestamp': '2025-10-02 00:16:35.907329', 'step': 3391, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:35.961487', 'step': 3391, 'epoch': 1}
{'type': 'loss', 'content': 0.0542706735432148, 'timestamp': '2025-10-02 00:16:35.971808', 'step': 3392, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:36.025162', 'step': 3392, 'epoch': 1}
{'type': 'loss', 'content': 0.05186980590224266, 'timestamp': '2025-10-02 00:16:36.027431', 'step': 3393, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:36.081008', 'step': 3393, 'epoch': 1}
{'type': 'loss', 'content': 0.056459616869688034, 'timestamp': '2025-10-02 00:16:36.083518', 'step': 3394, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:36.141177', 'step': 3394, 'epoch': 1}
{'type': 'loss', 'content': 0.046167366206645966, 'timestamp': '2025-10-02 00:16:36.143757', 'step': 3395, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:36.197746', 'step': 3395, 'epoch': 1}
{'type': 'loss', 'content': 0.059785038232803345, 'timestamp': '2025-10-02 00:16:36.204801', 'step': 3396, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:16:36.261995', 'step': 3396, 'epoch': 1}
{'type': 'loss', 'content': 0.035734280943870544, 'timestamp': '2025-10-02 00:16:36.273175', 'step': 3397, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:16:36.328881', 'step': 3397, 'epoch': 1}
{'type': 'loss', 'content': 0.12525895237922668, 'timestamp': '2025-10-02 00:16:36.331458', 'step': 3398, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:36.385551', 'step': 3398, 'epoch': 1}
{'type': 'loss', 'content': 0.08218186348676682, 'timestamp': '2025-10-02 00:16:36.387962', 'step': 3399, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:36.442070', 'step': 3399, 'epoch': 1}
{'type': 'loss', 'content': 0.056969884783029556, 'timestamp': '2025-10-02 00:16:36.448379', 'step': 3400, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:36.501881', 'step': 3400, 'epoch': 1}
{'type': 'loss', 'content': 0.11319843679666519, 'timestamp': '2025-10-02 00:16:36.504383', 'step': 3401, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:36.558410', 'step': 3401, 'epoch': 1}
{'type': 'loss', 'content': 0.06973704695701599, 'timestamp': '2025-10-02 00:16:36.566075', 'step': 3402, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:16:36.620767', 'step': 3402, 'epoch': 1}
{'type': 'loss', 'content': 0.16080811619758606, 'timestamp': '2025-10-02 00:16:36.623467', 'step': 3403, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:36.677933', 'step': 3403, 'epoch': 1}
{'type': 'loss', 'content': 0.07426884770393372, 'timestamp': '2025-10-02 00:16:36.685017', 'step': 3404, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:36.738557', 'step': 3404, 'epoch': 1}
{'type': 'loss', 'content': 0.07930875569581985, 'timestamp': '2025-10-02 00:16:36.741128', 'step': 3405, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:16:36.803678', 'step': 3405, 'epoch': 1}
{'type': 'loss', 'content': 0.06293985992670059, 'timestamp': '2025-10-02 00:16:36.814773', 'step': 3406, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:36.869672', 'step': 3406, 'epoch': 1}
{'type': 'loss', 'content': 0.028578899800777435, 'timestamp': '2025-10-02 00:16:36.872577', 'step': 3407, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:36.927117', 'step': 3407, 'epoch': 1}
{'type': 'loss', 'content': 0.10700511932373047, 'timestamp': '2025-10-02 00:16:36.933352', 'step': 3408, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:16:36.993181', 'step': 3408, 'epoch': 1}
{'type': 'loss', 'content': 0.059530653059482574, 'timestamp': '2025-10-02 00:16:37.004730', 'step': 3409, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:37.061440', 'step': 3409, 'epoch': 1}
{'type': 'loss', 'content': 0.1325787752866745, 'timestamp': '2025-10-02 00:16:37.071219', 'step': 3410, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:37.126127', 'step': 3410, 'epoch': 1}
{'type': 'loss', 'content': 0.0412631593644619, 'timestamp': '2025-10-02 00:16:37.128986', 'step': 3411, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:16:37.184001', 'step': 3411, 'epoch': 1}
{'type': 'loss', 'content': 0.06972920894622803, 'timestamp': '2025-10-02 00:16:37.190792', 'step': 3412, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:16:37.243831', 'step': 3412, 'epoch': 1}
{'type': 'loss', 'content': 0.15862038731575012, 'timestamp': '2025-10-02 00:16:37.246577', 'step': 3413, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:37.300944', 'step': 3413, 'epoch': 1}
{'type': 'loss', 'content': 0.016877854242920876, 'timestamp': '2025-10-02 00:16:37.308878', 'step': 3414, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:37.364384', 'step': 3414, 'epoch': 1}
{'type': 'loss', 'content': 0.04374057799577713, 'timestamp': '2025-10-02 00:16:37.373851', 'step': 3415, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:37.428716', 'step': 3415, 'epoch': 1}
{'type': 'loss', 'content': 0.04738054797053337, 'timestamp': '2025-10-02 00:16:37.434626', 'step': 3416, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:37.489068', 'step': 3416, 'epoch': 1}
{'type': 'loss', 'content': 0.028622381389141083, 'timestamp': '2025-10-02 00:16:37.499587', 'step': 3417, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:37.553401', 'step': 3417, 'epoch': 1}
{'type': 'loss', 'content': 0.16677990555763245, 'timestamp': '2025-10-02 00:16:37.556058', 'step': 3418, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:16:37.615648', 'step': 3418, 'epoch': 1}
{'type': 'loss', 'content': 0.02353201061487198, 'timestamp': '2025-10-02 00:16:37.626040', 'step': 3419, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:37.682093', 'step': 3419, 'epoch': 1}
{'type': 'loss', 'content': 0.025852104648947716, 'timestamp': '2025-10-02 00:16:37.692467', 'step': 3420, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:37.745976', 'step': 3420, 'epoch': 1}
{'type': 'loss', 'content': 0.08822530508041382, 'timestamp': '2025-10-02 00:16:37.752131', 'step': 3421, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:16:37.828667', 'step': 3421, 'epoch': 1}
{'type': 'loss', 'content': 0.023524854332208633, 'timestamp': '2025-10-02 00:16:37.842503', 'step': 3422, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:37.898429', 'step': 3422, 'epoch': 1}
{'type': 'loss', 'content': 0.12231595069169998, 'timestamp': '2025-10-02 00:16:37.900888', 'step': 3423, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:37.954805', 'step': 3423, 'epoch': 1}
{'type': 'loss', 'content': 0.1910809874534607, 'timestamp': '2025-10-02 00:16:37.960858', 'step': 3424, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:38.014098', 'step': 3424, 'epoch': 1}
{'type': 'loss', 'content': 0.05232015624642372, 'timestamp': '2025-10-02 00:16:38.020254', 'step': 3425, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:38.075708', 'step': 3425, 'epoch': 1}
{'type': 'loss', 'content': 0.07959769666194916, 'timestamp': '2025-10-02 00:16:38.085493', 'step': 3426, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:16:38.147598', 'step': 3426, 'epoch': 1}
{'type': 'loss', 'content': 0.12176575511693954, 'timestamp': '2025-10-02 00:16:38.150757', 'step': 3427, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:38.204813', 'step': 3427, 'epoch': 1}
{'type': 'loss', 'content': 0.08939111232757568, 'timestamp': '2025-10-02 00:16:38.211306', 'step': 3428, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:38.265028', 'step': 3428, 'epoch': 1}
{'type': 'loss', 'content': 0.12243319302797318, 'timestamp': '2025-10-02 00:16:38.272838', 'step': 3429, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:16:38.326915', 'step': 3429, 'epoch': 1}
{'type': 'loss', 'content': 0.0484192930161953, 'timestamp': '2025-10-02 00:16:38.329553', 'step': 3430, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:38.384872', 'step': 3430, 'epoch': 1}
{'type': 'loss', 'content': 0.030377764254808426, 'timestamp': '2025-10-02 00:16:38.388051', 'step': 3431, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:38.442250', 'step': 3431, 'epoch': 1}
{'type': 'loss', 'content': 0.1068345233798027, 'timestamp': '2025-10-02 00:16:38.448514', 'step': 3432, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:38.503658', 'step': 3432, 'epoch': 1}
{'type': 'loss', 'content': 0.11627218872308731, 'timestamp': '2025-10-02 00:16:38.506473', 'step': 3433, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:38.560778', 'step': 3433, 'epoch': 1}
{'type': 'loss', 'content': 0.03749888017773628, 'timestamp': '2025-10-02 00:16:38.570395', 'step': 3434, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:38.625115', 'step': 3434, 'epoch': 1}
{'type': 'loss', 'content': 0.07950504124164581, 'timestamp': '2025-10-02 00:16:38.627675', 'step': 3435, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:38.681662', 'step': 3435, 'epoch': 1}
{'type': 'loss', 'content': 0.14345115423202515, 'timestamp': '2025-10-02 00:16:38.688138', 'step': 3436, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:16:38.741077', 'step': 3436, 'epoch': 1}
{'type': 'loss', 'content': 0.08256131410598755, 'timestamp': '2025-10-02 00:16:38.743962', 'step': 3437, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:38.799173', 'step': 3437, 'epoch': 1}
{'type': 'loss', 'content': 0.06131211668252945, 'timestamp': '2025-10-02 00:16:38.805291', 'step': 3438, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:16:38.859042', 'step': 3438, 'epoch': 1}
{'type': 'loss', 'content': 0.1543990671634674, 'timestamp': '2025-10-02 00:16:38.861548', 'step': 3439, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:38.916107', 'step': 3439, 'epoch': 1}
{'type': 'loss', 'content': 0.10926114022731781, 'timestamp': '2025-10-02 00:16:38.926654', 'step': 3440, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:16:38.979783', 'step': 3440, 'epoch': 1}
{'type': 'loss', 'content': 0.16683506965637207, 'timestamp': '2025-10-02 00:16:38.982579', 'step': 3441, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:39.036812', 'step': 3441, 'epoch': 1}
{'type': 'loss', 'content': 0.10872256010770798, 'timestamp': '2025-10-02 00:16:39.043001', 'step': 3442, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:39.097636', 'step': 3442, 'epoch': 1}
{'type': 'loss', 'content': 0.04126454517245293, 'timestamp': '2025-10-02 00:16:39.105255', 'step': 3443, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:39.160073', 'step': 3443, 'epoch': 1}
{'type': 'loss', 'content': 0.11484353989362717, 'timestamp': '2025-10-02 00:16:39.170668', 'step': 3444, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:39.224499', 'step': 3444, 'epoch': 1}
{'type': 'loss', 'content': 0.09590211510658264, 'timestamp': '2025-10-02 00:16:39.226852', 'step': 3445, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:39.280168', 'step': 3445, 'epoch': 1}
{'type': 'loss', 'content': 0.10360702127218246, 'timestamp': '2025-10-02 00:16:39.282999', 'step': 3446, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:39.337396', 'step': 3446, 'epoch': 1}
{'type': 'loss', 'content': 0.037899766117334366, 'timestamp': '2025-10-02 00:16:39.343462', 'step': 3447, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:39.398140', 'step': 3447, 'epoch': 1}
{'type': 'loss', 'content': 0.0970192402601242, 'timestamp': '2025-10-02 00:16:39.404783', 'step': 3448, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:16:39.464165', 'step': 3448, 'epoch': 1}
{'type': 'loss', 'content': 0.053468093276023865, 'timestamp': '2025-10-02 00:16:39.475726', 'step': 3449, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:39.530488', 'step': 3449, 'epoch': 1}
{'type': 'loss', 'content': 0.03757613152265549, 'timestamp': '2025-10-02 00:16:39.539560', 'step': 3450, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:39.592935', 'step': 3450, 'epoch': 1}
{'type': 'loss', 'content': 0.061480987817049026, 'timestamp': '2025-10-02 00:16:39.595648', 'step': 3451, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:16:39.649044', 'step': 3451, 'epoch': 1}
{'type': 'loss', 'content': 0.1279759258031845, 'timestamp': '2025-10-02 00:16:39.654897', 'step': 3452, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:39.708175', 'step': 3452, 'epoch': 1}
{'type': 'loss', 'content': 0.12401583790779114, 'timestamp': '2025-10-02 00:16:39.710863', 'step': 3453, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:39.764040', 'step': 3453, 'epoch': 1}
{'type': 'loss', 'content': 0.17120689153671265, 'timestamp': '2025-10-02 00:16:39.766418', 'step': 3454, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:16:39.820245', 'step': 3454, 'epoch': 1}
{'type': 'loss', 'content': 0.09015755355358124, 'timestamp': '2025-10-02 00:16:39.822662', 'step': 3455, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:39.877788', 'step': 3455, 'epoch': 1}
{'type': 'loss', 'content': 0.10302483290433884, 'timestamp': '2025-10-02 00:16:39.884208', 'step': 3456, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:39.937311', 'step': 3456, 'epoch': 1}
{'type': 'loss', 'content': 0.08343497663736343, 'timestamp': '2025-10-02 00:16:39.947022', 'step': 3457, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:40.006094', 'step': 3457, 'epoch': 1}
{'type': 'loss', 'content': 0.2221549153327942, 'timestamp': '2025-10-02 00:16:40.011024', 'step': 3458, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:40.071349', 'step': 3458, 'epoch': 1}
{'type': 'loss', 'content': 0.0653543695807457, 'timestamp': '2025-10-02 00:16:40.075526', 'step': 3459, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:40.136850', 'step': 3459, 'epoch': 1}
{'type': 'loss', 'content': 0.2651744484901428, 'timestamp': '2025-10-02 00:16:40.145138', 'step': 3460, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:40.204349', 'step': 3460, 'epoch': 1}
{'type': 'loss', 'content': 0.2343621701002121, 'timestamp': '2025-10-02 00:16:40.209286', 'step': 3461, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:16:40.268085', 'step': 3461, 'epoch': 1}
{'type': 'loss', 'content': 0.03380569443106651, 'timestamp': '2025-10-02 00:16:40.273417', 'step': 3462, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:40.331405', 'step': 3462, 'epoch': 1}
{'type': 'loss', 'content': 0.07059112936258316, 'timestamp': '2025-10-02 00:16:40.335635', 'step': 3463, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:40.393352', 'step': 3463, 'epoch': 1}
{'type': 'loss', 'content': 0.18455491960048676, 'timestamp': '2025-10-02 00:16:40.400209', 'step': 3464, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:40.455829', 'step': 3464, 'epoch': 1}
{'type': 'loss', 'content': 0.031088877469301224, 'timestamp': '2025-10-02 00:16:40.465720', 'step': 3465, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:40.523891', 'step': 3465, 'epoch': 1}
{'type': 'loss', 'content': 0.07949759811162949, 'timestamp': '2025-10-02 00:16:40.531735', 'step': 3466, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:40.590484', 'step': 3466, 'epoch': 1}
{'type': 'loss', 'content': 0.1193723976612091, 'timestamp': '2025-10-02 00:16:40.594853', 'step': 3467, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:40.653426', 'step': 3467, 'epoch': 1}
{'type': 'loss', 'content': 0.026666833087801933, 'timestamp': '2025-10-02 00:16:40.666031', 'step': 3468, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:16:40.725774', 'step': 3468, 'epoch': 1}
{'type': 'loss', 'content': 0.06307578086853027, 'timestamp': '2025-10-02 00:16:40.733517', 'step': 3469, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:16:40.788137', 'step': 3469, 'epoch': 1}
{'type': 'loss', 'content': 0.06863671541213989, 'timestamp': '2025-10-02 00:16:40.797758', 'step': 3470, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:16:40.856508', 'step': 3470, 'epoch': 1}
{'type': 'loss', 'content': 0.10256770998239517, 'timestamp': '2025-10-02 00:16:40.859952', 'step': 3471, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:16:40.918445', 'step': 3471, 'epoch': 1}
{'type': 'loss', 'content': 0.05801917612552643, 'timestamp': '2025-10-02 00:16:40.924638', 'step': 3472, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:16:40.980721', 'step': 3472, 'epoch': 1}
{'type': 'loss', 'content': 0.11690127849578857, 'timestamp': '2025-10-02 00:16:40.988458', 'step': 3473, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:41.061181', 'step': 3473, 'epoch': 1}
{'type': 'loss', 'content': 0.18562930822372437, 'timestamp': '2025-10-02 00:16:41.065120', 'step': 3474, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:16:41.138180', 'step': 3474, 'epoch': 1}
{'type': 'loss', 'content': 0.07410893589258194, 'timestamp': '2025-10-02 00:16:41.144266', 'step': 3475, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:16:41.225242', 'step': 3475, 'epoch': 1}
{'type': 'loss', 'content': 0.08607055246829987, 'timestamp': '2025-10-02 00:16:41.236744', 'step': 3476, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:41.307685', 'step': 3476, 'epoch': 1}
{'type': 'loss', 'content': 0.15054750442504883, 'timestamp': '2025-10-02 00:16:41.316346', 'step': 3477, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:16:41.385893', 'step': 3477, 'epoch': 1}
{'type': 'loss', 'content': 0.22314508259296417, 'timestamp': '2025-10-02 00:16:41.395250', 'step': 3478, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:16:41.460218', 'step': 3478, 'epoch': 1}
{'type': 'loss', 'content': 0.2183569073677063, 'timestamp': '2025-10-02 00:16:41.463572', 'step': 3479, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:16:41.520601', 'step': 3479, 'epoch': 1}
{'type': 'loss', 'content': 0.06304951012134552, 'timestamp': '2025-10-02 00:16:41.531167', 'step': 3480, 'epoch': 1}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:17:10.963017', 'step': 3480, 'epoch': 1}
{'type': 'pplx', 'content': 95.13218681239886, 'timestamp': '2025-10-02 00:17:10.969708', 'step': 3480, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:11.037749', 'step': 3480, 'epoch': 1}
{'type': 'loss', 'content': 0.08012573421001434, 'timestamp': '2025-10-02 00:17:11.041243', 'step': 3481, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:11.102767', 'step': 3481, 'epoch': 1}
{'type': 'loss', 'content': 0.09661233425140381, 'timestamp': '2025-10-02 00:17:11.109563', 'step': 3482, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:11.172076', 'step': 3482, 'epoch': 1}
{'type': 'loss', 'content': 0.04372018203139305, 'timestamp': '2025-10-02 00:17:11.177673', 'step': 3483, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:11.237343', 'step': 3483, 'epoch': 1}
{'type': 'loss', 'content': 0.07955443859100342, 'timestamp': '2025-10-02 00:17:11.247453', 'step': 3484, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:17:11.321799', 'step': 3484, 'epoch': 1}
{'type': 'loss', 'content': 0.06051128730177879, 'timestamp': '2025-10-02 00:17:11.333155', 'step': 3485, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:11.392451', 'step': 3485, 'epoch': 1}
{'type': 'loss', 'content': 0.04495798051357269, 'timestamp': '2025-10-02 00:17:11.398261', 'step': 3486, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:11.461491', 'step': 3486, 'epoch': 1}
{'type': 'loss', 'content': 0.0237730722874403, 'timestamp': '2025-10-02 00:17:11.471709', 'step': 3487, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:11.530019', 'step': 3487, 'epoch': 1}
{'type': 'loss', 'content': 0.04884130135178566, 'timestamp': '2025-10-02 00:17:11.540183', 'step': 3488, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:17:11.600420', 'step': 3488, 'epoch': 1}
{'type': 'loss', 'content': 0.16666346788406372, 'timestamp': '2025-10-02 00:17:11.612575', 'step': 3489, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:11.672066', 'step': 3489, 'epoch': 1}
{'type': 'loss', 'content': 0.030708255246281624, 'timestamp': '2025-10-02 00:17:11.682773', 'step': 3490, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:11.750113', 'step': 3490, 'epoch': 1}
{'type': 'loss', 'content': 0.03453683480620384, 'timestamp': '2025-10-02 00:17:11.757228', 'step': 3491, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:11.825318', 'step': 3491, 'epoch': 1}
{'type': 'loss', 'content': 0.08354008197784424, 'timestamp': '2025-10-02 00:17:11.838732', 'step': 3492, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:11.897121', 'step': 3492, 'epoch': 1}
{'type': 'loss', 'content': 0.2016654759645462, 'timestamp': '2025-10-02 00:17:11.907498', 'step': 3493, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:11.967946', 'step': 3493, 'epoch': 1}
{'type': 'loss', 'content': 0.06959253549575806, 'timestamp': '2025-10-02 00:17:11.975443', 'step': 3494, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:12.039230', 'step': 3494, 'epoch': 1}
{'type': 'loss', 'content': 0.1357230395078659, 'timestamp': '2025-10-02 00:17:12.048766', 'step': 3495, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:12.112674', 'step': 3495, 'epoch': 1}
{'type': 'loss', 'content': 0.04099995642900467, 'timestamp': '2025-10-02 00:17:12.121663', 'step': 3496, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:12.201356', 'step': 3496, 'epoch': 1}
{'type': 'loss', 'content': 0.06617061793804169, 'timestamp': '2025-10-02 00:17:12.206630', 'step': 3497, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:12.283587', 'step': 3497, 'epoch': 1}
{'type': 'loss', 'content': 0.09967679530382156, 'timestamp': '2025-10-02 00:17:12.288830', 'step': 3498, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:12.351612', 'step': 3498, 'epoch': 1}
{'type': 'loss', 'content': 0.07873359322547913, 'timestamp': '2025-10-02 00:17:12.357432', 'step': 3499, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:12.418351', 'step': 3499, 'epoch': 1}
{'type': 'loss', 'content': 0.0605613999068737, 'timestamp': '2025-10-02 00:17:12.428529', 'step': 3500, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 3500', 'timestamp': '2025-10-02 00:17:12.897825', 'step': 3500, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:17:12.954201', 'step': 3500, 'epoch': 1}
{'type': 'loss', 'content': 0.1479041427373886, 'timestamp': '2025-10-02 00:17:12.959816', 'step': 3501, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:13.024248', 'step': 3501, 'epoch': 1}
{'type': 'loss', 'content': 0.09627340734004974, 'timestamp': '2025-10-02 00:17:13.029337', 'step': 3502, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:13.093248', 'step': 3502, 'epoch': 1}
{'type': 'loss', 'content': 0.1544325053691864, 'timestamp': '2025-10-02 00:17:13.098910', 'step': 3503, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:13.159812', 'step': 3503, 'epoch': 1}
{'type': 'loss', 'content': 0.06685947626829147, 'timestamp': '2025-10-02 00:17:13.168583', 'step': 3504, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:13.230206', 'step': 3504, 'epoch': 1}
{'type': 'loss', 'content': 0.12763817608356476, 'timestamp': '2025-10-02 00:17:13.237037', 'step': 3505, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:13.339422', 'step': 3505, 'epoch': 1}
{'type': 'loss', 'content': 0.2508942186832428, 'timestamp': '2025-10-02 00:17:13.345565', 'step': 3506, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:17:13.407469', 'step': 3506, 'epoch': 1}
{'type': 'loss', 'content': 0.21342714130878448, 'timestamp': '2025-10-02 00:17:13.412436', 'step': 3507, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:17:13.482527', 'step': 3507, 'epoch': 1}
{'type': 'loss', 'content': 0.044643569737672806, 'timestamp': '2025-10-02 00:17:13.494003', 'step': 3508, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:13.566834', 'step': 3508, 'epoch': 1}
{'type': 'loss', 'content': 0.0920066386461258, 'timestamp': '2025-10-02 00:17:13.576427', 'step': 3509, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:13.636872', 'step': 3509, 'epoch': 1}
{'type': 'loss', 'content': 0.03253348916769028, 'timestamp': '2025-10-02 00:17:13.646284', 'step': 3510, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:13.733174', 'step': 3510, 'epoch': 1}
{'type': 'loss', 'content': 0.08264441788196564, 'timestamp': '2025-10-02 00:17:13.735965', 'step': 3511, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:13.806346', 'step': 3511, 'epoch': 1}
{'type': 'loss', 'content': 0.040593840181827545, 'timestamp': '2025-10-02 00:17:13.814334', 'step': 3512, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:13.890749', 'step': 3512, 'epoch': 1}
{'type': 'loss', 'content': 0.08477672189474106, 'timestamp': '2025-10-02 00:17:13.893817', 'step': 3513, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:13.984344', 'step': 3513, 'epoch': 1}
{'type': 'loss', 'content': 0.0619804672896862, 'timestamp': '2025-10-02 00:17:13.999264', 'step': 3514, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:14.073865', 'step': 3514, 'epoch': 1}
{'type': 'loss', 'content': 0.08133523911237717, 'timestamp': '2025-10-02 00:17:14.088864', 'step': 3515, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:14.173914', 'step': 3515, 'epoch': 1}
{'type': 'loss', 'content': 0.04942615330219269, 'timestamp': '2025-10-02 00:17:14.183437', 'step': 3516, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:17:14.254806', 'step': 3516, 'epoch': 1}
{'type': 'loss', 'content': 0.14728236198425293, 'timestamp': '2025-10-02 00:17:14.272562', 'step': 3517, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:14.361848', 'step': 3517, 'epoch': 1}
{'type': 'loss', 'content': 0.0989396870136261, 'timestamp': '2025-10-02 00:17:14.365070', 'step': 3518, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:14.433586', 'step': 3518, 'epoch': 1}
{'type': 'loss', 'content': 0.10558510571718216, 'timestamp': '2025-10-02 00:17:14.437357', 'step': 3519, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:14.511757', 'step': 3519, 'epoch': 1}
{'type': 'loss', 'content': 0.056660860776901245, 'timestamp': '2025-10-02 00:17:14.519769', 'step': 3520, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:17:14.578185', 'step': 3520, 'epoch': 1}
{'type': 'loss', 'content': 0.1268104910850525, 'timestamp': '2025-10-02 00:17:14.581843', 'step': 3521, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:14.668168', 'step': 3521, 'epoch': 1}
{'type': 'loss', 'content': 0.05434032902121544, 'timestamp': '2025-10-02 00:17:14.674130', 'step': 3522, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:14.734842', 'step': 3522, 'epoch': 1}
{'type': 'loss', 'content': 0.040825795382261276, 'timestamp': '2025-10-02 00:17:14.744424', 'step': 3523, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:14.850013', 'step': 3523, 'epoch': 1}
{'type': 'loss', 'content': 0.06222623586654663, 'timestamp': '2025-10-02 00:17:14.860508', 'step': 3524, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:14.951227', 'step': 3524, 'epoch': 1}
{'type': 'loss', 'content': 0.052201252430677414, 'timestamp': '2025-10-02 00:17:14.962138', 'step': 3525, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:15.055935', 'step': 3525, 'epoch': 1}
{'type': 'loss', 'content': 0.11483067274093628, 'timestamp': '2025-10-02 00:17:15.059216', 'step': 3526, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:15.128300', 'step': 3526, 'epoch': 1}
{'type': 'loss', 'content': 0.03653785213828087, 'timestamp': '2025-10-02 00:17:15.137700', 'step': 3527, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:15.196375', 'step': 3527, 'epoch': 1}
{'type': 'loss', 'content': 0.02357574738562107, 'timestamp': '2025-10-02 00:17:15.204785', 'step': 3528, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:15.269246', 'step': 3528, 'epoch': 1}
{'type': 'loss', 'content': 0.1292911022901535, 'timestamp': '2025-10-02 00:17:15.287863', 'step': 3529, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:17:15.384964', 'step': 3529, 'epoch': 1}
{'type': 'loss', 'content': 0.25418415665626526, 'timestamp': '2025-10-02 00:17:15.397361', 'step': 3530, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:15.460909', 'step': 3530, 'epoch': 1}
{'type': 'loss', 'content': 0.1608581840991974, 'timestamp': '2025-10-02 00:17:15.465638', 'step': 3531, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:15.550753', 'step': 3531, 'epoch': 1}
{'type': 'loss', 'content': 0.03078160621225834, 'timestamp': '2025-10-02 00:17:15.569056', 'step': 3532, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:17:15.640496', 'step': 3532, 'epoch': 1}
{'type': 'loss', 'content': 0.12844057381153107, 'timestamp': '2025-10-02 00:17:15.644668', 'step': 3533, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:15.702495', 'step': 3533, 'epoch': 1}
{'type': 'loss', 'content': 0.08494465053081512, 'timestamp': '2025-10-02 00:17:15.716499', 'step': 3534, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:15.788631', 'step': 3534, 'epoch': 1}
{'type': 'loss', 'content': 0.13060463964939117, 'timestamp': '2025-10-02 00:17:15.791604', 'step': 3535, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:15.864101', 'step': 3535, 'epoch': 1}
{'type': 'loss', 'content': 0.0840449407696724, 'timestamp': '2025-10-02 00:17:15.874246', 'step': 3536, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:15.932393', 'step': 3536, 'epoch': 1}
{'type': 'loss', 'content': 0.08081217110157013, 'timestamp': '2025-10-02 00:17:15.936444', 'step': 3537, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:16.006207', 'step': 3537, 'epoch': 1}
{'type': 'loss', 'content': 0.0693044513463974, 'timestamp': '2025-10-02 00:17:16.026322', 'step': 3538, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:16.123330', 'step': 3538, 'epoch': 1}
{'type': 'loss', 'content': 0.10237623006105423, 'timestamp': '2025-10-02 00:17:16.127740', 'step': 3539, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:16.185870', 'step': 3539, 'epoch': 1}
{'type': 'loss', 'content': 0.1809401661157608, 'timestamp': '2025-10-02 00:17:16.192502', 'step': 3540, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:16.276160', 'step': 3540, 'epoch': 1}
{'type': 'loss', 'content': 0.12953370809555054, 'timestamp': '2025-10-02 00:17:16.291635', 'step': 3541, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:16.351858', 'step': 3541, 'epoch': 1}
{'type': 'loss', 'content': 0.04741934686899185, 'timestamp': '2025-10-02 00:17:16.355148', 'step': 3542, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:16.428804', 'step': 3542, 'epoch': 1}
{'type': 'loss', 'content': 0.20827561616897583, 'timestamp': '2025-10-02 00:17:16.433291', 'step': 3543, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:17:16.491700', 'step': 3543, 'epoch': 1}
{'type': 'loss', 'content': 0.057196568697690964, 'timestamp': '2025-10-02 00:17:16.499033', 'step': 3544, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:16.576047', 'step': 3544, 'epoch': 1}
{'type': 'loss', 'content': 0.036262933164834976, 'timestamp': '2025-10-02 00:17:16.586275', 'step': 3545, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:16.645671', 'step': 3545, 'epoch': 1}
{'type': 'loss', 'content': 0.14201629161834717, 'timestamp': '2025-10-02 00:17:16.650145', 'step': 3546, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:16.733255', 'step': 3546, 'epoch': 1}
{'type': 'loss', 'content': 0.03330698609352112, 'timestamp': '2025-10-02 00:17:16.743403', 'step': 3547, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:16.849729', 'step': 3547, 'epoch': 1}
{'type': 'loss', 'content': 0.03766588494181633, 'timestamp': '2025-10-02 00:17:16.860082', 'step': 3548, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:16.948228', 'step': 3548, 'epoch': 1}
{'type': 'loss', 'content': 0.06897671520709991, 'timestamp': '2025-10-02 00:17:16.961601', 'step': 3549, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:17.043255', 'step': 3549, 'epoch': 1}
{'type': 'loss', 'content': 0.13840781152248383, 'timestamp': '2025-10-02 00:17:17.056496', 'step': 3550, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:17.143505', 'step': 3550, 'epoch': 1}
{'type': 'loss', 'content': 0.1507185995578766, 'timestamp': '2025-10-02 00:17:17.148354', 'step': 3551, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:17.245290', 'step': 3551, 'epoch': 1}
{'type': 'loss', 'content': 0.12805376946926117, 'timestamp': '2025-10-02 00:17:17.261628', 'step': 3552, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:17.354379', 'step': 3552, 'epoch': 1}
{'type': 'loss', 'content': 0.07397712767124176, 'timestamp': '2025-10-02 00:17:17.367242', 'step': 3553, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:17.447887', 'step': 3553, 'epoch': 1}
{'type': 'loss', 'content': 0.11238791793584824, 'timestamp': '2025-10-02 00:17:17.453733', 'step': 3554, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:17:17.539368', 'step': 3554, 'epoch': 1}
{'type': 'loss', 'content': 0.010457553900778294, 'timestamp': '2025-10-02 00:17:17.551662', 'step': 3555, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:17.618211', 'step': 3555, 'epoch': 1}
{'type': 'loss', 'content': 0.06757643073797226, 'timestamp': '2025-10-02 00:17:17.625901', 'step': 3556, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:17.697477', 'step': 3556, 'epoch': 1}
{'type': 'loss', 'content': 0.05863508582115173, 'timestamp': '2025-10-02 00:17:17.711571', 'step': 3557, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:17:17.805947', 'step': 3557, 'epoch': 1}
{'type': 'loss', 'content': 0.052481960505247116, 'timestamp': '2025-10-02 00:17:17.816442', 'step': 3558, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:17.874238', 'step': 3558, 'epoch': 1}
{'type': 'loss', 'content': 0.10026329010725021, 'timestamp': '2025-10-02 00:17:17.877613', 'step': 3559, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:17.956481', 'step': 3559, 'epoch': 1}
{'type': 'loss', 'content': 0.01997382566332817, 'timestamp': '2025-10-02 00:17:17.973921', 'step': 3560, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:18.041797', 'step': 3560, 'epoch': 1}
{'type': 'loss', 'content': 0.10816124081611633, 'timestamp': '2025-10-02 00:17:18.047181', 'step': 3561, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:18.115601', 'step': 3561, 'epoch': 1}
{'type': 'loss', 'content': 0.08215479552745819, 'timestamp': '2025-10-02 00:17:18.131606', 'step': 3562, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:18.214488', 'step': 3562, 'epoch': 1}
{'type': 'loss', 'content': 0.26579749584198, 'timestamp': '2025-10-02 00:17:18.218876', 'step': 3563, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:18.301017', 'step': 3563, 'epoch': 1}
{'type': 'loss', 'content': 0.12795674800872803, 'timestamp': '2025-10-02 00:17:18.311310', 'step': 3564, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:18.397374', 'step': 3564, 'epoch': 1}
{'type': 'loss', 'content': 0.04467695206403732, 'timestamp': '2025-10-02 00:17:18.404715', 'step': 3565, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:17:18.481703', 'step': 3565, 'epoch': 1}
{'type': 'loss', 'content': 0.06070764362812042, 'timestamp': '2025-10-02 00:17:18.493607', 'step': 3566, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:18.583281', 'step': 3566, 'epoch': 1}
{'type': 'loss', 'content': 0.06896286457777023, 'timestamp': '2025-10-02 00:17:18.600350', 'step': 3567, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:18.695664', 'step': 3567, 'epoch': 1}
{'type': 'loss', 'content': 0.024902423843741417, 'timestamp': '2025-10-02 00:17:18.710339', 'step': 3568, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:18.775326', 'step': 3568, 'epoch': 1}
{'type': 'loss', 'content': 0.034657981246709824, 'timestamp': '2025-10-02 00:17:18.781151', 'step': 3569, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:18.877654', 'step': 3569, 'epoch': 1}
{'type': 'loss', 'content': 0.10525213181972504, 'timestamp': '2025-10-02 00:17:18.880299', 'step': 3570, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:18.935964', 'step': 3570, 'epoch': 1}
{'type': 'loss', 'content': 0.09908117353916168, 'timestamp': '2025-10-02 00:17:18.944478', 'step': 3571, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:19.023995', 'step': 3571, 'epoch': 1}
{'type': 'loss', 'content': 0.11952406167984009, 'timestamp': '2025-10-02 00:17:19.030685', 'step': 3572, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:19.088793', 'step': 3572, 'epoch': 1}
{'type': 'loss', 'content': 0.10029573738574982, 'timestamp': '2025-10-02 00:17:19.092048', 'step': 3573, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:19.154894', 'step': 3573, 'epoch': 1}
{'type': 'loss', 'content': 0.17719289660453796, 'timestamp': '2025-10-02 00:17:19.164610', 'step': 3574, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:19.241744', 'step': 3574, 'epoch': 1}
{'type': 'loss', 'content': 0.10099747031927109, 'timestamp': '2025-10-02 00:17:19.249227', 'step': 3575, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:19.318669', 'step': 3575, 'epoch': 1}
{'type': 'loss', 'content': 0.1109187975525856, 'timestamp': '2025-10-02 00:17:19.324788', 'step': 3576, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:19.380780', 'step': 3576, 'epoch': 1}
{'type': 'loss', 'content': 0.0914861261844635, 'timestamp': '2025-10-02 00:17:19.388550', 'step': 3577, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:17:19.460987', 'step': 3577, 'epoch': 1}
{'type': 'loss', 'content': 0.1598483920097351, 'timestamp': '2025-10-02 00:17:19.469119', 'step': 3578, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:19.531923', 'step': 3578, 'epoch': 1}
{'type': 'loss', 'content': 0.06337632983922958, 'timestamp': '2025-10-02 00:17:19.541467', 'step': 3579, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:19.614687', 'step': 3579, 'epoch': 1}
{'type': 'loss', 'content': 0.04744843766093254, 'timestamp': '2025-10-02 00:17:19.621568', 'step': 3580, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:19.686491', 'step': 3580, 'epoch': 1}
{'type': 'loss', 'content': 0.1525101363658905, 'timestamp': '2025-10-02 00:17:19.689574', 'step': 3581, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:19.746130', 'step': 3581, 'epoch': 1}
{'type': 'loss', 'content': 0.19882547855377197, 'timestamp': '2025-10-02 00:17:19.753618', 'step': 3582, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:19.811730', 'step': 3582, 'epoch': 1}
{'type': 'loss', 'content': 0.10258365422487259, 'timestamp': '2025-10-02 00:17:19.817525', 'step': 3583, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:19.874588', 'step': 3583, 'epoch': 1}
{'type': 'loss', 'content': 0.08613336086273193, 'timestamp': '2025-10-02 00:17:19.882638', 'step': 3584, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:19.945148', 'step': 3584, 'epoch': 1}
{'type': 'loss', 'content': 0.18321742117404938, 'timestamp': '2025-10-02 00:17:19.954144', 'step': 3585, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:20.026106', 'step': 3585, 'epoch': 1}
{'type': 'loss', 'content': 0.07626949995756149, 'timestamp': '2025-10-02 00:17:20.029390', 'step': 3586, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:20.105014', 'step': 3586, 'epoch': 1}
{'type': 'loss', 'content': 0.05966716632246971, 'timestamp': '2025-10-02 00:17:20.110441', 'step': 3587, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:20.168204', 'step': 3587, 'epoch': 1}
{'type': 'loss', 'content': 0.19042116403579712, 'timestamp': '2025-10-02 00:17:20.179454', 'step': 3588, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:20.241788', 'step': 3588, 'epoch': 1}
{'type': 'loss', 'content': 0.12036707252264023, 'timestamp': '2025-10-02 00:17:20.252674', 'step': 3589, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:20.336403', 'step': 3589, 'epoch': 1}
{'type': 'loss', 'content': 0.07872053235769272, 'timestamp': '2025-10-02 00:17:20.343590', 'step': 3590, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:20.414801', 'step': 3590, 'epoch': 1}
{'type': 'loss', 'content': 0.08429456502199173, 'timestamp': '2025-10-02 00:17:20.417575', 'step': 3591, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:20.474322', 'step': 3591, 'epoch': 1}
{'type': 'loss', 'content': 0.10573605448007584, 'timestamp': '2025-10-02 00:17:20.485655', 'step': 3592, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:20.547462', 'step': 3592, 'epoch': 1}
{'type': 'loss', 'content': 0.014928151853382587, 'timestamp': '2025-10-02 00:17:20.557054', 'step': 3593, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:20.634147', 'step': 3593, 'epoch': 1}
{'type': 'loss', 'content': 0.041533153504133224, 'timestamp': '2025-10-02 00:17:20.636809', 'step': 3594, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:20.693493', 'step': 3594, 'epoch': 1}
{'type': 'loss', 'content': 0.0816073939204216, 'timestamp': '2025-10-02 00:17:20.703014', 'step': 3595, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:20.763801', 'step': 3595, 'epoch': 1}
{'type': 'loss', 'content': 0.13603471219539642, 'timestamp': '2025-10-02 00:17:20.770189', 'step': 3596, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:17:20.844959', 'step': 3596, 'epoch': 1}
{'type': 'loss', 'content': 0.030286194756627083, 'timestamp': '2025-10-02 00:17:20.857545', 'step': 3597, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:20.921209', 'step': 3597, 'epoch': 1}
{'type': 'loss', 'content': 0.06246021389961243, 'timestamp': '2025-10-02 00:17:20.924404', 'step': 3598, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:20.979656', 'step': 3598, 'epoch': 1}
{'type': 'loss', 'content': 0.05175982415676117, 'timestamp': '2025-10-02 00:17:20.989992', 'step': 3599, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:21.046606', 'step': 3599, 'epoch': 1}
{'type': 'loss', 'content': 0.09486974775791168, 'timestamp': '2025-10-02 00:17:21.053175', 'step': 3600, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:21.129159', 'step': 3600, 'epoch': 1}
{'type': 'loss', 'content': 0.04260920733213425, 'timestamp': '2025-10-02 00:17:21.135023', 'step': 3601, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:21.198193', 'step': 3601, 'epoch': 1}
{'type': 'loss', 'content': 0.25673019886016846, 'timestamp': '2025-10-02 00:17:21.201119', 'step': 3602, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:17:21.272982', 'step': 3602, 'epoch': 1}
{'type': 'loss', 'content': 0.05743242800235748, 'timestamp': '2025-10-02 00:17:21.283392', 'step': 3603, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:21.351840', 'step': 3603, 'epoch': 1}
{'type': 'loss', 'content': 0.09290267527103424, 'timestamp': '2025-10-02 00:17:21.362509', 'step': 3604, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:17:21.429301', 'step': 3604, 'epoch': 1}
{'type': 'loss', 'content': 0.022893445566296577, 'timestamp': '2025-10-02 00:17:21.440840', 'step': 3605, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:21.504896', 'step': 3605, 'epoch': 1}
{'type': 'loss', 'content': 0.06440087407827377, 'timestamp': '2025-10-02 00:17:21.514381', 'step': 3606, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:21.579528', 'step': 3606, 'epoch': 1}
{'type': 'loss', 'content': 0.07378284633159637, 'timestamp': '2025-10-02 00:17:21.587384', 'step': 3607, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:21.657612', 'step': 3607, 'epoch': 1}
{'type': 'loss', 'content': 0.08808299899101257, 'timestamp': '2025-10-02 00:17:21.664847', 'step': 3608, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:17:21.740835', 'step': 3608, 'epoch': 1}
{'type': 'loss', 'content': 0.09694086015224457, 'timestamp': '2025-10-02 00:17:21.752148', 'step': 3609, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:21.815937', 'step': 3609, 'epoch': 1}
{'type': 'loss', 'content': 0.026186220347881317, 'timestamp': '2025-10-02 00:17:21.823731', 'step': 3610, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:21.892723', 'step': 3610, 'epoch': 1}
{'type': 'loss', 'content': 0.0815078541636467, 'timestamp': '2025-10-02 00:17:21.900227', 'step': 3611, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:21.955493', 'step': 3611, 'epoch': 1}
{'type': 'loss', 'content': 0.18223659694194794, 'timestamp': '2025-10-02 00:17:21.962303', 'step': 3612, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:22.024924', 'step': 3612, 'epoch': 1}
{'type': 'loss', 'content': 0.033249631524086, 'timestamp': '2025-10-02 00:17:22.027460', 'step': 3613, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:22.084856', 'step': 3613, 'epoch': 1}
{'type': 'loss', 'content': 0.16739241778850555, 'timestamp': '2025-10-02 00:17:22.094739', 'step': 3614, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:22.170561', 'step': 3614, 'epoch': 1}
{'type': 'loss', 'content': 0.03493399918079376, 'timestamp': '2025-10-02 00:17:22.179926', 'step': 3615, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:22.239314', 'step': 3615, 'epoch': 1}
{'type': 'loss', 'content': 0.12227141857147217, 'timestamp': '2025-10-02 00:17:22.245867', 'step': 3616, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:22.305834', 'step': 3616, 'epoch': 1}
{'type': 'loss', 'content': 0.06639322638511658, 'timestamp': '2025-10-02 00:17:22.316841', 'step': 3617, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:17:22.386384', 'step': 3617, 'epoch': 1}
{'type': 'loss', 'content': 0.07349053025245667, 'timestamp': '2025-10-02 00:17:22.397020', 'step': 3618, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:22.464238', 'step': 3618, 'epoch': 1}
{'type': 'loss', 'content': 0.2553817331790924, 'timestamp': '2025-10-02 00:17:22.474113', 'step': 3619, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:22.555703', 'step': 3619, 'epoch': 1}
{'type': 'loss', 'content': 0.11894406378269196, 'timestamp': '2025-10-02 00:17:22.562706', 'step': 3620, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:17:22.635254', 'step': 3620, 'epoch': 1}
{'type': 'loss', 'content': 0.2106356918811798, 'timestamp': '2025-10-02 00:17:22.638412', 'step': 3621, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:22.710527', 'step': 3621, 'epoch': 1}
{'type': 'loss', 'content': 0.03804435208439827, 'timestamp': '2025-10-02 00:17:22.720739', 'step': 3622, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:22.776083', 'step': 3622, 'epoch': 1}
{'type': 'loss', 'content': 0.2310800403356552, 'timestamp': '2025-10-02 00:17:22.779489', 'step': 3623, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:22.834653', 'step': 3623, 'epoch': 1}
{'type': 'loss', 'content': 0.09801953285932541, 'timestamp': '2025-10-02 00:17:22.840816', 'step': 3624, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:22.898991', 'step': 3624, 'epoch': 1}
{'type': 'loss', 'content': 0.06350021809339523, 'timestamp': '2025-10-02 00:17:22.910008', 'step': 3625, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:22.972195', 'step': 3625, 'epoch': 1}
{'type': 'loss', 'content': 0.0850193053483963, 'timestamp': '2025-10-02 00:17:22.976758', 'step': 3626, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:23.039022', 'step': 3626, 'epoch': 1}
{'type': 'loss', 'content': 0.19302093982696533, 'timestamp': '2025-10-02 00:17:23.043182', 'step': 3627, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:23.100714', 'step': 3627, 'epoch': 1}
{'type': 'loss', 'content': 0.1856294721364975, 'timestamp': '2025-10-02 00:17:23.109256', 'step': 3628, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:23.165631', 'step': 3628, 'epoch': 1}
{'type': 'loss', 'content': 0.050280291587114334, 'timestamp': '2025-10-02 00:17:23.173069', 'step': 3629, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:23.232881', 'step': 3629, 'epoch': 1}
{'type': 'loss', 'content': 0.11932142823934555, 'timestamp': '2025-10-02 00:17:23.235851', 'step': 3630, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:23.302934', 'step': 3630, 'epoch': 1}
{'type': 'loss', 'content': 0.10035751014947891, 'timestamp': '2025-10-02 00:17:23.305392', 'step': 3631, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:23.368655', 'step': 3631, 'epoch': 1}
{'type': 'loss', 'content': 0.058368533849716187, 'timestamp': '2025-10-02 00:17:23.378753', 'step': 3632, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:23.447404', 'step': 3632, 'epoch': 1}
{'type': 'loss', 'content': 0.13268452882766724, 'timestamp': '2025-10-02 00:17:23.450787', 'step': 3633, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:23.509896', 'step': 3633, 'epoch': 1}
{'type': 'loss', 'content': 0.11261636763811111, 'timestamp': '2025-10-02 00:17:23.512483', 'step': 3634, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:23.575165', 'step': 3634, 'epoch': 1}
{'type': 'loss', 'content': 0.07871637493371964, 'timestamp': '2025-10-02 00:17:23.578224', 'step': 3635, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:17:23.650157', 'step': 3635, 'epoch': 1}
{'type': 'loss', 'content': 0.00820187572389841, 'timestamp': '2025-10-02 00:17:23.662955', 'step': 3636, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:23.723813', 'step': 3636, 'epoch': 1}
{'type': 'loss', 'content': 0.07216857373714447, 'timestamp': '2025-10-02 00:17:23.726253', 'step': 3637, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:23.787207', 'step': 3637, 'epoch': 1}
{'type': 'loss', 'content': 0.21799875795841217, 'timestamp': '2025-10-02 00:17:23.792607', 'step': 3638, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:23.853343', 'step': 3638, 'epoch': 1}
{'type': 'loss', 'content': 0.17567729949951172, 'timestamp': '2025-10-02 00:17:23.856847', 'step': 3639, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:23.913829', 'step': 3639, 'epoch': 1}
{'type': 'loss', 'content': 0.12048877775669098, 'timestamp': '2025-10-02 00:17:23.921048', 'step': 3640, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:23.979732', 'step': 3640, 'epoch': 1}
{'type': 'loss', 'content': 0.03240828961133957, 'timestamp': '2025-10-02 00:17:23.983225', 'step': 3641, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:24.039949', 'step': 3641, 'epoch': 1}
{'type': 'loss', 'content': 0.24414324760437012, 'timestamp': '2025-10-02 00:17:24.043368', 'step': 3642, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:24.102655', 'step': 3642, 'epoch': 1}
{'type': 'loss', 'content': 0.08025787770748138, 'timestamp': '2025-10-02 00:17:24.106384', 'step': 3643, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:24.168758', 'step': 3643, 'epoch': 1}
{'type': 'loss', 'content': 0.07711812853813171, 'timestamp': '2025-10-02 00:17:24.174614', 'step': 3644, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:24.228391', 'step': 3644, 'epoch': 1}
{'type': 'loss', 'content': 0.06468552350997925, 'timestamp': '2025-10-02 00:17:24.236057', 'step': 3645, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:24.293097', 'step': 3645, 'epoch': 1}
{'type': 'loss', 'content': 0.05536209046840668, 'timestamp': '2025-10-02 00:17:24.302388', 'step': 3646, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:17:24.371333', 'step': 3646, 'epoch': 1}
{'type': 'loss', 'content': 0.007238659076392651, 'timestamp': '2025-10-02 00:17:24.383322', 'step': 3647, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:17:24.441606', 'step': 3647, 'epoch': 1}
{'type': 'loss', 'content': 0.26246973872184753, 'timestamp': '2025-10-02 00:17:24.447571', 'step': 3648, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:17:24.504814', 'step': 3648, 'epoch': 1}
{'type': 'loss', 'content': 0.09041142463684082, 'timestamp': '2025-10-02 00:17:24.507177', 'step': 3649, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:24.565868', 'step': 3649, 'epoch': 1}
{'type': 'loss', 'content': 0.07750171422958374, 'timestamp': '2025-10-02 00:17:24.573244', 'step': 3650, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:24.642742', 'step': 3650, 'epoch': 1}
{'type': 'loss', 'content': 0.06109349802136421, 'timestamp': '2025-10-02 00:17:24.652267', 'step': 3651, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:17:24.715751', 'step': 3651, 'epoch': 1}
{'type': 'loss', 'content': 0.13414210081100464, 'timestamp': '2025-10-02 00:17:24.725080', 'step': 3652, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:24.785581', 'step': 3652, 'epoch': 1}
{'type': 'loss', 'content': 0.0659680962562561, 'timestamp': '2025-10-02 00:17:24.788725', 'step': 3653, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:24.847021', 'step': 3653, 'epoch': 1}
{'type': 'loss', 'content': 0.15080304443836212, 'timestamp': '2025-10-02 00:17:24.849395', 'step': 3654, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:24.906673', 'step': 3654, 'epoch': 1}
{'type': 'loss', 'content': 0.10824912041425705, 'timestamp': '2025-10-02 00:17:24.916012', 'step': 3655, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:24.973580', 'step': 3655, 'epoch': 1}
{'type': 'loss', 'content': 0.03992723673582077, 'timestamp': '2025-10-02 00:17:24.981899', 'step': 3656, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:25.039602', 'step': 3656, 'epoch': 1}
{'type': 'loss', 'content': 0.17773973941802979, 'timestamp': '2025-10-02 00:17:25.044387', 'step': 3657, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:25.108304', 'step': 3657, 'epoch': 1}
{'type': 'loss', 'content': 0.05178146809339523, 'timestamp': '2025-10-02 00:17:25.112001', 'step': 3658, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:25.168864', 'step': 3658, 'epoch': 1}
{'type': 'loss', 'content': 0.10123540461063385, 'timestamp': '2025-10-02 00:17:25.171559', 'step': 3659, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:25.227009', 'step': 3659, 'epoch': 1}
{'type': 'loss', 'content': 0.0781107172369957, 'timestamp': '2025-10-02 00:17:25.234425', 'step': 3660, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:25.290647', 'step': 3660, 'epoch': 1}
{'type': 'loss', 'content': 0.09300658106803894, 'timestamp': '2025-10-02 00:17:25.298261', 'step': 3661, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:25.356138', 'step': 3661, 'epoch': 1}
{'type': 'loss', 'content': 0.04666030779480934, 'timestamp': '2025-10-02 00:17:25.363648', 'step': 3662, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:25.424604', 'step': 3662, 'epoch': 1}
{'type': 'loss', 'content': 0.23778855800628662, 'timestamp': '2025-10-02 00:17:25.430480', 'step': 3663, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:25.494206', 'step': 3663, 'epoch': 1}
{'type': 'loss', 'content': 0.0832764059305191, 'timestamp': '2025-10-02 00:17:25.503979', 'step': 3664, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:25.574212', 'step': 3664, 'epoch': 1}
{'type': 'loss', 'content': 0.048396483063697815, 'timestamp': '2025-10-02 00:17:25.577479', 'step': 3665, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:25.634888', 'step': 3665, 'epoch': 1}
{'type': 'loss', 'content': 0.1408805549144745, 'timestamp': '2025-10-02 00:17:25.639627', 'step': 3666, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:17:25.710612', 'step': 3666, 'epoch': 1}
{'type': 'loss', 'content': 0.03735809028148651, 'timestamp': '2025-10-02 00:17:25.721423', 'step': 3667, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:25.777657', 'step': 3667, 'epoch': 1}
{'type': 'loss', 'content': 0.07736866921186447, 'timestamp': '2025-10-02 00:17:25.787356', 'step': 3668, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:25.853795', 'step': 3668, 'epoch': 1}
{'type': 'loss', 'content': 0.06043023243546486, 'timestamp': '2025-10-02 00:17:25.858529', 'step': 3669, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:25.921340', 'step': 3669, 'epoch': 1}
{'type': 'loss', 'content': 0.0685015320777893, 'timestamp': '2025-10-02 00:17:25.925677', 'step': 3670, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:25.981922', 'step': 3670, 'epoch': 1}
{'type': 'loss', 'content': 0.020142804831266403, 'timestamp': '2025-10-02 00:17:25.986541', 'step': 3671, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:26.048792', 'step': 3671, 'epoch': 1}
{'type': 'loss', 'content': 0.04235563054680824, 'timestamp': '2025-10-02 00:17:26.057991', 'step': 3672, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:26.118283', 'step': 3672, 'epoch': 1}
{'type': 'loss', 'content': 0.03555880859494209, 'timestamp': '2025-10-02 00:17:26.127899', 'step': 3673, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:26.194323', 'step': 3673, 'epoch': 1}
{'type': 'loss', 'content': 0.055073004215955734, 'timestamp': '2025-10-02 00:17:26.197402', 'step': 3674, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:26.259080', 'step': 3674, 'epoch': 1}
{'type': 'loss', 'content': 0.05152622610330582, 'timestamp': '2025-10-02 00:17:26.265122', 'step': 3675, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:26.322647', 'step': 3675, 'epoch': 1}
{'type': 'loss', 'content': 0.017913514748215675, 'timestamp': '2025-10-02 00:17:26.332837', 'step': 3676, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:26.394015', 'step': 3676, 'epoch': 1}
{'type': 'loss', 'content': 0.1131468340754509, 'timestamp': '2025-10-02 00:17:26.396408', 'step': 3677, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:26.451638', 'step': 3677, 'epoch': 1}
{'type': 'loss', 'content': 0.15307098627090454, 'timestamp': '2025-10-02 00:17:26.458236', 'step': 3678, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:26.527460', 'step': 3678, 'epoch': 1}
{'type': 'loss', 'content': 0.12107722461223602, 'timestamp': '2025-10-02 00:17:26.530451', 'step': 3679, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:26.595719', 'step': 3679, 'epoch': 1}
{'type': 'loss', 'content': 0.1238706111907959, 'timestamp': '2025-10-02 00:17:26.606165', 'step': 3680, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:26.668492', 'step': 3680, 'epoch': 1}
{'type': 'loss', 'content': 0.048721421509981155, 'timestamp': '2025-10-02 00:17:26.678076', 'step': 3681, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:26.751509', 'step': 3681, 'epoch': 1}
{'type': 'loss', 'content': 0.07386402040719986, 'timestamp': '2025-10-02 00:17:26.757472', 'step': 3682, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:26.822369', 'step': 3682, 'epoch': 1}
{'type': 'loss', 'content': 0.0942191481590271, 'timestamp': '2025-10-02 00:17:26.829707', 'step': 3683, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:26.890167', 'step': 3683, 'epoch': 1}
{'type': 'loss', 'content': 0.011466837488114834, 'timestamp': '2025-10-02 00:17:26.900327', 'step': 3684, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:26.963940', 'step': 3684, 'epoch': 1}
{'type': 'loss', 'content': 0.16135063767433167, 'timestamp': '2025-10-02 00:17:26.968372', 'step': 3685, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:27.034491', 'step': 3685, 'epoch': 1}
{'type': 'loss', 'content': 0.03243781626224518, 'timestamp': '2025-10-02 00:17:27.044631', 'step': 3686, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:17:27.112721', 'step': 3686, 'epoch': 1}
{'type': 'loss', 'content': 0.0930514708161354, 'timestamp': '2025-10-02 00:17:27.123364', 'step': 3687, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:27.184934', 'step': 3687, 'epoch': 1}
{'type': 'loss', 'content': 0.0982215628027916, 'timestamp': '2025-10-02 00:17:27.194152', 'step': 3688, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:27.257015', 'step': 3688, 'epoch': 1}
{'type': 'loss', 'content': 0.12813103199005127, 'timestamp': '2025-10-02 00:17:27.263373', 'step': 3689, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:27.327578', 'step': 3689, 'epoch': 1}
{'type': 'loss', 'content': 0.12516285479068756, 'timestamp': '2025-10-02 00:17:27.334360', 'step': 3690, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:27.391667', 'step': 3690, 'epoch': 1}
{'type': 'loss', 'content': 0.21477513015270233, 'timestamp': '2025-10-02 00:17:27.395404', 'step': 3691, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:27.456483', 'step': 3691, 'epoch': 1}
{'type': 'loss', 'content': 0.17440921068191528, 'timestamp': '2025-10-02 00:17:27.468657', 'step': 3692, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:17:27.538069', 'step': 3692, 'epoch': 1}
{'type': 'loss', 'content': 0.02153892256319523, 'timestamp': '2025-10-02 00:17:27.549638', 'step': 3693, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:27.618484', 'step': 3693, 'epoch': 1}
{'type': 'loss', 'content': 0.0594460628926754, 'timestamp': '2025-10-02 00:17:27.628037', 'step': 3694, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:27.704601', 'step': 3694, 'epoch': 1}
{'type': 'loss', 'content': 0.14456255733966827, 'timestamp': '2025-10-02 00:17:27.712577', 'step': 3695, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:27.769941', 'step': 3695, 'epoch': 1}
{'type': 'loss', 'content': 0.11439046263694763, 'timestamp': '2025-10-02 00:17:27.777912', 'step': 3696, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:27.857704', 'step': 3696, 'epoch': 1}
{'type': 'loss', 'content': 0.03457525745034218, 'timestamp': '2025-10-02 00:17:27.865360', 'step': 3697, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:27.930169', 'step': 3697, 'epoch': 1}
{'type': 'loss', 'content': 0.2584243416786194, 'timestamp': '2025-10-02 00:17:27.933289', 'step': 3698, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:27.995795', 'step': 3698, 'epoch': 1}
{'type': 'loss', 'content': 0.03816297650337219, 'timestamp': '2025-10-02 00:17:28.006047', 'step': 3699, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:28.068882', 'step': 3699, 'epoch': 1}
{'type': 'loss', 'content': 0.17653979361057281, 'timestamp': '2025-10-02 00:17:28.076027', 'step': 3700, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:28.132258', 'step': 3700, 'epoch': 1}
{'type': 'loss', 'content': 0.09757907688617706, 'timestamp': '2025-10-02 00:17:28.138140', 'step': 3701, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:28.203599', 'step': 3701, 'epoch': 1}
{'type': 'loss', 'content': 0.09215644747018814, 'timestamp': '2025-10-02 00:17:28.206145', 'step': 3702, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:28.283242', 'step': 3702, 'epoch': 1}
{'type': 'loss', 'content': 0.2599017322063446, 'timestamp': '2025-10-02 00:17:28.286476', 'step': 3703, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:28.377209', 'step': 3703, 'epoch': 1}
{'type': 'loss', 'content': 0.20888330042362213, 'timestamp': '2025-10-02 00:17:28.392572', 'step': 3704, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:17:28.458750', 'step': 3704, 'epoch': 1}
{'type': 'loss', 'content': 0.14584000408649445, 'timestamp': '2025-10-02 00:17:28.462996', 'step': 3705, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:28.530811', 'step': 3705, 'epoch': 1}
{'type': 'loss', 'content': 0.05366871505975723, 'timestamp': '2025-10-02 00:17:28.534809', 'step': 3706, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:28.599404', 'step': 3706, 'epoch': 1}
{'type': 'loss', 'content': 0.1322220265865326, 'timestamp': '2025-10-02 00:17:28.602354', 'step': 3707, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:28.673903', 'step': 3707, 'epoch': 1}
{'type': 'loss', 'content': 0.055392757058143616, 'timestamp': '2025-10-02 00:17:28.680890', 'step': 3708, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:17:28.755308', 'step': 3708, 'epoch': 1}
{'type': 'loss', 'content': 0.10855245590209961, 'timestamp': '2025-10-02 00:17:28.758850', 'step': 3709, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:17:28.835473', 'step': 3709, 'epoch': 1}
{'type': 'loss', 'content': 0.25874102115631104, 'timestamp': '2025-10-02 00:17:28.838606', 'step': 3710, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:28.919366', 'step': 3710, 'epoch': 1}
{'type': 'loss', 'content': 0.028015729039907455, 'timestamp': '2025-10-02 00:17:28.928931', 'step': 3711, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:28.984838', 'step': 3711, 'epoch': 1}
{'type': 'loss', 'content': 0.09774913638830185, 'timestamp': '2025-10-02 00:17:28.991842', 'step': 3712, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:29.056656', 'step': 3712, 'epoch': 1}
{'type': 'loss', 'content': 0.06690705567598343, 'timestamp': '2025-10-02 00:17:29.062477', 'step': 3713, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:29.120846', 'step': 3713, 'epoch': 1}
{'type': 'loss', 'content': 0.061489131301641464, 'timestamp': '2025-10-02 00:17:29.124126', 'step': 3714, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:29.189388', 'step': 3714, 'epoch': 1}
{'type': 'loss', 'content': 0.08683363348245621, 'timestamp': '2025-10-02 00:17:29.191866', 'step': 3715, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:29.250409', 'step': 3715, 'epoch': 1}
{'type': 'loss', 'content': 0.06290589272975922, 'timestamp': '2025-10-02 00:17:29.257464', 'step': 3716, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:29.318260', 'step': 3716, 'epoch': 1}
{'type': 'loss', 'content': 0.06319025158882141, 'timestamp': '2025-10-02 00:17:29.329281', 'step': 3717, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:17:29.386170', 'step': 3717, 'epoch': 1}
{'type': 'loss', 'content': 0.11520813405513763, 'timestamp': '2025-10-02 00:17:29.395130', 'step': 3718, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:29.462877', 'step': 3718, 'epoch': 1}
{'type': 'loss', 'content': 0.0489627830684185, 'timestamp': '2025-10-02 00:17:29.472198', 'step': 3719, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:17:29.537961', 'step': 3719, 'epoch': 1}
{'type': 'loss', 'content': 0.0592774897813797, 'timestamp': '2025-10-02 00:17:29.549403', 'step': 3720, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:29.607119', 'step': 3720, 'epoch': 1}
{'type': 'loss', 'content': 0.030785776674747467, 'timestamp': '2025-10-02 00:17:29.612502', 'step': 3721, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:29.672036', 'step': 3721, 'epoch': 1}
{'type': 'loss', 'content': 0.03879401460289955, 'timestamp': '2025-10-02 00:17:29.678004', 'step': 3722, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:29.733216', 'step': 3722, 'epoch': 1}
{'type': 'loss', 'content': 0.057963378727436066, 'timestamp': '2025-10-02 00:17:29.738828', 'step': 3723, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:29.797143', 'step': 3723, 'epoch': 1}
{'type': 'loss', 'content': 0.06495518237352371, 'timestamp': '2025-10-02 00:17:29.805380', 'step': 3724, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:29.861255', 'step': 3724, 'epoch': 1}
{'type': 'loss', 'content': 0.11881984770298004, 'timestamp': '2025-10-02 00:17:29.868746', 'step': 3725, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:29.923692', 'step': 3725, 'epoch': 1}
{'type': 'loss', 'content': 0.20177605748176575, 'timestamp': '2025-10-02 00:17:29.926122', 'step': 3726, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:29.984014', 'step': 3726, 'epoch': 1}
{'type': 'loss', 'content': 0.11929350346326828, 'timestamp': '2025-10-02 00:17:29.986366', 'step': 3727, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:30.046176', 'step': 3727, 'epoch': 1}
{'type': 'loss', 'content': 0.08216357976198196, 'timestamp': '2025-10-02 00:17:30.054571', 'step': 3728, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:30.111245', 'step': 3728, 'epoch': 1}
{'type': 'loss', 'content': 0.05336123704910278, 'timestamp': '2025-10-02 00:17:30.117154', 'step': 3729, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:30.172603', 'step': 3729, 'epoch': 1}
{'type': 'loss', 'content': 0.02708597108721733, 'timestamp': '2025-10-02 00:17:30.176259', 'step': 3730, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:30.236493', 'step': 3730, 'epoch': 1}
{'type': 'loss', 'content': 0.12366887927055359, 'timestamp': '2025-10-02 00:17:30.239098', 'step': 3731, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:30.298502', 'step': 3731, 'epoch': 1}
{'type': 'loss', 'content': 0.20348894596099854, 'timestamp': '2025-10-02 00:17:30.304497', 'step': 3732, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:30.364284', 'step': 3732, 'epoch': 1}
{'type': 'loss', 'content': 0.10420826077461243, 'timestamp': '2025-10-02 00:17:30.369462', 'step': 3733, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:30.430089', 'step': 3733, 'epoch': 1}
{'type': 'loss', 'content': 0.07008524984121323, 'timestamp': '2025-10-02 00:17:30.433971', 'step': 3734, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:30.489166', 'step': 3734, 'epoch': 1}
{'type': 'loss', 'content': 0.20905394852161407, 'timestamp': '2025-10-02 00:17:30.491640', 'step': 3735, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:30.550328', 'step': 3735, 'epoch': 1}
{'type': 'loss', 'content': 0.027691373601555824, 'timestamp': '2025-10-02 00:17:30.556799', 'step': 3736, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:30.628251', 'step': 3736, 'epoch': 1}
{'type': 'loss', 'content': 0.05609999969601631, 'timestamp': '2025-10-02 00:17:30.634150', 'step': 3737, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:17:30.691670', 'step': 3737, 'epoch': 1}
{'type': 'loss', 'content': 0.12720003724098206, 'timestamp': '2025-10-02 00:17:30.698463', 'step': 3738, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:30.761464', 'step': 3738, 'epoch': 1}
{'type': 'loss', 'content': 0.14555475115776062, 'timestamp': '2025-10-02 00:17:30.764840', 'step': 3739, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:30.828784', 'step': 3739, 'epoch': 1}
{'type': 'loss', 'content': 0.0923926904797554, 'timestamp': '2025-10-02 00:17:30.837540', 'step': 3740, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:30.892284', 'step': 3740, 'epoch': 1}
{'type': 'loss', 'content': 0.18803957104682922, 'timestamp': '2025-10-02 00:17:30.895712', 'step': 3741, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:30.953860', 'step': 3741, 'epoch': 1}
{'type': 'loss', 'content': 0.11271754652261734, 'timestamp': '2025-10-02 00:17:30.957419', 'step': 3742, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:31.012358', 'step': 3742, 'epoch': 1}
{'type': 'loss', 'content': 0.08874914795160294, 'timestamp': '2025-10-02 00:17:31.020775', 'step': 3743, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:31.084355', 'step': 3743, 'epoch': 1}
{'type': 'loss', 'content': 0.0913572683930397, 'timestamp': '2025-10-02 00:17:31.091429', 'step': 3744, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:17:31.155709', 'step': 3744, 'epoch': 1}
{'type': 'loss', 'content': 0.04164090380072594, 'timestamp': '2025-10-02 00:17:31.167033', 'step': 3745, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:31.230309', 'step': 3745, 'epoch': 1}
{'type': 'loss', 'content': 0.07289707660675049, 'timestamp': '2025-10-02 00:17:31.232665', 'step': 3746, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:31.302312', 'step': 3746, 'epoch': 1}
{'type': 'loss', 'content': 0.040180034935474396, 'timestamp': '2025-10-02 00:17:31.310897', 'step': 3747, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:31.376073', 'step': 3747, 'epoch': 1}
{'type': 'loss', 'content': 0.07756977528333664, 'timestamp': '2025-10-02 00:17:31.386359', 'step': 3748, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:31.457793', 'step': 3748, 'epoch': 1}
{'type': 'loss', 'content': 0.18129929900169373, 'timestamp': '2025-10-02 00:17:31.461236', 'step': 3749, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:31.520363', 'step': 3749, 'epoch': 1}
{'type': 'loss', 'content': 0.038947030901908875, 'timestamp': '2025-10-02 00:17:31.527429', 'step': 3750, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:31.590074', 'step': 3750, 'epoch': 1}
{'type': 'loss', 'content': 0.15521955490112305, 'timestamp': '2025-10-02 00:17:31.592389', 'step': 3751, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:31.658643', 'step': 3751, 'epoch': 1}
{'type': 'loss', 'content': 0.09400232136249542, 'timestamp': '2025-10-02 00:17:31.666750', 'step': 3752, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:31.721269', 'step': 3752, 'epoch': 1}
{'type': 'loss', 'content': 0.01655796729028225, 'timestamp': '2025-10-02 00:17:31.728837', 'step': 3753, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:31.784850', 'step': 3753, 'epoch': 1}
{'type': 'loss', 'content': 0.11001144349575043, 'timestamp': '2025-10-02 00:17:31.787937', 'step': 3754, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:31.844980', 'step': 3754, 'epoch': 1}
{'type': 'loss', 'content': 0.14244091510772705, 'timestamp': '2025-10-02 00:17:31.847780', 'step': 3755, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:31.903113', 'step': 3755, 'epoch': 1}
{'type': 'loss', 'content': 0.11859466880559921, 'timestamp': '2025-10-02 00:17:31.909774', 'step': 3756, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:31.964517', 'step': 3756, 'epoch': 1}
{'type': 'loss', 'content': 0.016057997941970825, 'timestamp': '2025-10-02 00:17:31.974008', 'step': 3757, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:32.029205', 'step': 3757, 'epoch': 1}
{'type': 'loss', 'content': 0.03910151496529579, 'timestamp': '2025-10-02 00:17:32.038514', 'step': 3758, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:32.093339', 'step': 3758, 'epoch': 1}
{'type': 'loss', 'content': 0.20310920476913452, 'timestamp': '2025-10-02 00:17:32.098389', 'step': 3759, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:32.160060', 'step': 3759, 'epoch': 1}
{'type': 'loss', 'content': 0.10011488944292068, 'timestamp': '2025-10-02 00:17:32.166800', 'step': 3760, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:32.230103', 'step': 3760, 'epoch': 1}
{'type': 'loss', 'content': 0.08401905745267868, 'timestamp': '2025-10-02 00:17:32.241011', 'step': 3761, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:32.301914', 'step': 3761, 'epoch': 1}
{'type': 'loss', 'content': 0.057107292115688324, 'timestamp': '2025-10-02 00:17:32.307779', 'step': 3762, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:17:32.372421', 'step': 3762, 'epoch': 1}
{'type': 'loss', 'content': 0.03818204253911972, 'timestamp': '2025-10-02 00:17:32.383106', 'step': 3763, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:32.437391', 'step': 3763, 'epoch': 1}
{'type': 'loss', 'content': 0.2080957442522049, 'timestamp': '2025-10-02 00:17:32.443829', 'step': 3764, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:32.498814', 'step': 3764, 'epoch': 1}
{'type': 'loss', 'content': 0.09641575068235397, 'timestamp': '2025-10-02 00:17:32.506196', 'step': 3765, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:32.564092', 'step': 3765, 'epoch': 1}
{'type': 'loss', 'content': 0.02089432254433632, 'timestamp': '2025-10-02 00:17:32.567357', 'step': 3766, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:32.623756', 'step': 3766, 'epoch': 1}
{'type': 'loss', 'content': 0.19546394050121307, 'timestamp': '2025-10-02 00:17:32.626733', 'step': 3767, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:32.693695', 'step': 3767, 'epoch': 1}
{'type': 'loss', 'content': 0.027251005172729492, 'timestamp': '2025-10-02 00:17:32.704032', 'step': 3768, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:32.759658', 'step': 3768, 'epoch': 1}
{'type': 'loss', 'content': 0.06433772295713425, 'timestamp': '2025-10-02 00:17:32.762075', 'step': 3769, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:17:32.816087', 'step': 3769, 'epoch': 1}
{'type': 'loss', 'content': 0.22680673003196716, 'timestamp': '2025-10-02 00:17:32.818389', 'step': 3770, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:32.875856', 'step': 3770, 'epoch': 1}
{'type': 'loss', 'content': 0.10125628113746643, 'timestamp': '2025-10-02 00:17:32.878412', 'step': 3771, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:32.932933', 'step': 3771, 'epoch': 1}
{'type': 'loss', 'content': 0.09116201102733612, 'timestamp': '2025-10-02 00:17:32.939006', 'step': 3772, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:32.994687', 'step': 3772, 'epoch': 1}
{'type': 'loss', 'content': 0.0416659414768219, 'timestamp': '2025-10-02 00:17:32.996997', 'step': 3773, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:33.051273', 'step': 3773, 'epoch': 1}
{'type': 'loss', 'content': 0.11534595489501953, 'timestamp': '2025-10-02 00:17:33.053613', 'step': 3774, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:17:33.108870', 'step': 3774, 'epoch': 1}
{'type': 'loss', 'content': 0.10273135453462601, 'timestamp': '2025-10-02 00:17:33.110924', 'step': 3775, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:33.164826', 'step': 3775, 'epoch': 1}
{'type': 'loss', 'content': 0.0835469514131546, 'timestamp': '2025-10-02 00:17:33.171016', 'step': 3776, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:33.224820', 'step': 3776, 'epoch': 1}
{'type': 'loss', 'content': 0.11057404428720474, 'timestamp': '2025-10-02 00:17:33.230750', 'step': 3777, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:33.285172', 'step': 3777, 'epoch': 1}
{'type': 'loss', 'content': 0.17674008011817932, 'timestamp': '2025-10-02 00:17:33.295180', 'step': 3778, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:33.350800', 'step': 3778, 'epoch': 1}
{'type': 'loss', 'content': 0.1055745929479599, 'timestamp': '2025-10-02 00:17:33.354980', 'step': 3779, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:33.416696', 'step': 3779, 'epoch': 1}
{'type': 'loss', 'content': 0.11815629154443741, 'timestamp': '2025-10-02 00:17:33.422402', 'step': 3780, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:33.475637', 'step': 3780, 'epoch': 1}
{'type': 'loss', 'content': 0.2265670746564865, 'timestamp': '2025-10-02 00:17:33.478395', 'step': 3781, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:33.532258', 'step': 3781, 'epoch': 1}
{'type': 'loss', 'content': 0.1748935431241989, 'timestamp': '2025-10-02 00:17:33.538083', 'step': 3782, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:17:33.601024', 'step': 3782, 'epoch': 1}
{'type': 'loss', 'content': 0.028501853346824646, 'timestamp': '2025-10-02 00:17:33.611877', 'step': 3783, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:33.666483', 'step': 3783, 'epoch': 1}
{'type': 'loss', 'content': 0.10940054059028625, 'timestamp': '2025-10-02 00:17:33.672278', 'step': 3784, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:33.726211', 'step': 3784, 'epoch': 1}
{'type': 'loss', 'content': 0.028970718383789062, 'timestamp': '2025-10-02 00:17:33.735671', 'step': 3785, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:33.794788', 'step': 3785, 'epoch': 1}
{'type': 'loss', 'content': 0.04632602632045746, 'timestamp': '2025-10-02 00:17:33.805001', 'step': 3786, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:33.858872', 'step': 3786, 'epoch': 1}
{'type': 'loss', 'content': 0.16148218512535095, 'timestamp': '2025-10-02 00:17:33.861415', 'step': 3787, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:33.916483', 'step': 3787, 'epoch': 1}
{'type': 'loss', 'content': 0.09080390632152557, 'timestamp': '2025-10-02 00:17:33.926799', 'step': 3788, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:33.983758', 'step': 3788, 'epoch': 1}
{'type': 'loss', 'content': 0.03854372352361679, 'timestamp': '2025-10-02 00:17:33.986040', 'step': 3789, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:34.039370', 'step': 3789, 'epoch': 1}
{'type': 'loss', 'content': 0.14440195262432098, 'timestamp': '2025-10-02 00:17:34.042013', 'step': 3790, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:34.096214', 'step': 3790, 'epoch': 1}
{'type': 'loss', 'content': 0.12134139239788055, 'timestamp': '2025-10-02 00:17:34.098695', 'step': 3791, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:34.153482', 'step': 3791, 'epoch': 1}
{'type': 'loss', 'content': 0.06713911890983582, 'timestamp': '2025-10-02 00:17:34.160653', 'step': 3792, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:34.214782', 'step': 3792, 'epoch': 1}
{'type': 'loss', 'content': 0.04247839376330376, 'timestamp': '2025-10-02 00:17:34.220721', 'step': 3793, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:34.275114', 'step': 3793, 'epoch': 1}
{'type': 'loss', 'content': 0.20793336629867554, 'timestamp': '2025-10-02 00:17:34.277446', 'step': 3794, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:34.331914', 'step': 3794, 'epoch': 1}
{'type': 'loss', 'content': 0.03879311680793762, 'timestamp': '2025-10-02 00:17:34.337865', 'step': 3795, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:34.396797', 'step': 3795, 'epoch': 1}
{'type': 'loss', 'content': 0.04503776133060455, 'timestamp': '2025-10-02 00:17:34.407675', 'step': 3796, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:34.461377', 'step': 3796, 'epoch': 1}
{'type': 'loss', 'content': 0.02684631571173668, 'timestamp': '2025-10-02 00:17:34.469117', 'step': 3797, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:34.525119', 'step': 3797, 'epoch': 1}
{'type': 'loss', 'content': 0.03238266706466675, 'timestamp': '2025-10-02 00:17:34.534670', 'step': 3798, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:34.589704', 'step': 3798, 'epoch': 1}
{'type': 'loss', 'content': 0.0672638788819313, 'timestamp': '2025-10-02 00:17:34.599166', 'step': 3799, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:34.653082', 'step': 3799, 'epoch': 1}
{'type': 'loss', 'content': 0.04726870730519295, 'timestamp': '2025-10-02 00:17:34.659103', 'step': 3800, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:34.712461', 'step': 3800, 'epoch': 1}
{'type': 'loss', 'content': 0.08195234090089798, 'timestamp': '2025-10-02 00:17:34.714680', 'step': 3801, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:17:34.776696', 'step': 3801, 'epoch': 1}
{'type': 'loss', 'content': 0.06230420619249344, 'timestamp': '2025-10-02 00:17:34.787413', 'step': 3802, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:17:34.849752', 'step': 3802, 'epoch': 1}
{'type': 'loss', 'content': 0.03188398480415344, 'timestamp': '2025-10-02 00:17:34.860263', 'step': 3803, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:34.915602', 'step': 3803, 'epoch': 1}
{'type': 'loss', 'content': 0.058910612016916275, 'timestamp': '2025-10-02 00:17:34.921272', 'step': 3804, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:34.975469', 'step': 3804, 'epoch': 1}
{'type': 'loss', 'content': 0.023324918001890182, 'timestamp': '2025-10-02 00:17:34.985040', 'step': 3805, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:35.039728', 'step': 3805, 'epoch': 1}
{'type': 'loss', 'content': 0.025979025289416313, 'timestamp': '2025-10-02 00:17:35.042418', 'step': 3806, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:35.096426', 'step': 3806, 'epoch': 1}
{'type': 'loss', 'content': 0.07356975972652435, 'timestamp': '2025-10-02 00:17:35.103908', 'step': 3807, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:35.158258', 'step': 3807, 'epoch': 1}
{'type': 'loss', 'content': 0.1716650277376175, 'timestamp': '2025-10-02 00:17:35.163779', 'step': 3808, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:35.217098', 'step': 3808, 'epoch': 1}
{'type': 'loss', 'content': 0.10335861146450043, 'timestamp': '2025-10-02 00:17:35.219214', 'step': 3809, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:35.272665', 'step': 3809, 'epoch': 1}
{'type': 'loss', 'content': 0.1919737309217453, 'timestamp': '2025-10-02 00:17:35.275539', 'step': 3810, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:35.329832', 'step': 3810, 'epoch': 1}
{'type': 'loss', 'content': 0.08076902478933334, 'timestamp': '2025-10-02 00:17:35.334230', 'step': 3811, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:17:35.395550', 'step': 3811, 'epoch': 1}
{'type': 'loss', 'content': 0.022868165746331215, 'timestamp': '2025-10-02 00:17:35.406944', 'step': 3812, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:35.461318', 'step': 3812, 'epoch': 1}
{'type': 'loss', 'content': 0.037046611309051514, 'timestamp': '2025-10-02 00:17:35.463519', 'step': 3813, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:35.517401', 'step': 3813, 'epoch': 1}
{'type': 'loss', 'content': 0.29253098368644714, 'timestamp': '2025-10-02 00:17:35.520664', 'step': 3814, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:17:35.595299', 'step': 3814, 'epoch': 1}
{'type': 'loss', 'content': 0.020685754716396332, 'timestamp': '2025-10-02 00:17:35.608521', 'step': 3815, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:35.663943', 'step': 3815, 'epoch': 1}
{'type': 'loss', 'content': 0.058340802788734436, 'timestamp': '2025-10-02 00:17:35.670620', 'step': 3816, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:35.725767', 'step': 3816, 'epoch': 1}
{'type': 'loss', 'content': 0.10331883281469345, 'timestamp': '2025-10-02 00:17:35.728850', 'step': 3817, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:35.785472', 'step': 3817, 'epoch': 1}
{'type': 'loss', 'content': 0.16685235500335693, 'timestamp': '2025-10-02 00:17:35.788237', 'step': 3818, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:35.844296', 'step': 3818, 'epoch': 1}
{'type': 'loss', 'content': 0.18424175679683685, 'timestamp': '2025-10-02 00:17:35.846945', 'step': 3819, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:35.903419', 'step': 3819, 'epoch': 1}
{'type': 'loss', 'content': 0.159730926156044, 'timestamp': '2025-10-02 00:17:35.910383', 'step': 3820, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:35.965922', 'step': 3820, 'epoch': 1}
{'type': 'loss', 'content': 0.1854296624660492, 'timestamp': '2025-10-02 00:17:35.968328', 'step': 3821, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:36.023211', 'step': 3821, 'epoch': 1}
{'type': 'loss', 'content': 0.195037841796875, 'timestamp': '2025-10-02 00:17:36.026051', 'step': 3822, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:36.087132', 'step': 3822, 'epoch': 1}
{'type': 'loss', 'content': 0.02911498211324215, 'timestamp': '2025-10-02 00:17:36.097326', 'step': 3823, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:17:36.152926', 'step': 3823, 'epoch': 1}
{'type': 'loss', 'content': 0.2048928290605545, 'timestamp': '2025-10-02 00:17:36.158922', 'step': 3824, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:36.213479', 'step': 3824, 'epoch': 1}
{'type': 'loss', 'content': 0.06620707362890244, 'timestamp': '2025-10-02 00:17:36.215852', 'step': 3825, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:36.275484', 'step': 3825, 'epoch': 1}
{'type': 'loss', 'content': 0.045718882232904434, 'timestamp': '2025-10-02 00:17:36.285645', 'step': 3826, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:36.343007', 'step': 3826, 'epoch': 1}
{'type': 'loss', 'content': 0.1711910516023636, 'timestamp': '2025-10-02 00:17:36.346093', 'step': 3827, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:36.402268', 'step': 3827, 'epoch': 1}
{'type': 'loss', 'content': 0.026110010221600533, 'timestamp': '2025-10-02 00:17:36.412337', 'step': 3828, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:36.469385', 'step': 3828, 'epoch': 1}
{'type': 'loss', 'content': 0.13755880296230316, 'timestamp': '2025-10-02 00:17:36.471738', 'step': 3829, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:36.529671', 'step': 3829, 'epoch': 1}
{'type': 'loss', 'content': 0.11206882447004318, 'timestamp': '2025-10-02 00:17:36.532972', 'step': 3830, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:17:36.596031', 'step': 3830, 'epoch': 1}
{'type': 'loss', 'content': 0.06518299877643585, 'timestamp': '2025-10-02 00:17:36.606522', 'step': 3831, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:36.661057', 'step': 3831, 'epoch': 1}
{'type': 'loss', 'content': 0.1273600161075592, 'timestamp': '2025-10-02 00:17:36.667529', 'step': 3832, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:36.723509', 'step': 3832, 'epoch': 1}
{'type': 'loss', 'content': 0.11931683868169785, 'timestamp': '2025-10-02 00:17:36.727333', 'step': 3833, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:36.784593', 'step': 3833, 'epoch': 1}
{'type': 'loss', 'content': 0.24795658886432648, 'timestamp': '2025-10-02 00:17:36.787452', 'step': 3834, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:36.843499', 'step': 3834, 'epoch': 1}
{'type': 'loss', 'content': 0.266539067029953, 'timestamp': '2025-10-02 00:17:36.846807', 'step': 3835, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:17:36.909833', 'step': 3835, 'epoch': 1}
{'type': 'loss', 'content': 0.05286870524287224, 'timestamp': '2025-10-02 00:17:36.921113', 'step': 3836, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:36.977982', 'step': 3836, 'epoch': 1}
{'type': 'loss', 'content': 0.07845469564199448, 'timestamp': '2025-10-02 00:17:36.980498', 'step': 3837, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:37.038695', 'step': 3837, 'epoch': 1}
{'type': 'loss', 'content': 0.08751154690980911, 'timestamp': '2025-10-02 00:17:37.048255', 'step': 3838, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:37.105605', 'step': 3838, 'epoch': 1}
{'type': 'loss', 'content': 0.09226951003074646, 'timestamp': '2025-10-02 00:17:37.115105', 'step': 3839, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:37.171964', 'step': 3839, 'epoch': 1}
{'type': 'loss', 'content': 0.055527374148368835, 'timestamp': '2025-10-02 00:17:37.178796', 'step': 3840, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:17:37.240488', 'step': 3840, 'epoch': 1}
{'type': 'loss', 'content': 0.05201960727572441, 'timestamp': '2025-10-02 00:17:37.251849', 'step': 3841, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:37.313133', 'step': 3841, 'epoch': 1}
{'type': 'loss', 'content': 0.05172271281480789, 'timestamp': '2025-10-02 00:17:37.323330', 'step': 3842, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:17:37.383126', 'step': 3842, 'epoch': 1}
{'type': 'loss', 'content': 0.11706909537315369, 'timestamp': '2025-10-02 00:17:37.385661', 'step': 3843, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:37.442295', 'step': 3843, 'epoch': 1}
{'type': 'loss', 'content': 0.19787676632404327, 'timestamp': '2025-10-02 00:17:37.449098', 'step': 3844, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:37.503872', 'step': 3844, 'epoch': 1}
{'type': 'loss', 'content': 0.13078881800174713, 'timestamp': '2025-10-02 00:17:37.513559', 'step': 3845, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:17:37.568878', 'step': 3845, 'epoch': 1}
{'type': 'loss', 'content': 0.23070180416107178, 'timestamp': '2025-10-02 00:17:37.571743', 'step': 3846, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:37.628037', 'step': 3846, 'epoch': 1}
{'type': 'loss', 'content': 0.058366160839796066, 'timestamp': '2025-10-02 00:17:37.635597', 'step': 3847, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:37.692133', 'step': 3847, 'epoch': 1}
{'type': 'loss', 'content': 0.2060784101486206, 'timestamp': '2025-10-02 00:17:37.698535', 'step': 3848, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:37.758963', 'step': 3848, 'epoch': 1}
{'type': 'loss', 'content': 0.09127463400363922, 'timestamp': '2025-10-02 00:17:37.761839', 'step': 3849, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:17:37.825798', 'step': 3849, 'epoch': 1}
{'type': 'loss', 'content': 0.054156363010406494, 'timestamp': '2025-10-02 00:17:37.836347', 'step': 3850, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:37.893154', 'step': 3850, 'epoch': 1}
{'type': 'loss', 'content': 0.09763645380735397, 'timestamp': '2025-10-02 00:17:37.895521', 'step': 3851, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:37.951686', 'step': 3851, 'epoch': 1}
{'type': 'loss', 'content': 0.19106081128120422, 'timestamp': '2025-10-02 00:17:37.957359', 'step': 3852, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:38.013103', 'step': 3852, 'epoch': 1}
{'type': 'loss', 'content': 0.028178835287690163, 'timestamp': '2025-10-02 00:17:38.015483', 'step': 3853, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:17:38.078400', 'step': 3853, 'epoch': 1}
{'type': 'loss', 'content': 0.06507749110460281, 'timestamp': '2025-10-02 00:17:38.088896', 'step': 3854, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:38.144845', 'step': 3854, 'epoch': 1}
{'type': 'loss', 'content': 0.06033291295170784, 'timestamp': '2025-10-02 00:17:38.147386', 'step': 3855, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:38.203152', 'step': 3855, 'epoch': 1}
{'type': 'loss', 'content': 0.0364246591925621, 'timestamp': '2025-10-02 00:17:38.209798', 'step': 3856, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:38.267602', 'step': 3856, 'epoch': 1}
{'type': 'loss', 'content': 0.1698676496744156, 'timestamp': '2025-10-02 00:17:38.270333', 'step': 3857, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:17:38.339904', 'step': 3857, 'epoch': 1}
{'type': 'loss', 'content': 0.03878949210047722, 'timestamp': '2025-10-02 00:17:38.351827', 'step': 3858, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:38.406325', 'step': 3858, 'epoch': 1}
{'type': 'loss', 'content': 0.1735672652721405, 'timestamp': '2025-10-02 00:17:38.409757', 'step': 3859, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:38.464604', 'step': 3859, 'epoch': 1}
{'type': 'loss', 'content': 0.17825216054916382, 'timestamp': '2025-10-02 00:17:38.470780', 'step': 3860, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:38.528133', 'step': 3860, 'epoch': 1}
{'type': 'loss', 'content': 0.13167619705200195, 'timestamp': '2025-10-02 00:17:38.539033', 'step': 3861, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:38.592597', 'step': 3861, 'epoch': 1}
{'type': 'loss', 'content': 0.1535036265850067, 'timestamp': '2025-10-02 00:17:38.594949', 'step': 3862, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:17:38.658128', 'step': 3862, 'epoch': 1}
{'type': 'loss', 'content': 0.04896696284413338, 'timestamp': '2025-10-02 00:17:38.668974', 'step': 3863, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:38.723037', 'step': 3863, 'epoch': 1}
{'type': 'loss', 'content': 0.0835939422249794, 'timestamp': '2025-10-02 00:17:38.729128', 'step': 3864, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:38.782617', 'step': 3864, 'epoch': 1}
{'type': 'loss', 'content': 0.13167645037174225, 'timestamp': '2025-10-02 00:17:38.784844', 'step': 3865, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:38.838659', 'step': 3865, 'epoch': 1}
{'type': 'loss', 'content': 0.08123700320720673, 'timestamp': '2025-10-02 00:17:38.848059', 'step': 3866, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:38.910155', 'step': 3866, 'epoch': 1}
{'type': 'loss', 'content': 0.07175517082214355, 'timestamp': '2025-10-02 00:17:38.917744', 'step': 3867, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:38.972276', 'step': 3867, 'epoch': 1}
{'type': 'loss', 'content': 0.10826443880796432, 'timestamp': '2025-10-02 00:17:38.978151', 'step': 3868, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:39.033364', 'step': 3868, 'epoch': 1}
{'type': 'loss', 'content': 0.16500215232372284, 'timestamp': '2025-10-02 00:17:39.035547', 'step': 3869, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:39.089333', 'step': 3869, 'epoch': 1}
{'type': 'loss', 'content': 0.03216346725821495, 'timestamp': '2025-10-02 00:17:39.095208', 'step': 3870, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:39.150603', 'step': 3870, 'epoch': 1}
{'type': 'loss', 'content': 0.07438445836305618, 'timestamp': '2025-10-02 00:17:39.158008', 'step': 3871, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:39.212398', 'step': 3871, 'epoch': 1}
{'type': 'loss', 'content': 0.09680788964033127, 'timestamp': '2025-10-02 00:17:39.217933', 'step': 3872, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:39.271190', 'step': 3872, 'epoch': 1}
{'type': 'loss', 'content': 0.05690537765622139, 'timestamp': '2025-10-02 00:17:39.278634', 'step': 3873, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:39.333985', 'step': 3873, 'epoch': 1}
{'type': 'loss', 'content': 0.11284082382917404, 'timestamp': '2025-10-02 00:17:39.337236', 'step': 3874, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:17:39.393137', 'step': 3874, 'epoch': 1}
{'type': 'loss', 'content': 0.09651171416044235, 'timestamp': '2025-10-02 00:17:39.395762', 'step': 3875, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:39.452462', 'step': 3875, 'epoch': 1}
{'type': 'loss', 'content': 0.03431365638971329, 'timestamp': '2025-10-02 00:17:39.460545', 'step': 3876, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:39.516157', 'step': 3876, 'epoch': 1}
{'type': 'loss', 'content': 0.07651745527982712, 'timestamp': '2025-10-02 00:17:39.518602', 'step': 3877, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:39.573227', 'step': 3877, 'epoch': 1}
{'type': 'loss', 'content': 0.28190019726753235, 'timestamp': '2025-10-02 00:17:39.575817', 'step': 3878, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:39.629807', 'step': 3878, 'epoch': 1}
{'type': 'loss', 'content': 0.1325719803571701, 'timestamp': '2025-10-02 00:17:39.632114', 'step': 3879, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:39.686658', 'step': 3879, 'epoch': 1}
{'type': 'loss', 'content': 0.20904193818569183, 'timestamp': '2025-10-02 00:17:39.692473', 'step': 3880, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:39.745808', 'step': 3880, 'epoch': 1}
{'type': 'loss', 'content': 0.07366359978914261, 'timestamp': '2025-10-02 00:17:39.749137', 'step': 3881, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:39.803604', 'step': 3881, 'epoch': 1}
{'type': 'loss', 'content': 0.09201020002365112, 'timestamp': '2025-10-02 00:17:39.805887', 'step': 3882, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:39.860807', 'step': 3882, 'epoch': 1}
{'type': 'loss', 'content': 0.1969350427389145, 'timestamp': '2025-10-02 00:17:39.863140', 'step': 3883, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:39.917415', 'step': 3883, 'epoch': 1}
{'type': 'loss', 'content': 0.06501365453004837, 'timestamp': '2025-10-02 00:17:39.923602', 'step': 3884, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:39.980572', 'step': 3884, 'epoch': 1}
{'type': 'loss', 'content': 0.04475182294845581, 'timestamp': '2025-10-02 00:17:39.991575', 'step': 3885, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:17:40.062977', 'step': 3885, 'epoch': 1}
{'type': 'loss', 'content': 0.030825577676296234, 'timestamp': '2025-10-02 00:17:40.075560', 'step': 3886, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:40.130397', 'step': 3886, 'epoch': 1}
{'type': 'loss', 'content': 0.1309145987033844, 'timestamp': '2025-10-02 00:17:40.132519', 'step': 3887, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:40.187485', 'step': 3887, 'epoch': 1}
{'type': 'loss', 'content': 0.01749790459871292, 'timestamp': '2025-10-02 00:17:40.195671', 'step': 3888, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:40.249411', 'step': 3888, 'epoch': 1}
{'type': 'loss', 'content': 0.13358823955059052, 'timestamp': '2025-10-02 00:17:40.251439', 'step': 3889, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:40.305717', 'step': 3889, 'epoch': 1}
{'type': 'loss', 'content': 0.15363098680973053, 'timestamp': '2025-10-02 00:17:40.308156', 'step': 3890, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:40.361696', 'step': 3890, 'epoch': 1}
{'type': 'loss', 'content': 0.09636130928993225, 'timestamp': '2025-10-02 00:17:40.367491', 'step': 3891, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:40.423533', 'step': 3891, 'epoch': 1}
{'type': 'loss', 'content': 0.0740685686469078, 'timestamp': '2025-10-02 00:17:40.429543', 'step': 3892, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:40.484484', 'step': 3892, 'epoch': 1}
{'type': 'loss', 'content': 0.09231157600879669, 'timestamp': '2025-10-02 00:17:40.486698', 'step': 3893, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:40.541010', 'step': 3893, 'epoch': 1}
{'type': 'loss', 'content': 0.05763865262269974, 'timestamp': '2025-10-02 00:17:40.543393', 'step': 3894, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:40.597976', 'step': 3894, 'epoch': 1}
{'type': 'loss', 'content': 0.06729567795991898, 'timestamp': '2025-10-02 00:17:40.603624', 'step': 3895, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:40.659759', 'step': 3895, 'epoch': 1}
{'type': 'loss', 'content': 0.060135092586278915, 'timestamp': '2025-10-02 00:17:40.670059', 'step': 3896, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:40.724804', 'step': 3896, 'epoch': 1}
{'type': 'loss', 'content': 0.06421738117933273, 'timestamp': '2025-10-02 00:17:40.727554', 'step': 3897, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:40.783977', 'step': 3897, 'epoch': 1}
{'type': 'loss', 'content': 0.032262228429317474, 'timestamp': '2025-10-02 00:17:40.793529', 'step': 3898, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:40.848839', 'step': 3898, 'epoch': 1}
{'type': 'loss', 'content': 0.09040305018424988, 'timestamp': '2025-10-02 00:17:40.851535', 'step': 3899, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:40.907837', 'step': 3899, 'epoch': 1}
{'type': 'loss', 'content': 0.017762934789061546, 'timestamp': '2025-10-02 00:17:40.913812', 'step': 3900, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:40.966954', 'step': 3900, 'epoch': 1}
{'type': 'loss', 'content': 0.06404846906661987, 'timestamp': '2025-10-02 00:17:40.974391', 'step': 3901, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:17:41.028416', 'step': 3901, 'epoch': 1}
{'type': 'loss', 'content': 0.16072037816047668, 'timestamp': '2025-10-02 00:17:41.030574', 'step': 3902, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:41.085248', 'step': 3902, 'epoch': 1}
{'type': 'loss', 'content': 0.054706115275621414, 'timestamp': '2025-10-02 00:17:41.092599', 'step': 3903, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:41.146774', 'step': 3903, 'epoch': 1}
{'type': 'loss', 'content': 0.16878218948841095, 'timestamp': '2025-10-02 00:17:41.152718', 'step': 3904, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:41.207369', 'step': 3904, 'epoch': 1}
{'type': 'loss', 'content': 0.023209603503346443, 'timestamp': '2025-10-02 00:17:41.214846', 'step': 3905, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:17:41.268770', 'step': 3905, 'epoch': 1}
{'type': 'loss', 'content': 0.12481089681386948, 'timestamp': '2025-10-02 00:17:41.271262', 'step': 3906, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:41.325528', 'step': 3906, 'epoch': 1}
{'type': 'loss', 'content': 0.06610220670700073, 'timestamp': '2025-10-02 00:17:41.327908', 'step': 3907, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:41.381777', 'step': 3907, 'epoch': 1}
{'type': 'loss', 'content': 0.12193424254655838, 'timestamp': '2025-10-02 00:17:41.387943', 'step': 3908, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:41.441349', 'step': 3908, 'epoch': 1}
{'type': 'loss', 'content': 0.10778207331895828, 'timestamp': '2025-10-02 00:17:41.443641', 'step': 3909, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:41.497982', 'step': 3909, 'epoch': 1}
{'type': 'loss', 'content': 0.045419029891490936, 'timestamp': '2025-10-02 00:17:41.503973', 'step': 3910, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:41.558963', 'step': 3910, 'epoch': 1}
{'type': 'loss', 'content': 0.09819599986076355, 'timestamp': '2025-10-02 00:17:41.564626', 'step': 3911, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:41.619985', 'step': 3911, 'epoch': 1}
{'type': 'loss', 'content': 0.05804508924484253, 'timestamp': '2025-10-02 00:17:41.627824', 'step': 3912, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:17:41.688022', 'step': 3912, 'epoch': 1}
{'type': 'loss', 'content': 0.08236045390367508, 'timestamp': '2025-10-02 00:17:41.699321', 'step': 3913, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:41.754894', 'step': 3913, 'epoch': 1}
{'type': 'loss', 'content': 0.1338805854320526, 'timestamp': '2025-10-02 00:17:41.757162', 'step': 3914, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:41.813529', 'step': 3914, 'epoch': 1}
{'type': 'loss', 'content': 0.10185858607292175, 'timestamp': '2025-10-02 00:17:41.823102', 'step': 3915, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:17:41.893213', 'step': 3915, 'epoch': 1}
{'type': 'loss', 'content': 0.0362284816801548, 'timestamp': '2025-10-02 00:17:41.906248', 'step': 3916, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:17:41.967062', 'step': 3916, 'epoch': 1}
{'type': 'loss', 'content': 0.051701635122299194, 'timestamp': '2025-10-02 00:17:41.978397', 'step': 3917, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:42.033228', 'step': 3917, 'epoch': 1}
{'type': 'loss', 'content': 0.09414533525705338, 'timestamp': '2025-10-02 00:17:42.035475', 'step': 3918, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:42.089612', 'step': 3918, 'epoch': 1}
{'type': 'loss', 'content': 0.08535641431808472, 'timestamp': '2025-10-02 00:17:42.091790', 'step': 3919, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:42.146785', 'step': 3919, 'epoch': 1}
{'type': 'loss', 'content': 0.019166113808751106, 'timestamp': '2025-10-02 00:17:42.157110', 'step': 3920, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:17:42.218882', 'step': 3920, 'epoch': 1}
{'type': 'loss', 'content': 0.028341177850961685, 'timestamp': '2025-10-02 00:17:42.230409', 'step': 3921, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:17:42.285379', 'step': 3921, 'epoch': 1}
{'type': 'loss', 'content': 0.11846011877059937, 'timestamp': '2025-10-02 00:17:42.287461', 'step': 3922, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:42.341246', 'step': 3922, 'epoch': 1}
{'type': 'loss', 'content': 0.2107122242450714, 'timestamp': '2025-10-02 00:17:42.343528', 'step': 3923, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:42.398024', 'step': 3923, 'epoch': 1}
{'type': 'loss', 'content': 0.06700734049081802, 'timestamp': '2025-10-02 00:17:42.404582', 'step': 3924, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:42.458379', 'step': 3924, 'epoch': 1}
{'type': 'loss', 'content': 0.16455349326133728, 'timestamp': '2025-10-02 00:17:42.460605', 'step': 3925, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:42.515350', 'step': 3925, 'epoch': 1}
{'type': 'loss', 'content': 0.092562735080719, 'timestamp': '2025-10-02 00:17:42.517544', 'step': 3926, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:42.572578', 'step': 3926, 'epoch': 1}
{'type': 'loss', 'content': 0.014219021424651146, 'timestamp': '2025-10-02 00:17:42.581962', 'step': 3927, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:42.636592', 'step': 3927, 'epoch': 1}
{'type': 'loss', 'content': 0.09910805523395538, 'timestamp': '2025-10-02 00:17:42.644646', 'step': 3928, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:42.698227', 'step': 3928, 'epoch': 1}
{'type': 'loss', 'content': 0.12354676425457001, 'timestamp': '2025-10-02 00:17:42.705779', 'step': 3929, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:42.760510', 'step': 3929, 'epoch': 1}
{'type': 'loss', 'content': 0.1218661218881607, 'timestamp': '2025-10-02 00:17:42.762495', 'step': 3930, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:42.816805', 'step': 3930, 'epoch': 1}
{'type': 'loss', 'content': 0.060599930584430695, 'timestamp': '2025-10-02 00:17:42.819331', 'step': 3931, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:42.873749', 'step': 3931, 'epoch': 1}
{'type': 'loss', 'content': 0.15450580418109894, 'timestamp': '2025-10-02 00:17:42.879958', 'step': 3932, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:17:42.940876', 'step': 3932, 'epoch': 1}
{'type': 'loss', 'content': 0.07774941623210907, 'timestamp': '2025-10-02 00:17:42.952430', 'step': 3933, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:43.006589', 'step': 3933, 'epoch': 1}
{'type': 'loss', 'content': 0.06231691315770149, 'timestamp': '2025-10-02 00:17:43.008732', 'step': 3934, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:43.062987', 'step': 3934, 'epoch': 1}
{'type': 'loss', 'content': 0.05812250077724457, 'timestamp': '2025-10-02 00:17:43.065356', 'step': 3935, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:43.120084', 'step': 3935, 'epoch': 1}
{'type': 'loss', 'content': 0.05509782209992409, 'timestamp': '2025-10-02 00:17:43.126072', 'step': 3936, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:43.179317', 'step': 3936, 'epoch': 1}
{'type': 'loss', 'content': 0.1761624813079834, 'timestamp': '2025-10-02 00:17:43.181571', 'step': 3937, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:43.236152', 'step': 3937, 'epoch': 1}
{'type': 'loss', 'content': 0.06034143641591072, 'timestamp': '2025-10-02 00:17:43.243735', 'step': 3938, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:43.297892', 'step': 3938, 'epoch': 1}
{'type': 'loss', 'content': 0.17350414395332336, 'timestamp': '2025-10-02 00:17:43.300158', 'step': 3939, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:43.359653', 'step': 3939, 'epoch': 1}
{'type': 'loss', 'content': 0.008711840026080608, 'timestamp': '2025-10-02 00:17:43.370584', 'step': 3940, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:43.423801', 'step': 3940, 'epoch': 1}
{'type': 'loss', 'content': 0.1308537721633911, 'timestamp': '2025-10-02 00:17:43.426285', 'step': 3941, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:43.480419', 'step': 3941, 'epoch': 1}
{'type': 'loss', 'content': 0.16735300421714783, 'timestamp': '2025-10-02 00:17:43.483412', 'step': 3942, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:17:43.537783', 'step': 3942, 'epoch': 1}
{'type': 'loss', 'content': 0.07848677784204483, 'timestamp': '2025-10-02 00:17:43.540019', 'step': 3943, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:43.594919', 'step': 3943, 'epoch': 1}
{'type': 'loss', 'content': 0.06876466423273087, 'timestamp': '2025-10-02 00:17:43.605051', 'step': 3944, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:43.658404', 'step': 3944, 'epoch': 1}
{'type': 'loss', 'content': 0.13919049501419067, 'timestamp': '2025-10-02 00:17:43.660594', 'step': 3945, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:43.714085', 'step': 3945, 'epoch': 1}
{'type': 'loss', 'content': 0.11744429171085358, 'timestamp': '2025-10-02 00:17:43.716251', 'step': 3946, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:43.770214', 'step': 3946, 'epoch': 1}
{'type': 'loss', 'content': 0.15071932971477509, 'timestamp': '2025-10-02 00:17:43.772544', 'step': 3947, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:17:43.826250', 'step': 3947, 'epoch': 1}
{'type': 'loss', 'content': 0.1167389303445816, 'timestamp': '2025-10-02 00:17:43.831995', 'step': 3948, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:43.885508', 'step': 3948, 'epoch': 1}
{'type': 'loss', 'content': 0.08961767703294754, 'timestamp': '2025-10-02 00:17:43.888433', 'step': 3949, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:43.947859', 'step': 3949, 'epoch': 1}
{'type': 'loss', 'content': 0.057886213064193726, 'timestamp': '2025-10-02 00:17:43.958047', 'step': 3950, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:44.012438', 'step': 3950, 'epoch': 1}
{'type': 'loss', 'content': 0.12882161140441895, 'timestamp': '2025-10-02 00:17:44.014824', 'step': 3951, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:44.068659', 'step': 3951, 'epoch': 1}
{'type': 'loss', 'content': 0.04933585971593857, 'timestamp': '2025-10-02 00:17:44.076958', 'step': 3952, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:44.129789', 'step': 3952, 'epoch': 1}
{'type': 'loss', 'content': 0.1615992933511734, 'timestamp': '2025-10-02 00:17:44.131951', 'step': 3953, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:44.191185', 'step': 3953, 'epoch': 1}
{'type': 'loss', 'content': 0.025495514273643494, 'timestamp': '2025-10-02 00:17:44.201427', 'step': 3954, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:44.256050', 'step': 3954, 'epoch': 1}
{'type': 'loss', 'content': 0.14516614377498627, 'timestamp': '2025-10-02 00:17:44.258343', 'step': 3955, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:44.313133', 'step': 3955, 'epoch': 1}
{'type': 'loss', 'content': 0.07777713984251022, 'timestamp': '2025-10-02 00:17:44.319789', 'step': 3956, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:44.373569', 'step': 3956, 'epoch': 1}
{'type': 'loss', 'content': 0.06265023350715637, 'timestamp': '2025-10-02 00:17:44.381152', 'step': 3957, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:44.436749', 'step': 3957, 'epoch': 1}
{'type': 'loss', 'content': 0.031387340277433395, 'timestamp': '2025-10-02 00:17:44.442720', 'step': 3958, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:44.496868', 'step': 3958, 'epoch': 1}
{'type': 'loss', 'content': 0.19657978415489197, 'timestamp': '2025-10-02 00:17:44.499125', 'step': 3959, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:44.554173', 'step': 3959, 'epoch': 1}
{'type': 'loss', 'content': 0.09854193031787872, 'timestamp': '2025-10-02 00:17:44.564508', 'step': 3960, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:44.618030', 'step': 3960, 'epoch': 1}
{'type': 'loss', 'content': 0.07752373069524765, 'timestamp': '2025-10-02 00:17:44.620787', 'step': 3961, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:44.674690', 'step': 3961, 'epoch': 1}
{'type': 'loss', 'content': 0.26768729090690613, 'timestamp': '2025-10-02 00:17:44.676817', 'step': 3962, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:44.730771', 'step': 3962, 'epoch': 1}
{'type': 'loss', 'content': 0.20870018005371094, 'timestamp': '2025-10-02 00:17:44.733031', 'step': 3963, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:44.787172', 'step': 3963, 'epoch': 1}
{'type': 'loss', 'content': 0.1350935995578766, 'timestamp': '2025-10-02 00:17:44.793084', 'step': 3964, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:44.847528', 'step': 3964, 'epoch': 1}
{'type': 'loss', 'content': 0.07448451220989227, 'timestamp': '2025-10-02 00:17:44.849899', 'step': 3965, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:44.904999', 'step': 3965, 'epoch': 1}
{'type': 'loss', 'content': 0.019057998433709145, 'timestamp': '2025-10-02 00:17:44.912395', 'step': 3966, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:44.967227', 'step': 3966, 'epoch': 1}
{'type': 'loss', 'content': 0.03785783424973488, 'timestamp': '2025-10-02 00:17:44.974782', 'step': 3967, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:17:45.044954', 'step': 3967, 'epoch': 1}
{'type': 'loss', 'content': 0.012524046935141087, 'timestamp': '2025-10-02 00:17:45.058168', 'step': 3968, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:45.111614', 'step': 3968, 'epoch': 1}
{'type': 'loss', 'content': 0.10431037843227386, 'timestamp': '2025-10-02 00:17:45.113864', 'step': 3969, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:45.167955', 'step': 3969, 'epoch': 1}
{'type': 'loss', 'content': 0.17912524938583374, 'timestamp': '2025-10-02 00:17:45.170313', 'step': 3970, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:17:45.246809', 'step': 3970, 'epoch': 1}
{'type': 'loss', 'content': 0.04138118028640747, 'timestamp': '2025-10-02 00:17:45.260636', 'step': 3971, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:45.319590', 'step': 3971, 'epoch': 1}
{'type': 'loss', 'content': 0.03366633132100105, 'timestamp': '2025-10-02 00:17:45.325566', 'step': 3972, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:45.380296', 'step': 3972, 'epoch': 1}
{'type': 'loss', 'content': 0.0462101548910141, 'timestamp': '2025-10-02 00:17:45.387949', 'step': 3973, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:45.448811', 'step': 3973, 'epoch': 1}
{'type': 'loss', 'content': 0.02837083488702774, 'timestamp': '2025-10-02 00:17:45.459047', 'step': 3974, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:45.514279', 'step': 3974, 'epoch': 1}
{'type': 'loss', 'content': 0.022470904514193535, 'timestamp': '2025-10-02 00:17:45.521627', 'step': 3975, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:45.577807', 'step': 3975, 'epoch': 1}
{'type': 'loss', 'content': 0.12883710861206055, 'timestamp': '2025-10-02 00:17:45.584388', 'step': 3976, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:45.638724', 'step': 3976, 'epoch': 1}
{'type': 'loss', 'content': 0.08087562024593353, 'timestamp': '2025-10-02 00:17:45.648074', 'step': 3977, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:17:45.703014', 'step': 3977, 'epoch': 1}
{'type': 'loss', 'content': 0.18263374269008636, 'timestamp': '2025-10-02 00:17:45.705409', 'step': 3978, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:17:45.779939', 'step': 3978, 'epoch': 1}
{'type': 'loss', 'content': 0.034324388951063156, 'timestamp': '2025-10-02 00:17:45.793385', 'step': 3979, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:45.849096', 'step': 3979, 'epoch': 1}
{'type': 'loss', 'content': 0.190604105591774, 'timestamp': '2025-10-02 00:17:45.855282', 'step': 3980, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:17:45.922883', 'step': 3980, 'epoch': 1}
{'type': 'loss', 'content': 0.06721736490726471, 'timestamp': '2025-10-02 00:17:45.936264', 'step': 3981, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:45.997185', 'step': 3981, 'epoch': 1}
{'type': 'loss', 'content': 0.0901455506682396, 'timestamp': '2025-10-02 00:17:46.006703', 'step': 3982, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:46.064991', 'step': 3982, 'epoch': 1}
{'type': 'loss', 'content': 0.05589950084686279, 'timestamp': '2025-10-02 00:17:46.067684', 'step': 3983, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:46.122046', 'step': 3983, 'epoch': 1}
{'type': 'loss', 'content': 0.08681318163871765, 'timestamp': '2025-10-02 00:17:46.130449', 'step': 3984, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:46.183787', 'step': 3984, 'epoch': 1}
{'type': 'loss', 'content': 0.1185901090502739, 'timestamp': '2025-10-02 00:17:46.186388', 'step': 3985, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:46.241112', 'step': 3985, 'epoch': 1}
{'type': 'loss', 'content': 0.1824028193950653, 'timestamp': '2025-10-02 00:17:46.243696', 'step': 3986, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:46.298487', 'step': 3986, 'epoch': 1}
{'type': 'loss', 'content': 0.0490853525698185, 'timestamp': '2025-10-02 00:17:46.305909', 'step': 3987, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:46.361023', 'step': 3987, 'epoch': 1}
{'type': 'loss', 'content': 0.16386976838111877, 'timestamp': '2025-10-02 00:17:46.366857', 'step': 3988, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:17:46.423754', 'step': 3988, 'epoch': 1}
{'type': 'loss', 'content': 0.15144138038158417, 'timestamp': '2025-10-02 00:17:46.426022', 'step': 3989, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:17:46.481341', 'step': 3989, 'epoch': 1}
{'type': 'loss', 'content': 0.17330552637577057, 'timestamp': '2025-10-02 00:17:46.484254', 'step': 3990, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:46.538848', 'step': 3990, 'epoch': 1}
{'type': 'loss', 'content': 0.041323594748973846, 'timestamp': '2025-10-02 00:17:46.542116', 'step': 3991, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:46.598574', 'step': 3991, 'epoch': 1}
{'type': 'loss', 'content': 0.14558948576450348, 'timestamp': '2025-10-02 00:17:46.604713', 'step': 3992, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:46.673469', 'step': 3992, 'epoch': 1}
{'type': 'loss', 'content': 0.03073118068277836, 'timestamp': '2025-10-02 00:17:46.684510', 'step': 3993, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:46.739260', 'step': 3993, 'epoch': 1}
{'type': 'loss', 'content': 0.05447375029325485, 'timestamp': '2025-10-02 00:17:46.741775', 'step': 3994, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:46.796590', 'step': 3994, 'epoch': 1}
{'type': 'loss', 'content': 0.041696079075336456, 'timestamp': '2025-10-02 00:17:46.799083', 'step': 3995, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:46.853811', 'step': 3995, 'epoch': 1}
{'type': 'loss', 'content': 0.09554687142372131, 'timestamp': '2025-10-02 00:17:46.862185', 'step': 3996, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:46.916917', 'step': 3996, 'epoch': 1}
{'type': 'loss', 'content': 0.130202978849411, 'timestamp': '2025-10-02 00:17:46.931244', 'step': 3997, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:47.012495', 'step': 3997, 'epoch': 1}
{'type': 'loss', 'content': 0.03234951198101044, 'timestamp': '2025-10-02 00:17:47.020147', 'step': 3998, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:17:47.114830', 'step': 3998, 'epoch': 1}
{'type': 'loss', 'content': 0.17440415918827057, 'timestamp': '2025-10-02 00:17:47.120253', 'step': 3999, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:47.188499', 'step': 3999, 'epoch': 1}
{'type': 'loss', 'content': 0.061895471066236496, 'timestamp': '2025-10-02 00:17:47.196612', 'step': 4000, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 4000', 'timestamp': '2025-10-02 00:17:47.649965', 'step': 4000, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:47.714182', 'step': 4000, 'epoch': 1}
{'type': 'loss', 'content': 0.11070927232503891, 'timestamp': '2025-10-02 00:17:47.728568', 'step': 4001, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:47.798110', 'step': 4001, 'epoch': 1}
{'type': 'loss', 'content': 0.10211826115846634, 'timestamp': '2025-10-02 00:17:47.806901', 'step': 4002, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:47.882597', 'step': 4002, 'epoch': 1}
{'type': 'loss', 'content': 0.06914364546537399, 'timestamp': '2025-10-02 00:17:47.907778', 'step': 4003, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:47.983358', 'step': 4003, 'epoch': 1}
{'type': 'loss', 'content': 0.08317656069993973, 'timestamp': '2025-10-02 00:17:47.990733', 'step': 4004, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:48.066813', 'step': 4004, 'epoch': 1}
{'type': 'loss', 'content': 0.07255927473306656, 'timestamp': '2025-10-02 00:17:48.085233', 'step': 4005, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:48.154503', 'step': 4005, 'epoch': 1}
{'type': 'loss', 'content': 0.12683333456516266, 'timestamp': '2025-10-02 00:17:48.162729', 'step': 4006, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:48.243430', 'step': 4006, 'epoch': 1}
{'type': 'loss', 'content': 0.06298258900642395, 'timestamp': '2025-10-02 00:17:48.252993', 'step': 4007, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:48.307189', 'step': 4007, 'epoch': 1}
{'type': 'loss', 'content': 0.03039715811610222, 'timestamp': '2025-10-02 00:17:48.313321', 'step': 4008, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:48.367202', 'step': 4008, 'epoch': 1}
{'type': 'loss', 'content': 0.08737841993570328, 'timestamp': '2025-10-02 00:17:48.368981', 'step': 4009, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:48.422708', 'step': 4009, 'epoch': 1}
{'type': 'loss', 'content': 0.08498481661081314, 'timestamp': '2025-10-02 00:17:48.425359', 'step': 4010, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:48.479286', 'step': 4010, 'epoch': 1}
{'type': 'loss', 'content': 0.1770024448633194, 'timestamp': '2025-10-02 00:17:48.481633', 'step': 4011, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:48.536596', 'step': 4011, 'epoch': 1}
{'type': 'loss', 'content': 0.02768067456781864, 'timestamp': '2025-10-02 00:17:48.544838', 'step': 4012, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:48.598164', 'step': 4012, 'epoch': 1}
{'type': 'loss', 'content': 0.17577731609344482, 'timestamp': '2025-10-02 00:17:48.600544', 'step': 4013, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:48.653930', 'step': 4013, 'epoch': 1}
{'type': 'loss', 'content': 0.09392283856868744, 'timestamp': '2025-10-02 00:17:48.656290', 'step': 4014, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:48.709360', 'step': 4014, 'epoch': 1}
{'type': 'loss', 'content': 0.19389812648296356, 'timestamp': '2025-10-02 00:17:48.711589', 'step': 4015, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:48.766092', 'step': 4015, 'epoch': 1}
{'type': 'loss', 'content': 0.1092129573225975, 'timestamp': '2025-10-02 00:17:48.771890', 'step': 4016, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:48.825540', 'step': 4016, 'epoch': 1}
{'type': 'loss', 'content': 0.027408752590417862, 'timestamp': '2025-10-02 00:17:48.835812', 'step': 4017, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:48.889749', 'step': 4017, 'epoch': 1}
{'type': 'loss', 'content': 0.04434994235634804, 'timestamp': '2025-10-02 00:17:48.892085', 'step': 4018, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:48.945178', 'step': 4018, 'epoch': 1}
{'type': 'loss', 'content': 0.18055520951747894, 'timestamp': '2025-10-02 00:17:48.947258', 'step': 4019, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:49.002200', 'step': 4019, 'epoch': 1}
{'type': 'loss', 'content': 0.07369379699230194, 'timestamp': '2025-10-02 00:17:49.008730', 'step': 4020, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:49.062127', 'step': 4020, 'epoch': 1}
{'type': 'loss', 'content': 0.10044059157371521, 'timestamp': '2025-10-02 00:17:49.064582', 'step': 4021, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 576], 'flops': 11520070000896.0}, 'timestamp': '2025-10-02 00:17:49.152115', 'step': 4021, 'epoch': 1}
{'type': 'loss', 'content': 0.0076714386232197285, 'timestamp': '2025-10-02 00:17:49.167445', 'step': 4022, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:49.221764', 'step': 4022, 'epoch': 1}
{'type': 'loss', 'content': 0.09321754425764084, 'timestamp': '2025-10-02 00:17:49.231073', 'step': 4023, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:17:49.293231', 'step': 4023, 'epoch': 1}
{'type': 'loss', 'content': 0.034229960292577744, 'timestamp': '2025-10-02 00:17:49.304674', 'step': 4024, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:49.358517', 'step': 4024, 'epoch': 1}
{'type': 'loss', 'content': 0.029493795707821846, 'timestamp': '2025-10-02 00:17:49.361861', 'step': 4025, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:17:49.431601', 'step': 4025, 'epoch': 1}
{'type': 'loss', 'content': 0.022106559947133064, 'timestamp': '2025-10-02 00:17:49.443963', 'step': 4026, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:49.497829', 'step': 4026, 'epoch': 1}
{'type': 'loss', 'content': 0.13521279394626617, 'timestamp': '2025-10-02 00:17:49.500165', 'step': 4027, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:49.554098', 'step': 4027, 'epoch': 1}
{'type': 'loss', 'content': 0.05013233795762062, 'timestamp': '2025-10-02 00:17:49.562422', 'step': 4028, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:49.615677', 'step': 4028, 'epoch': 1}
{'type': 'loss', 'content': 0.10850293934345245, 'timestamp': '2025-10-02 00:17:49.618054', 'step': 4029, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:49.673023', 'step': 4029, 'epoch': 1}
{'type': 'loss', 'content': 0.04595714062452316, 'timestamp': '2025-10-02 00:17:49.682575', 'step': 4030, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:49.738656', 'step': 4030, 'epoch': 1}
{'type': 'loss', 'content': 0.06566910445690155, 'timestamp': '2025-10-02 00:17:49.748214', 'step': 4031, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:17:49.802093', 'step': 4031, 'epoch': 1}
{'type': 'loss', 'content': 0.1968429982662201, 'timestamp': '2025-10-02 00:17:49.808580', 'step': 4032, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:49.861812', 'step': 4032, 'epoch': 1}
{'type': 'loss', 'content': 0.1644357293844223, 'timestamp': '2025-10-02 00:17:49.864152', 'step': 4033, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:49.918625', 'step': 4033, 'epoch': 1}
{'type': 'loss', 'content': 0.13917788863182068, 'timestamp': '2025-10-02 00:17:49.920924', 'step': 4034, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:49.974492', 'step': 4034, 'epoch': 1}
{'type': 'loss', 'content': 0.08907292783260345, 'timestamp': '2025-10-02 00:17:49.976731', 'step': 4035, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:50.030048', 'step': 4035, 'epoch': 1}
{'type': 'loss', 'content': 0.06983151286840439, 'timestamp': '2025-10-02 00:17:50.035834', 'step': 4036, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:50.088978', 'step': 4036, 'epoch': 1}
{'type': 'loss', 'content': 0.13105998933315277, 'timestamp': '2025-10-02 00:17:50.091176', 'step': 4037, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:50.144641', 'step': 4037, 'epoch': 1}
{'type': 'loss', 'content': 0.15857741236686707, 'timestamp': '2025-10-02 00:17:50.146880', 'step': 4038, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:50.200866', 'step': 4038, 'epoch': 1}
{'type': 'loss', 'content': 0.18428809940814972, 'timestamp': '2025-10-02 00:17:50.203145', 'step': 4039, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:50.256515', 'step': 4039, 'epoch': 1}
{'type': 'loss', 'content': 0.1531118005514145, 'timestamp': '2025-10-02 00:17:50.262061', 'step': 4040, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:50.315774', 'step': 4040, 'epoch': 1}
{'type': 'loss', 'content': 0.032205164432525635, 'timestamp': '2025-10-02 00:17:50.318331', 'step': 4041, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:50.372197', 'step': 4041, 'epoch': 1}
{'type': 'loss', 'content': 0.11735484004020691, 'timestamp': '2025-10-02 00:17:50.374821', 'step': 4042, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:50.429224', 'step': 4042, 'epoch': 1}
{'type': 'loss', 'content': 0.0775250717997551, 'timestamp': '2025-10-02 00:17:50.435280', 'step': 4043, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:50.490365', 'step': 4043, 'epoch': 1}
{'type': 'loss', 'content': 0.03556450456380844, 'timestamp': '2025-10-02 00:17:50.496020', 'step': 4044, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:50.552930', 'step': 4044, 'epoch': 1}
{'type': 'loss', 'content': 0.08613323420286179, 'timestamp': '2025-10-02 00:17:50.563870', 'step': 4045, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:50.618183', 'step': 4045, 'epoch': 1}
{'type': 'loss', 'content': 0.05090944468975067, 'timestamp': '2025-10-02 00:17:50.623900', 'step': 4046, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:50.679553', 'step': 4046, 'epoch': 1}
{'type': 'loss', 'content': 0.07282985746860504, 'timestamp': '2025-10-02 00:17:50.685256', 'step': 4047, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:50.739301', 'step': 4047, 'epoch': 1}
{'type': 'loss', 'content': 0.053757086396217346, 'timestamp': '2025-10-02 00:17:50.749455', 'step': 4048, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:50.803894', 'step': 4048, 'epoch': 1}
{'type': 'loss', 'content': 0.11259709298610687, 'timestamp': '2025-10-02 00:17:50.806013', 'step': 4049, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:50.860724', 'step': 4049, 'epoch': 1}
{'type': 'loss', 'content': 0.06353675574064255, 'timestamp': '2025-10-02 00:17:50.863535', 'step': 4050, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:50.917900', 'step': 4050, 'epoch': 1}
{'type': 'loss', 'content': 0.16356347501277924, 'timestamp': '2025-10-02 00:17:50.920073', 'step': 4051, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:50.978197', 'step': 4051, 'epoch': 1}
{'type': 'loss', 'content': 0.12368380278348923, 'timestamp': '2025-10-02 00:17:50.989176', 'step': 4052, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:51.043316', 'step': 4052, 'epoch': 1}
{'type': 'loss', 'content': 0.10236631333827972, 'timestamp': '2025-10-02 00:17:51.045587', 'step': 4053, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:51.099118', 'step': 4053, 'epoch': 1}
{'type': 'loss', 'content': 0.1043999046087265, 'timestamp': '2025-10-02 00:17:51.101471', 'step': 4054, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:51.155706', 'step': 4054, 'epoch': 1}
{'type': 'loss', 'content': 0.042670685797929764, 'timestamp': '2025-10-02 00:17:51.158020', 'step': 4055, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:51.211483', 'step': 4055, 'epoch': 1}
{'type': 'loss', 'content': 0.10171254724264145, 'timestamp': '2025-10-02 00:17:51.217455', 'step': 4056, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:51.271019', 'step': 4056, 'epoch': 1}
{'type': 'loss', 'content': 0.07900398224592209, 'timestamp': '2025-10-02 00:17:51.273101', 'step': 4057, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:17:51.335483', 'step': 4057, 'epoch': 1}
{'type': 'loss', 'content': 0.04254790022969246, 'timestamp': '2025-10-02 00:17:51.346333', 'step': 4058, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:51.400332', 'step': 4058, 'epoch': 1}
{'type': 'loss', 'content': 0.13424113392829895, 'timestamp': '2025-10-02 00:17:51.402881', 'step': 4059, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:51.457826', 'step': 4059, 'epoch': 1}
{'type': 'loss', 'content': 0.0925384908914566, 'timestamp': '2025-10-02 00:17:51.464487', 'step': 4060, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:51.517808', 'step': 4060, 'epoch': 1}
{'type': 'loss', 'content': 0.08505434542894363, 'timestamp': '2025-10-02 00:17:51.520328', 'step': 4061, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:51.574175', 'step': 4061, 'epoch': 1}
{'type': 'loss', 'content': 0.06792520731687546, 'timestamp': '2025-10-02 00:17:51.576597', 'step': 4062, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:51.630170', 'step': 4062, 'epoch': 1}
{'type': 'loss', 'content': 0.14253516495227814, 'timestamp': '2025-10-02 00:17:51.632280', 'step': 4063, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:17:51.693635', 'step': 4063, 'epoch': 1}
{'type': 'loss', 'content': 0.035581670701503754, 'timestamp': '2025-10-02 00:17:51.705061', 'step': 4064, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:51.759584', 'step': 4064, 'epoch': 1}
{'type': 'loss', 'content': 0.12552686035633087, 'timestamp': '2025-10-02 00:17:51.762276', 'step': 4065, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:51.816801', 'step': 4065, 'epoch': 1}
{'type': 'loss', 'content': 0.03308907523751259, 'timestamp': '2025-10-02 00:17:51.824442', 'step': 4066, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:51.878937', 'step': 4066, 'epoch': 1}
{'type': 'loss', 'content': 0.12281566113233566, 'timestamp': '2025-10-02 00:17:51.881058', 'step': 4067, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:51.935914', 'step': 4067, 'epoch': 1}
{'type': 'loss', 'content': 0.04793211817741394, 'timestamp': '2025-10-02 00:17:51.946254', 'step': 4068, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:51.999944', 'step': 4068, 'epoch': 1}
{'type': 'loss', 'content': 0.06285101175308228, 'timestamp': '2025-10-02 00:17:52.005745', 'step': 4069, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:52.060105', 'step': 4069, 'epoch': 1}
{'type': 'loss', 'content': 0.12959913909435272, 'timestamp': '2025-10-02 00:17:52.062398', 'step': 4070, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:52.116805', 'step': 4070, 'epoch': 1}
{'type': 'loss', 'content': 0.03576251119375229, 'timestamp': '2025-10-02 00:17:52.124246', 'step': 4071, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:17:52.178664', 'step': 4071, 'epoch': 1}
{'type': 'loss', 'content': 0.30511942505836487, 'timestamp': '2025-10-02 00:17:52.184694', 'step': 4072, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:52.238453', 'step': 4072, 'epoch': 1}
{'type': 'loss', 'content': 0.04342297837138176, 'timestamp': '2025-10-02 00:17:52.247840', 'step': 4073, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:52.302209', 'step': 4073, 'epoch': 1}
{'type': 'loss', 'content': 0.06301213055849075, 'timestamp': '2025-10-02 00:17:52.309657', 'step': 4074, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:52.363813', 'step': 4074, 'epoch': 1}
{'type': 'loss', 'content': 0.10656291991472244, 'timestamp': '2025-10-02 00:17:52.366112', 'step': 4075, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:52.420324', 'step': 4075, 'epoch': 1}
{'type': 'loss', 'content': 0.09982270747423172, 'timestamp': '2025-10-02 00:17:52.425998', 'step': 4076, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:52.479194', 'step': 4076, 'epoch': 1}
{'type': 'loss', 'content': 0.02016729861497879, 'timestamp': '2025-10-02 00:17:52.489442', 'step': 4077, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:17:52.551825', 'step': 4077, 'epoch': 1}
{'type': 'loss', 'content': 0.08307500183582306, 'timestamp': '2025-10-02 00:17:52.562309', 'step': 4078, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:52.617213', 'step': 4078, 'epoch': 1}
{'type': 'loss', 'content': 0.04633018374443054, 'timestamp': '2025-10-02 00:17:52.626568', 'step': 4079, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:52.682072', 'step': 4079, 'epoch': 1}
{'type': 'loss', 'content': 0.09438806027173996, 'timestamp': '2025-10-02 00:17:52.688293', 'step': 4080, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:52.742386', 'step': 4080, 'epoch': 1}
{'type': 'loss', 'content': 0.06084899604320526, 'timestamp': '2025-10-02 00:17:52.751992', 'step': 4081, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:52.806241', 'step': 4081, 'epoch': 1}
{'type': 'loss', 'content': 0.09590989351272583, 'timestamp': '2025-10-02 00:17:52.808693', 'step': 4082, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:52.863123', 'step': 4082, 'epoch': 1}
{'type': 'loss', 'content': 0.04496666043996811, 'timestamp': '2025-10-02 00:17:52.865494', 'step': 4083, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:52.919199', 'step': 4083, 'epoch': 1}
{'type': 'loss', 'content': 0.17568963766098022, 'timestamp': '2025-10-02 00:17:52.925039', 'step': 4084, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:52.979192', 'step': 4084, 'epoch': 1}
{'type': 'loss', 'content': 0.0334969162940979, 'timestamp': '2025-10-02 00:17:52.981880', 'step': 4085, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:53.037203', 'step': 4085, 'epoch': 1}
{'type': 'loss', 'content': 0.10672876238822937, 'timestamp': '2025-10-02 00:17:53.044788', 'step': 4086, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:53.101131', 'step': 4086, 'epoch': 1}
{'type': 'loss', 'content': 0.10953682661056519, 'timestamp': '2025-10-02 00:17:53.103905', 'step': 4087, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:53.161198', 'step': 4087, 'epoch': 1}
{'type': 'loss', 'content': 0.011710609309375286, 'timestamp': '2025-10-02 00:17:53.169626', 'step': 4088, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:53.224895', 'step': 4088, 'epoch': 1}
{'type': 'loss', 'content': 0.07174449414014816, 'timestamp': '2025-10-02 00:17:53.230847', 'step': 4089, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:53.301242', 'step': 4089, 'epoch': 1}
{'type': 'loss', 'content': 0.24493621289730072, 'timestamp': '2025-10-02 00:17:53.303851', 'step': 4090, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:53.359925', 'step': 4090, 'epoch': 1}
{'type': 'loss', 'content': 0.07323014736175537, 'timestamp': '2025-10-02 00:17:53.363617', 'step': 4091, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:53.420354', 'step': 4091, 'epoch': 1}
{'type': 'loss', 'content': 0.05504688620567322, 'timestamp': '2025-10-02 00:17:53.427566', 'step': 4092, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:17:53.483864', 'step': 4092, 'epoch': 1}
{'type': 'loss', 'content': 0.18558762967586517, 'timestamp': '2025-10-02 00:17:53.486725', 'step': 4093, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:53.543322', 'step': 4093, 'epoch': 1}
{'type': 'loss', 'content': 0.10197465866804123, 'timestamp': '2025-10-02 00:17:53.546701', 'step': 4094, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:53.602322', 'step': 4094, 'epoch': 1}
{'type': 'loss', 'content': 0.11281430721282959, 'timestamp': '2025-10-02 00:17:53.606017', 'step': 4095, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:53.664636', 'step': 4095, 'epoch': 1}
{'type': 'loss', 'content': 0.07019226253032684, 'timestamp': '2025-10-02 00:17:53.671157', 'step': 4096, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:53.728473', 'step': 4096, 'epoch': 1}
{'type': 'loss', 'content': 0.008052581921219826, 'timestamp': '2025-10-02 00:17:53.734589', 'step': 4097, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:53.790847', 'step': 4097, 'epoch': 1}
{'type': 'loss', 'content': 0.10113918781280518, 'timestamp': '2025-10-02 00:17:53.793988', 'step': 4098, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:53.854854', 'step': 4098, 'epoch': 1}
{'type': 'loss', 'content': 0.05804870277643204, 'timestamp': '2025-10-02 00:17:53.865044', 'step': 4099, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:53.921362', 'step': 4099, 'epoch': 1}
{'type': 'loss', 'content': 0.026349041610956192, 'timestamp': '2025-10-02 00:17:53.931489', 'step': 4100, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:53.986979', 'step': 4100, 'epoch': 1}
{'type': 'loss', 'content': 0.08325658738613129, 'timestamp': '2025-10-02 00:17:53.989736', 'step': 4101, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:17:54.045076', 'step': 4101, 'epoch': 1}
{'type': 'loss', 'content': 0.2242213934659958, 'timestamp': '2025-10-02 00:17:54.048240', 'step': 4102, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:54.109086', 'step': 4102, 'epoch': 1}
{'type': 'loss', 'content': 0.15384510159492493, 'timestamp': '2025-10-02 00:17:54.119290', 'step': 4103, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:54.176387', 'step': 4103, 'epoch': 1}
{'type': 'loss', 'content': 0.033564403653144836, 'timestamp': '2025-10-02 00:17:54.184675', 'step': 4104, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:54.239809', 'step': 4104, 'epoch': 1}
{'type': 'loss', 'content': 0.08276195079088211, 'timestamp': '2025-10-02 00:17:54.247404', 'step': 4105, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:54.303776', 'step': 4105, 'epoch': 1}
{'type': 'loss', 'content': 0.05875425413250923, 'timestamp': '2025-10-02 00:17:54.306625', 'step': 4106, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:54.363261', 'step': 4106, 'epoch': 1}
{'type': 'loss', 'content': 0.06957408040761948, 'timestamp': '2025-10-02 00:17:54.366352', 'step': 4107, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:54.422232', 'step': 4107, 'epoch': 1}
{'type': 'loss', 'content': 0.09332393854856491, 'timestamp': '2025-10-02 00:17:54.432559', 'step': 4108, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:54.488679', 'step': 4108, 'epoch': 1}
{'type': 'loss', 'content': 0.04033035412430763, 'timestamp': '2025-10-02 00:17:54.491953', 'step': 4109, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:54.547775', 'step': 4109, 'epoch': 1}
{'type': 'loss', 'content': 0.09383486211299896, 'timestamp': '2025-10-02 00:17:54.550938', 'step': 4110, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:54.608195', 'step': 4110, 'epoch': 1}
{'type': 'loss', 'content': 0.18431515991687775, 'timestamp': '2025-10-02 00:17:54.611189', 'step': 4111, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:54.666035', 'step': 4111, 'epoch': 1}
{'type': 'loss', 'content': 0.23691681027412415, 'timestamp': '2025-10-02 00:17:54.672508', 'step': 4112, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:54.727587', 'step': 4112, 'epoch': 1}
{'type': 'loss', 'content': 0.08084419369697571, 'timestamp': '2025-10-02 00:17:54.730446', 'step': 4113, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:54.786766', 'step': 4113, 'epoch': 1}
{'type': 'loss', 'content': 0.0653001144528389, 'timestamp': '2025-10-02 00:17:54.789753', 'step': 4114, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:54.845170', 'step': 4114, 'epoch': 1}
{'type': 'loss', 'content': 0.11052227020263672, 'timestamp': '2025-10-02 00:17:54.848872', 'step': 4115, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:54.904344', 'step': 4115, 'epoch': 1}
{'type': 'loss', 'content': 0.06672625243663788, 'timestamp': '2025-10-02 00:17:54.914475', 'step': 4116, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:54.969919', 'step': 4116, 'epoch': 1}
{'type': 'loss', 'content': 0.10799390822649002, 'timestamp': '2025-10-02 00:17:54.980186', 'step': 4117, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:55.037039', 'step': 4117, 'epoch': 1}
{'type': 'loss', 'content': 0.03439752012491226, 'timestamp': '2025-10-02 00:17:55.039237', 'step': 4118, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:55.094105', 'step': 4118, 'epoch': 1}
{'type': 'loss', 'content': 0.18525627255439758, 'timestamp': '2025-10-02 00:17:55.096948', 'step': 4119, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:55.153614', 'step': 4119, 'epoch': 1}
{'type': 'loss', 'content': 0.07035932689905167, 'timestamp': '2025-10-02 00:17:55.160195', 'step': 4120, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:55.215587', 'step': 4120, 'epoch': 1}
{'type': 'loss', 'content': 0.0725225955247879, 'timestamp': '2025-10-02 00:17:55.217828', 'step': 4121, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:55.272377', 'step': 4121, 'epoch': 1}
{'type': 'loss', 'content': 0.18559050559997559, 'timestamp': '2025-10-02 00:17:55.274943', 'step': 4122, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:55.329731', 'step': 4122, 'epoch': 1}
{'type': 'loss', 'content': 0.16782625019550323, 'timestamp': '2025-10-02 00:17:55.332050', 'step': 4123, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:17:55.386673', 'step': 4123, 'epoch': 1}
{'type': 'loss', 'content': 0.02808965928852558, 'timestamp': '2025-10-02 00:17:55.394862', 'step': 4124, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:55.450711', 'step': 4124, 'epoch': 1}
{'type': 'loss', 'content': 0.15244972705841064, 'timestamp': '2025-10-02 00:17:55.452850', 'step': 4125, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:55.511598', 'step': 4125, 'epoch': 1}
{'type': 'loss', 'content': 0.04417532682418823, 'timestamp': '2025-10-02 00:17:55.521789', 'step': 4126, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:55.576370', 'step': 4126, 'epoch': 1}
{'type': 'loss', 'content': 0.08497081696987152, 'timestamp': '2025-10-02 00:17:55.578442', 'step': 4127, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:55.633062', 'step': 4127, 'epoch': 1}
{'type': 'loss', 'content': 0.19472655653953552, 'timestamp': '2025-10-02 00:17:55.638962', 'step': 4128, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:55.692711', 'step': 4128, 'epoch': 1}
{'type': 'loss', 'content': 0.06867075711488724, 'timestamp': '2025-10-02 00:17:55.694870', 'step': 4129, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:55.749006', 'step': 4129, 'epoch': 1}
{'type': 'loss', 'content': 0.11604908108711243, 'timestamp': '2025-10-02 00:17:55.751195', 'step': 4130, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:55.809857', 'step': 4130, 'epoch': 1}
{'type': 'loss', 'content': 0.026311874389648438, 'timestamp': '2025-10-02 00:17:55.820064', 'step': 4131, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:55.884080', 'step': 4131, 'epoch': 1}
{'type': 'loss', 'content': 0.18386295437812805, 'timestamp': '2025-10-02 00:17:55.889929', 'step': 4132, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:55.944192', 'step': 4132, 'epoch': 1}
{'type': 'loss', 'content': 0.10456344485282898, 'timestamp': '2025-10-02 00:17:55.946937', 'step': 4133, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:56.002021', 'step': 4133, 'epoch': 1}
{'type': 'loss', 'content': 0.104378342628479, 'timestamp': '2025-10-02 00:17:56.004103', 'step': 4134, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:56.058219', 'step': 4134, 'epoch': 1}
{'type': 'loss', 'content': 0.10225553810596466, 'timestamp': '2025-10-02 00:17:56.060746', 'step': 4135, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:17:56.114837', 'step': 4135, 'epoch': 1}
{'type': 'loss', 'content': 0.21846672892570496, 'timestamp': '2025-10-02 00:17:56.121847', 'step': 4136, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:56.176428', 'step': 4136, 'epoch': 1}
{'type': 'loss', 'content': 0.05393945053219795, 'timestamp': '2025-10-02 00:17:56.178737', 'step': 4137, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:56.233409', 'step': 4137, 'epoch': 1}
{'type': 'loss', 'content': 0.03267476335167885, 'timestamp': '2025-10-02 00:17:56.239458', 'step': 4138, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:56.293674', 'step': 4138, 'epoch': 1}
{'type': 'loss', 'content': 0.17083865404129028, 'timestamp': '2025-10-02 00:17:56.296153', 'step': 4139, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:17:56.367957', 'step': 4139, 'epoch': 1}
{'type': 'loss', 'content': 0.057724226266145706, 'timestamp': '2025-10-02 00:17:56.381209', 'step': 4140, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:17:56.437633', 'step': 4140, 'epoch': 1}
{'type': 'loss', 'content': 0.09488490223884583, 'timestamp': '2025-10-02 00:17:56.439914', 'step': 4141, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:56.498326', 'step': 4141, 'epoch': 1}
{'type': 'loss', 'content': 0.04076176509261131, 'timestamp': '2025-10-02 00:17:56.508538', 'step': 4142, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:56.568197', 'step': 4142, 'epoch': 1}
{'type': 'loss', 'content': 0.1733594536781311, 'timestamp': '2025-10-02 00:17:56.570462', 'step': 4143, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:17:56.628171', 'step': 4143, 'epoch': 1}
{'type': 'loss', 'content': 0.08873884379863739, 'timestamp': '2025-10-02 00:17:56.633954', 'step': 4144, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:56.692349', 'step': 4144, 'epoch': 1}
{'type': 'loss', 'content': 0.07357539981603622, 'timestamp': '2025-10-02 00:17:56.694626', 'step': 4145, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:56.756727', 'step': 4145, 'epoch': 1}
{'type': 'loss', 'content': 0.07838800549507141, 'timestamp': '2025-10-02 00:17:56.758774', 'step': 4146, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:56.815712', 'step': 4146, 'epoch': 1}
{'type': 'loss', 'content': 0.11474030464887619, 'timestamp': '2025-10-02 00:17:56.818421', 'step': 4147, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:56.879189', 'step': 4147, 'epoch': 1}
{'type': 'loss', 'content': 0.16327768564224243, 'timestamp': '2025-10-02 00:17:56.885169', 'step': 4148, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:56.938815', 'step': 4148, 'epoch': 1}
{'type': 'loss', 'content': 0.10141988843679428, 'timestamp': '2025-10-02 00:17:56.941051', 'step': 4149, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:17:56.996148', 'step': 4149, 'epoch': 1}
{'type': 'loss', 'content': 0.17183834314346313, 'timestamp': '2025-10-02 00:17:56.998609', 'step': 4150, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:57.057294', 'step': 4150, 'epoch': 1}
{'type': 'loss', 'content': 0.08802079409360886, 'timestamp': '2025-10-02 00:17:57.063152', 'step': 4151, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:57.121787', 'step': 4151, 'epoch': 1}
{'type': 'loss', 'content': 0.07885736972093582, 'timestamp': '2025-10-02 00:17:57.132754', 'step': 4152, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:57.187035', 'step': 4152, 'epoch': 1}
{'type': 'loss', 'content': 0.08050718903541565, 'timestamp': '2025-10-02 00:17:57.192590', 'step': 4153, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:57.247204', 'step': 4153, 'epoch': 1}
{'type': 'loss', 'content': 0.05222350358963013, 'timestamp': '2025-10-02 00:17:57.253030', 'step': 4154, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:57.308427', 'step': 4154, 'epoch': 1}
{'type': 'loss', 'content': 0.03567538037896156, 'timestamp': '2025-10-02 00:17:57.317984', 'step': 4155, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:57.372351', 'step': 4155, 'epoch': 1}
{'type': 'loss', 'content': 0.05839601159095764, 'timestamp': '2025-10-02 00:17:57.378992', 'step': 4156, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:17:57.432232', 'step': 4156, 'epoch': 1}
{'type': 'loss', 'content': 0.12765920162200928, 'timestamp': '2025-10-02 00:17:57.434480', 'step': 4157, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:57.488310', 'step': 4157, 'epoch': 1}
{'type': 'loss', 'content': 0.14305682480335236, 'timestamp': '2025-10-02 00:17:57.490632', 'step': 4158, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:57.544899', 'step': 4158, 'epoch': 1}
{'type': 'loss', 'content': 0.17824821174144745, 'timestamp': '2025-10-02 00:17:57.547361', 'step': 4159, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:57.606190', 'step': 4159, 'epoch': 1}
{'type': 'loss', 'content': 0.1082332506775856, 'timestamp': '2025-10-02 00:17:57.617171', 'step': 4160, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:17:57.671217', 'step': 4160, 'epoch': 1}
{'type': 'loss', 'content': 0.15974758565425873, 'timestamp': '2025-10-02 00:17:57.674113', 'step': 4161, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:17:57.727511', 'step': 4161, 'epoch': 1}
{'type': 'loss', 'content': 0.13669896125793457, 'timestamp': '2025-10-02 00:17:57.730242', 'step': 4162, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:17:57.784774', 'step': 4162, 'epoch': 1}
{'type': 'loss', 'content': 0.16165389120578766, 'timestamp': '2025-10-02 00:17:57.787049', 'step': 4163, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:57.840510', 'step': 4163, 'epoch': 1}
{'type': 'loss', 'content': 0.13970927894115448, 'timestamp': '2025-10-02 00:17:57.846103', 'step': 4164, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:57.899756', 'step': 4164, 'epoch': 1}
{'type': 'loss', 'content': 0.050905149430036545, 'timestamp': '2025-10-02 00:17:57.902471', 'step': 4165, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:57.956141', 'step': 4165, 'epoch': 1}
{'type': 'loss', 'content': 0.15323427319526672, 'timestamp': '2025-10-02 00:17:57.958411', 'step': 4166, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:58.012354', 'step': 4166, 'epoch': 1}
{'type': 'loss', 'content': 0.07252288609743118, 'timestamp': '2025-10-02 00:17:58.018242', 'step': 4167, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:17:58.072596', 'step': 4167, 'epoch': 1}
{'type': 'loss', 'content': 0.03775739297270775, 'timestamp': '2025-10-02 00:17:58.078718', 'step': 4168, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:58.132432', 'step': 4168, 'epoch': 1}
{'type': 'loss', 'content': 0.11721296608448029, 'timestamp': '2025-10-02 00:17:58.134961', 'step': 4169, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:17:58.190499', 'step': 4169, 'epoch': 1}
{'type': 'loss', 'content': 0.038119640201330185, 'timestamp': '2025-10-02 00:17:58.200090', 'step': 4170, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:17:58.263821', 'step': 4170, 'epoch': 1}
{'type': 'loss', 'content': 0.06168297678232193, 'timestamp': '2025-10-02 00:17:58.266340', 'step': 4171, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:17:58.320509', 'step': 4171, 'epoch': 1}
{'type': 'loss', 'content': 0.042562875896692276, 'timestamp': '2025-10-02 00:17:58.330675', 'step': 4172, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:17:58.391490', 'step': 4172, 'epoch': 1}
{'type': 'loss', 'content': 0.07669064402580261, 'timestamp': '2025-10-02 00:17:58.397592', 'step': 4173, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:17:58.452268', 'step': 4173, 'epoch': 1}
{'type': 'loss', 'content': 0.21359005570411682, 'timestamp': '2025-10-02 00:17:58.454497', 'step': 4174, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:17:58.513369', 'step': 4174, 'epoch': 1}
{'type': 'loss', 'content': 0.052021775394678116, 'timestamp': '2025-10-02 00:17:58.523595', 'step': 4175, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:17:58.577606', 'step': 4175, 'epoch': 1}
{'type': 'loss', 'content': 0.06242566928267479, 'timestamp': '2025-10-02 00:17:58.584327', 'step': 4176, 'epoch': 1}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:18:25.259673', 'step': 4176, 'epoch': 1}
{'type': 'pplx', 'content': 94.02621631526453, 'timestamp': '2025-10-02 00:18:25.265835', 'step': 4176, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:25.324432', 'step': 4176, 'epoch': 1}
{'type': 'loss', 'content': 0.13041670620441437, 'timestamp': '2025-10-02 00:18:25.328054', 'step': 4177, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:18:25.425674', 'step': 4177, 'epoch': 1}
{'type': 'loss', 'content': 0.06395106017589569, 'timestamp': '2025-10-02 00:18:25.439164', 'step': 4178, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:25.501543', 'step': 4178, 'epoch': 1}
{'type': 'loss', 'content': 0.138701930642128, 'timestamp': '2025-10-02 00:18:25.517905', 'step': 4179, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:25.586448', 'step': 4179, 'epoch': 1}
{'type': 'loss', 'content': 0.09293266385793686, 'timestamp': '2025-10-02 00:18:25.594594', 'step': 4180, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:18:25.676540', 'step': 4180, 'epoch': 1}
{'type': 'loss', 'content': 0.07310966402292252, 'timestamp': '2025-10-02 00:18:25.690362', 'step': 4181, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:25.771497', 'step': 4181, 'epoch': 1}
{'type': 'loss', 'content': 0.04261350631713867, 'timestamp': '2025-10-02 00:18:25.777294', 'step': 4182, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:25.846181', 'step': 4182, 'epoch': 1}
{'type': 'loss', 'content': 0.08740313351154327, 'timestamp': '2025-10-02 00:18:25.851055', 'step': 4183, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:25.918544', 'step': 4183, 'epoch': 1}
{'type': 'loss', 'content': 0.024959802627563477, 'timestamp': '2025-10-02 00:18:25.927408', 'step': 4184, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:18:26.017698', 'step': 4184, 'epoch': 1}
{'type': 'loss', 'content': 0.020205102860927582, 'timestamp': '2025-10-02 00:18:26.029249', 'step': 4185, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:26.102459', 'step': 4185, 'epoch': 1}
{'type': 'loss', 'content': 0.11135131120681763, 'timestamp': '2025-10-02 00:18:26.116223', 'step': 4186, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:26.195380', 'step': 4186, 'epoch': 1}
{'type': 'loss', 'content': 0.06543931365013123, 'timestamp': '2025-10-02 00:18:26.207960', 'step': 4187, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:26.277075', 'step': 4187, 'epoch': 1}
{'type': 'loss', 'content': 0.05077502503991127, 'timestamp': '2025-10-02 00:18:26.284753', 'step': 4188, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:26.340895', 'step': 4188, 'epoch': 1}
{'type': 'loss', 'content': 0.24026882648468018, 'timestamp': '2025-10-02 00:18:26.353000', 'step': 4189, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:26.428029', 'step': 4189, 'epoch': 1}
{'type': 'loss', 'content': 0.04364039748907089, 'timestamp': '2025-10-02 00:18:26.439428', 'step': 4190, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:26.507775', 'step': 4190, 'epoch': 1}
{'type': 'loss', 'content': 0.09788311272859573, 'timestamp': '2025-10-02 00:18:26.519980', 'step': 4191, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:26.606616', 'step': 4191, 'epoch': 1}
{'type': 'loss', 'content': 0.10880900174379349, 'timestamp': '2025-10-02 00:18:26.614048', 'step': 4192, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:26.689693', 'step': 4192, 'epoch': 1}
{'type': 'loss', 'content': 0.045961689203977585, 'timestamp': '2025-10-02 00:18:26.694237', 'step': 4193, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:18:26.781048', 'step': 4193, 'epoch': 1}
{'type': 'loss', 'content': 0.03283584490418434, 'timestamp': '2025-10-02 00:18:26.791893', 'step': 4194, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:26.859441', 'step': 4194, 'epoch': 1}
{'type': 'loss', 'content': 0.21576808393001556, 'timestamp': '2025-10-02 00:18:26.870713', 'step': 4195, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:26.948284', 'step': 4195, 'epoch': 1}
{'type': 'loss', 'content': 0.24396999180316925, 'timestamp': '2025-10-02 00:18:26.963819', 'step': 4196, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:27.061857', 'step': 4196, 'epoch': 1}
{'type': 'loss', 'content': 0.024523913860321045, 'timestamp': '2025-10-02 00:18:27.075086', 'step': 4197, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:27.144184', 'step': 4197, 'epoch': 1}
{'type': 'loss', 'content': 0.08948418498039246, 'timestamp': '2025-10-02 00:18:27.156196', 'step': 4198, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:27.223021', 'step': 4198, 'epoch': 1}
{'type': 'loss', 'content': 0.05044400319457054, 'timestamp': '2025-10-02 00:18:27.228793', 'step': 4199, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:18:27.305106', 'step': 4199, 'epoch': 1}
{'type': 'loss', 'content': 0.0645006000995636, 'timestamp': '2025-10-02 00:18:27.319699', 'step': 4200, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:27.377499', 'step': 4200, 'epoch': 1}
{'type': 'loss', 'content': 0.12261486053466797, 'timestamp': '2025-10-02 00:18:27.380760', 'step': 4201, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:18:27.482788', 'step': 4201, 'epoch': 1}
{'type': 'loss', 'content': 0.07467901706695557, 'timestamp': '2025-10-02 00:18:27.496563', 'step': 4202, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:27.580919', 'step': 4202, 'epoch': 1}
{'type': 'loss', 'content': 0.13893412053585052, 'timestamp': '2025-10-02 00:18:27.594763', 'step': 4203, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:27.666305', 'step': 4203, 'epoch': 1}
{'type': 'loss', 'content': 0.037683043628931046, 'timestamp': '2025-10-02 00:18:27.673227', 'step': 4204, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:27.763474', 'step': 4204, 'epoch': 1}
{'type': 'loss', 'content': 0.13115836679935455, 'timestamp': '2025-10-02 00:18:27.775323', 'step': 4205, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:18:27.859854', 'step': 4205, 'epoch': 1}
{'type': 'loss', 'content': 0.2422163486480713, 'timestamp': '2025-10-02 00:18:27.863765', 'step': 4206, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:27.943888', 'step': 4206, 'epoch': 1}
{'type': 'loss', 'content': 0.10972224920988083, 'timestamp': '2025-10-02 00:18:27.955386', 'step': 4207, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:18:28.031513', 'step': 4207, 'epoch': 1}
{'type': 'loss', 'content': 0.11431575566530228, 'timestamp': '2025-10-02 00:18:28.047800', 'step': 4208, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:18:28.131179', 'step': 4208, 'epoch': 1}
{'type': 'loss', 'content': 0.13533636927604675, 'timestamp': '2025-10-02 00:18:28.145123', 'step': 4209, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:28.221501', 'step': 4209, 'epoch': 1}
{'type': 'loss', 'content': 0.04652493819594383, 'timestamp': '2025-10-02 00:18:28.234492', 'step': 4210, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:28.292138', 'step': 4210, 'epoch': 1}
{'type': 'loss', 'content': 0.06851450353860855, 'timestamp': '2025-10-02 00:18:28.306798', 'step': 4211, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:28.389261', 'step': 4211, 'epoch': 1}
{'type': 'loss', 'content': 0.06142973527312279, 'timestamp': '2025-10-02 00:18:28.404915', 'step': 4212, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:28.489847', 'step': 4212, 'epoch': 1}
{'type': 'loss', 'content': 0.11895353347063065, 'timestamp': '2025-10-02 00:18:28.494564', 'step': 4213, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:28.578777', 'step': 4213, 'epoch': 1}
{'type': 'loss', 'content': 0.048766519874334335, 'timestamp': '2025-10-02 00:18:28.588598', 'step': 4214, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:28.658264', 'step': 4214, 'epoch': 1}
{'type': 'loss', 'content': 0.09742919355630875, 'timestamp': '2025-10-02 00:18:28.662975', 'step': 4215, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:28.771999', 'step': 4215, 'epoch': 1}
{'type': 'loss', 'content': 0.19241614639759064, 'timestamp': '2025-10-02 00:18:28.783694', 'step': 4216, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:28.855242', 'step': 4216, 'epoch': 1}
{'type': 'loss', 'content': 0.05029573291540146, 'timestamp': '2025-10-02 00:18:28.860854', 'step': 4217, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:18:28.923406', 'step': 4217, 'epoch': 1}
{'type': 'loss', 'content': 0.08159945160150528, 'timestamp': '2025-10-02 00:18:28.929342', 'step': 4218, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:28.990614', 'step': 4218, 'epoch': 1}
{'type': 'loss', 'content': 0.04647888243198395, 'timestamp': '2025-10-02 00:18:29.000170', 'step': 4219, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:29.096166', 'step': 4219, 'epoch': 1}
{'type': 'loss', 'content': 0.061270877718925476, 'timestamp': '2025-10-02 00:18:29.108570', 'step': 4220, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:29.190864', 'step': 4220, 'epoch': 1}
{'type': 'loss', 'content': 0.1622040569782257, 'timestamp': '2025-10-02 00:18:29.196285', 'step': 4221, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:29.277411', 'step': 4221, 'epoch': 1}
{'type': 'loss', 'content': 0.01780741661787033, 'timestamp': '2025-10-02 00:18:29.292291', 'step': 4222, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:18:29.380707', 'step': 4222, 'epoch': 1}
{'type': 'loss', 'content': 0.016442904248833656, 'timestamp': '2025-10-02 00:18:29.395057', 'step': 4223, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:29.480363', 'step': 4223, 'epoch': 1}
{'type': 'loss', 'content': 0.03472374752163887, 'timestamp': '2025-10-02 00:18:29.496012', 'step': 4224, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:29.585167', 'step': 4224, 'epoch': 1}
{'type': 'loss', 'content': 0.08437097072601318, 'timestamp': '2025-10-02 00:18:29.591150', 'step': 4225, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:29.673534', 'step': 4225, 'epoch': 1}
{'type': 'loss', 'content': 0.047098308801651, 'timestamp': '2025-10-02 00:18:29.685793', 'step': 4226, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:29.776067', 'step': 4226, 'epoch': 1}
{'type': 'loss', 'content': 0.054906997829675674, 'timestamp': '2025-10-02 00:18:29.781049', 'step': 4227, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:29.845963', 'step': 4227, 'epoch': 1}
{'type': 'loss', 'content': 0.040555696934461594, 'timestamp': '2025-10-02 00:18:29.853126', 'step': 4228, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:29.915916', 'step': 4228, 'epoch': 1}
{'type': 'loss', 'content': 0.02813105843961239, 'timestamp': '2025-10-02 00:18:29.926399', 'step': 4229, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:30.001995', 'step': 4229, 'epoch': 1}
{'type': 'loss', 'content': 0.11266452819108963, 'timestamp': '2025-10-02 00:18:30.006437', 'step': 4230, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:30.099247', 'step': 4230, 'epoch': 1}
{'type': 'loss', 'content': 0.010664702393114567, 'timestamp': '2025-10-02 00:18:30.118360', 'step': 4231, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:30.180956', 'step': 4231, 'epoch': 1}
{'type': 'loss', 'content': 0.06174960359930992, 'timestamp': '2025-10-02 00:18:30.189693', 'step': 4232, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:30.266983', 'step': 4232, 'epoch': 1}
{'type': 'loss', 'content': 0.10654029250144958, 'timestamp': '2025-10-02 00:18:30.270607', 'step': 4233, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:30.382444', 'step': 4233, 'epoch': 1}
{'type': 'loss', 'content': 0.07321479171514511, 'timestamp': '2025-10-02 00:18:30.386219', 'step': 4234, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:30.469111', 'step': 4234, 'epoch': 1}
{'type': 'loss', 'content': 0.16417823731899261, 'timestamp': '2025-10-02 00:18:30.473014', 'step': 4235, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:30.542037', 'step': 4235, 'epoch': 1}
{'type': 'loss', 'content': 0.038923703134059906, 'timestamp': '2025-10-02 00:18:30.558286', 'step': 4236, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:30.629039', 'step': 4236, 'epoch': 1}
{'type': 'loss', 'content': 0.0378551185131073, 'timestamp': '2025-10-02 00:18:30.636724', 'step': 4237, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:30.721646', 'step': 4237, 'epoch': 1}
{'type': 'loss', 'content': 0.08454187959432602, 'timestamp': '2025-10-02 00:18:30.732236', 'step': 4238, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:18:30.814727', 'step': 4238, 'epoch': 1}
{'type': 'loss', 'content': 0.06906134635210037, 'timestamp': '2025-10-02 00:18:30.817636', 'step': 4239, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:18:30.876728', 'step': 4239, 'epoch': 1}
{'type': 'loss', 'content': 0.1342245638370514, 'timestamp': '2025-10-02 00:18:30.895818', 'step': 4240, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:30.987453', 'step': 4240, 'epoch': 1}
{'type': 'loss', 'content': 0.023488201200962067, 'timestamp': '2025-10-02 00:18:30.998359', 'step': 4241, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:31.080155', 'step': 4241, 'epoch': 1}
{'type': 'loss', 'content': 0.04490877315402031, 'timestamp': '2025-10-02 00:18:31.092951', 'step': 4242, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:18:31.183501', 'step': 4242, 'epoch': 1}
{'type': 'loss', 'content': 0.11765540391206741, 'timestamp': '2025-10-02 00:18:31.188542', 'step': 4243, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:18:31.257459', 'step': 4243, 'epoch': 1}
{'type': 'loss', 'content': 0.1747554987668991, 'timestamp': '2025-10-02 00:18:31.272328', 'step': 4244, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:31.355440', 'step': 4244, 'epoch': 1}
{'type': 'loss', 'content': 0.11868441104888916, 'timestamp': '2025-10-02 00:18:31.367018', 'step': 4245, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:31.452364', 'step': 4245, 'epoch': 1}
{'type': 'loss', 'content': 0.1333232969045639, 'timestamp': '2025-10-02 00:18:31.462808', 'step': 4246, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:31.552838', 'step': 4246, 'epoch': 1}
{'type': 'loss', 'content': 0.07192012667655945, 'timestamp': '2025-10-02 00:18:31.557716', 'step': 4247, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:31.642716', 'step': 4247, 'epoch': 1}
{'type': 'loss', 'content': 0.0726773589849472, 'timestamp': '2025-10-02 00:18:31.650045', 'step': 4248, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:31.719358', 'step': 4248, 'epoch': 1}
{'type': 'loss', 'content': 0.07570331543684006, 'timestamp': '2025-10-02 00:18:31.730757', 'step': 4249, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:18:31.798405', 'step': 4249, 'epoch': 1}
{'type': 'loss', 'content': 0.06694532185792923, 'timestamp': '2025-10-02 00:18:31.802870', 'step': 4250, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:31.876503', 'step': 4250, 'epoch': 1}
{'type': 'loss', 'content': 0.13716182112693787, 'timestamp': '2025-10-02 00:18:31.881292', 'step': 4251, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:31.941165', 'step': 4251, 'epoch': 1}
{'type': 'loss', 'content': 0.1378314197063446, 'timestamp': '2025-10-02 00:18:31.948923', 'step': 4252, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:32.015110', 'step': 4252, 'epoch': 1}
{'type': 'loss', 'content': 0.08099876344203949, 'timestamp': '2025-10-02 00:18:32.020118', 'step': 4253, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:18:32.078626', 'step': 4253, 'epoch': 1}
{'type': 'loss', 'content': 0.12157396227121353, 'timestamp': '2025-10-02 00:18:32.092842', 'step': 4254, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:32.181409', 'step': 4254, 'epoch': 1}
{'type': 'loss', 'content': 0.03815361484885216, 'timestamp': '2025-10-02 00:18:32.192540', 'step': 4255, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:32.284958', 'step': 4255, 'epoch': 1}
{'type': 'loss', 'content': 0.21135313808918, 'timestamp': '2025-10-02 00:18:32.292074', 'step': 4256, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:32.378371', 'step': 4256, 'epoch': 1}
{'type': 'loss', 'content': 0.22605256736278534, 'timestamp': '2025-10-02 00:18:32.380900', 'step': 4257, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:32.437574', 'step': 4257, 'epoch': 1}
{'type': 'loss', 'content': 0.037596799433231354, 'timestamp': '2025-10-02 00:18:32.450664', 'step': 4258, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:18:32.560268', 'step': 4258, 'epoch': 1}
{'type': 'loss', 'content': 0.043391671031713486, 'timestamp': '2025-10-02 00:18:32.572567', 'step': 4259, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:18:32.674906', 'step': 4259, 'epoch': 1}
{'type': 'loss', 'content': 0.1339312493801117, 'timestamp': '2025-10-02 00:18:32.683051', 'step': 4260, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:18:32.758356', 'step': 4260, 'epoch': 1}
{'type': 'loss', 'content': 0.04175139591097832, 'timestamp': '2025-10-02 00:18:32.773000', 'step': 4261, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:32.869276', 'step': 4261, 'epoch': 1}
{'type': 'loss', 'content': 0.03711753338575363, 'timestamp': '2025-10-02 00:18:32.879353', 'step': 4262, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:32.960905', 'step': 4262, 'epoch': 1}
{'type': 'loss', 'content': 0.04780609905719757, 'timestamp': '2025-10-02 00:18:32.970441', 'step': 4263, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:33.040320', 'step': 4263, 'epoch': 1}
{'type': 'loss', 'content': 0.014872836880385876, 'timestamp': '2025-10-02 00:18:33.050972', 'step': 4264, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:18:33.123934', 'step': 4264, 'epoch': 1}
{'type': 'loss', 'content': 0.05270377919077873, 'timestamp': '2025-10-02 00:18:33.135867', 'step': 4265, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:18:33.222252', 'step': 4265, 'epoch': 1}
{'type': 'loss', 'content': 0.03480255976319313, 'timestamp': '2025-10-02 00:18:33.234819', 'step': 4266, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:33.307572', 'step': 4266, 'epoch': 1}
{'type': 'loss', 'content': 0.05918412283062935, 'timestamp': '2025-10-02 00:18:33.314023', 'step': 4267, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:33.387164', 'step': 4267, 'epoch': 1}
{'type': 'loss', 'content': 0.07142752408981323, 'timestamp': '2025-10-02 00:18:33.394472', 'step': 4268, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:18:33.458537', 'step': 4268, 'epoch': 1}
{'type': 'loss', 'content': 0.1979026198387146, 'timestamp': '2025-10-02 00:18:33.468054', 'step': 4269, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:18:33.550164', 'step': 4269, 'epoch': 1}
{'type': 'loss', 'content': 0.050401195883750916, 'timestamp': '2025-10-02 00:18:33.561260', 'step': 4270, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:33.642149', 'step': 4270, 'epoch': 1}
{'type': 'loss', 'content': 0.09077353775501251, 'timestamp': '2025-10-02 00:18:33.646487', 'step': 4271, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:33.717617', 'step': 4271, 'epoch': 1}
{'type': 'loss', 'content': 0.05582122132182121, 'timestamp': '2025-10-02 00:18:33.725959', 'step': 4272, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:33.796674', 'step': 4272, 'epoch': 1}
{'type': 'loss', 'content': 0.26632726192474365, 'timestamp': '2025-10-02 00:18:33.804939', 'step': 4273, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:18:33.883638', 'step': 4273, 'epoch': 1}
{'type': 'loss', 'content': 0.03431333601474762, 'timestamp': '2025-10-02 00:18:33.894525', 'step': 4274, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:33.972167', 'step': 4274, 'epoch': 1}
{'type': 'loss', 'content': 0.12070077657699585, 'timestamp': '2025-10-02 00:18:33.980215', 'step': 4275, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:34.049194', 'step': 4275, 'epoch': 1}
{'type': 'loss', 'content': 0.047118548303842545, 'timestamp': '2025-10-02 00:18:34.062726', 'step': 4276, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:34.135076', 'step': 4276, 'epoch': 1}
{'type': 'loss', 'content': 0.03703096881508827, 'timestamp': '2025-10-02 00:18:34.138661', 'step': 4277, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:34.204664', 'step': 4277, 'epoch': 1}
{'type': 'loss', 'content': 0.2859356105327606, 'timestamp': '2025-10-02 00:18:34.207537', 'step': 4278, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:34.281508', 'step': 4278, 'epoch': 1}
{'type': 'loss', 'content': 0.12641119956970215, 'timestamp': '2025-10-02 00:18:34.284706', 'step': 4279, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:34.360191', 'step': 4279, 'epoch': 1}
{'type': 'loss', 'content': 0.21399199962615967, 'timestamp': '2025-10-02 00:18:34.371477', 'step': 4280, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:18:34.447854', 'step': 4280, 'epoch': 1}
{'type': 'loss', 'content': 0.07902386784553528, 'timestamp': '2025-10-02 00:18:34.459305', 'step': 4281, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:18:34.552978', 'step': 4281, 'epoch': 1}
{'type': 'loss', 'content': 0.02756362222135067, 'timestamp': '2025-10-02 00:18:34.566695', 'step': 4282, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:18:34.635744', 'step': 4282, 'epoch': 1}
{'type': 'loss', 'content': 0.14970752596855164, 'timestamp': '2025-10-02 00:18:34.639998', 'step': 4283, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:18:34.697248', 'step': 4283, 'epoch': 1}
{'type': 'loss', 'content': 0.10109162330627441, 'timestamp': '2025-10-02 00:18:34.703606', 'step': 4284, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:34.763437', 'step': 4284, 'epoch': 1}
{'type': 'loss', 'content': 0.20474039018154144, 'timestamp': '2025-10-02 00:18:34.767745', 'step': 4285, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:34.829261', 'step': 4285, 'epoch': 1}
{'type': 'loss', 'content': 0.025695718824863434, 'timestamp': '2025-10-02 00:18:34.832552', 'step': 4286, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:18:34.891076', 'step': 4286, 'epoch': 1}
{'type': 'loss', 'content': 0.11121883988380432, 'timestamp': '2025-10-02 00:18:34.900307', 'step': 4287, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:34.976980', 'step': 4287, 'epoch': 1}
{'type': 'loss', 'content': 0.06190448999404907, 'timestamp': '2025-10-02 00:18:34.987455', 'step': 4288, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:18:35.067814', 'step': 4288, 'epoch': 1}
{'type': 'loss', 'content': 0.15183959901332855, 'timestamp': '2025-10-02 00:18:35.077726', 'step': 4289, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:35.139609', 'step': 4289, 'epoch': 1}
{'type': 'loss', 'content': 0.08619947731494904, 'timestamp': '2025-10-02 00:18:35.143596', 'step': 4290, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:35.204374', 'step': 4290, 'epoch': 1}
{'type': 'loss', 'content': 0.0769946351647377, 'timestamp': '2025-10-02 00:18:35.207587', 'step': 4291, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:35.276070', 'step': 4291, 'epoch': 1}
{'type': 'loss', 'content': 0.03349389135837555, 'timestamp': '2025-10-02 00:18:35.286303', 'step': 4292, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:18:35.358853', 'step': 4292, 'epoch': 1}
{'type': 'loss', 'content': 0.06110599637031555, 'timestamp': '2025-10-02 00:18:35.370607', 'step': 4293, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:35.435244', 'step': 4293, 'epoch': 1}
{'type': 'loss', 'content': 0.17275342345237732, 'timestamp': '2025-10-02 00:18:35.438690', 'step': 4294, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:35.504060', 'step': 4294, 'epoch': 1}
{'type': 'loss', 'content': 0.04908932000398636, 'timestamp': '2025-10-02 00:18:35.513670', 'step': 4295, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:35.575245', 'step': 4295, 'epoch': 1}
{'type': 'loss', 'content': 0.02383771911263466, 'timestamp': '2025-10-02 00:18:35.583604', 'step': 4296, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:35.646520', 'step': 4296, 'epoch': 1}
{'type': 'loss', 'content': 0.09139447659254074, 'timestamp': '2025-10-02 00:18:35.650126', 'step': 4297, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:18:35.726166', 'step': 4297, 'epoch': 1}
{'type': 'loss', 'content': 0.029509613290429115, 'timestamp': '2025-10-02 00:18:35.738448', 'step': 4298, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:35.802657', 'step': 4298, 'epoch': 1}
{'type': 'loss', 'content': 0.11273933947086334, 'timestamp': '2025-10-02 00:18:35.808550', 'step': 4299, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:35.871669', 'step': 4299, 'epoch': 1}
{'type': 'loss', 'content': 0.05753738805651665, 'timestamp': '2025-10-02 00:18:35.882062', 'step': 4300, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:35.941767', 'step': 4300, 'epoch': 1}
{'type': 'loss', 'content': 0.07386419922113419, 'timestamp': '2025-10-02 00:18:35.952955', 'step': 4301, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:36.011381', 'step': 4301, 'epoch': 1}
{'type': 'loss', 'content': 0.07711391896009445, 'timestamp': '2025-10-02 00:18:36.020116', 'step': 4302, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:36.092947', 'step': 4302, 'epoch': 1}
{'type': 'loss', 'content': 0.035637401044368744, 'timestamp': '2025-10-02 00:18:36.103309', 'step': 4303, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:18:36.212038', 'step': 4303, 'epoch': 1}
{'type': 'loss', 'content': 0.04397348687052727, 'timestamp': '2025-10-02 00:18:36.228128', 'step': 4304, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:18:36.300404', 'step': 4304, 'epoch': 1}
{'type': 'loss', 'content': 0.06268526613712311, 'timestamp': '2025-10-02 00:18:36.308025', 'step': 4305, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:36.369128', 'step': 4305, 'epoch': 1}
{'type': 'loss', 'content': 0.17868725955486298, 'timestamp': '2025-10-02 00:18:36.374378', 'step': 4306, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:36.432145', 'step': 4306, 'epoch': 1}
{'type': 'loss', 'content': 0.05346003919839859, 'timestamp': '2025-10-02 00:18:36.439648', 'step': 4307, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:36.517771', 'step': 4307, 'epoch': 1}
{'type': 'loss', 'content': 0.05576137453317642, 'timestamp': '2025-10-02 00:18:36.529286', 'step': 4308, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:18:36.595494', 'step': 4308, 'epoch': 1}
{'type': 'loss', 'content': 0.06307864934206009, 'timestamp': '2025-10-02 00:18:36.607066', 'step': 4309, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:36.665439', 'step': 4309, 'epoch': 1}
{'type': 'loss', 'content': 0.15594889223575592, 'timestamp': '2025-10-02 00:18:36.669155', 'step': 4310, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:36.728747', 'step': 4310, 'epoch': 1}
{'type': 'loss', 'content': 0.06471937894821167, 'timestamp': '2025-10-02 00:18:36.743583', 'step': 4311, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:36.807155', 'step': 4311, 'epoch': 1}
{'type': 'loss', 'content': 0.06885649263858795, 'timestamp': '2025-10-02 00:18:36.813885', 'step': 4312, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:18:36.891835', 'step': 4312, 'epoch': 1}
{'type': 'loss', 'content': 0.02796471118927002, 'timestamp': '2025-10-02 00:18:36.903425', 'step': 4313, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:36.967994', 'step': 4313, 'epoch': 1}
{'type': 'loss', 'content': 0.05455309897661209, 'timestamp': '2025-10-02 00:18:36.973723', 'step': 4314, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:18:37.041164', 'step': 4314, 'epoch': 1}
{'type': 'loss', 'content': 0.19270144402980804, 'timestamp': '2025-10-02 00:18:37.045890', 'step': 4315, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:18:37.122800', 'step': 4315, 'epoch': 1}
{'type': 'loss', 'content': 0.03260960057377815, 'timestamp': '2025-10-02 00:18:37.145211', 'step': 4316, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:37.207017', 'step': 4316, 'epoch': 1}
{'type': 'loss', 'content': 0.07418879866600037, 'timestamp': '2025-10-02 00:18:37.212868', 'step': 4317, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:37.298810', 'step': 4317, 'epoch': 1}
{'type': 'loss', 'content': 0.04717758670449257, 'timestamp': '2025-10-02 00:18:37.306410', 'step': 4318, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:18:37.373568', 'step': 4318, 'epoch': 1}
{'type': 'loss', 'content': 0.03683651611208916, 'timestamp': '2025-10-02 00:18:37.384338', 'step': 4319, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:37.449541', 'step': 4319, 'epoch': 1}
{'type': 'loss', 'content': 0.0542680062353611, 'timestamp': '2025-10-02 00:18:37.460698', 'step': 4320, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:18:37.521163', 'step': 4320, 'epoch': 1}
{'type': 'loss', 'content': 0.16278454661369324, 'timestamp': '2025-10-02 00:18:37.525475', 'step': 4321, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:37.586950', 'step': 4321, 'epoch': 1}
{'type': 'loss', 'content': 0.03504227101802826, 'timestamp': '2025-10-02 00:18:37.592856', 'step': 4322, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:37.652380', 'step': 4322, 'epoch': 1}
{'type': 'loss', 'content': 0.050541799515485764, 'timestamp': '2025-10-02 00:18:37.662801', 'step': 4323, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:37.718964', 'step': 4323, 'epoch': 1}
{'type': 'loss', 'content': 0.06225110962986946, 'timestamp': '2025-10-02 00:18:37.729126', 'step': 4324, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:37.783030', 'step': 4324, 'epoch': 1}
{'type': 'loss', 'content': 0.18792882561683655, 'timestamp': '2025-10-02 00:18:37.788702', 'step': 4325, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:37.845891', 'step': 4325, 'epoch': 1}
{'type': 'loss', 'content': 0.11167807132005692, 'timestamp': '2025-10-02 00:18:37.849395', 'step': 4326, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:18:37.904128', 'step': 4326, 'epoch': 1}
{'type': 'loss', 'content': 0.14624467492103577, 'timestamp': '2025-10-02 00:18:37.907339', 'step': 4327, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:37.961222', 'step': 4327, 'epoch': 1}
{'type': 'loss', 'content': 0.1467433124780655, 'timestamp': '2025-10-02 00:18:37.967846', 'step': 4328, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:38.022587', 'step': 4328, 'epoch': 1}
{'type': 'loss', 'content': 0.074467733502388, 'timestamp': '2025-10-02 00:18:38.025090', 'step': 4329, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:38.083905', 'step': 4329, 'epoch': 1}
{'type': 'loss', 'content': 0.03994179517030716, 'timestamp': '2025-10-02 00:18:38.094384', 'step': 4330, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:38.150227', 'step': 4330, 'epoch': 1}
{'type': 'loss', 'content': 0.04936588928103447, 'timestamp': '2025-10-02 00:18:38.159514', 'step': 4331, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:38.215685', 'step': 4331, 'epoch': 1}
{'type': 'loss', 'content': 0.02050771750509739, 'timestamp': '2025-10-02 00:18:38.222199', 'step': 4332, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:38.276505', 'step': 4332, 'epoch': 1}
{'type': 'loss', 'content': 0.12844222784042358, 'timestamp': '2025-10-02 00:18:38.278924', 'step': 4333, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:38.339375', 'step': 4333, 'epoch': 1}
{'type': 'loss', 'content': 0.09330558776855469, 'timestamp': '2025-10-02 00:18:38.349785', 'step': 4334, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:38.408048', 'step': 4334, 'epoch': 1}
{'type': 'loss', 'content': 0.02350914292037487, 'timestamp': '2025-10-02 00:18:38.417837', 'step': 4335, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:38.479548', 'step': 4335, 'epoch': 1}
{'type': 'loss', 'content': 0.0444403812289238, 'timestamp': '2025-10-02 00:18:38.490467', 'step': 4336, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:38.553128', 'step': 4336, 'epoch': 1}
{'type': 'loss', 'content': 0.11879438161849976, 'timestamp': '2025-10-02 00:18:38.562814', 'step': 4337, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:38.624565', 'step': 4337, 'epoch': 1}
{'type': 'loss', 'content': 0.239052876830101, 'timestamp': '2025-10-02 00:18:38.636672', 'step': 4338, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:38.696803', 'step': 4338, 'epoch': 1}
{'type': 'loss', 'content': 0.061338525265455246, 'timestamp': '2025-10-02 00:18:38.704232', 'step': 4339, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:38.777571', 'step': 4339, 'epoch': 1}
{'type': 'loss', 'content': 0.1082676500082016, 'timestamp': '2025-10-02 00:18:38.784943', 'step': 4340, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:38.856180', 'step': 4340, 'epoch': 1}
{'type': 'loss', 'content': 0.0757068619132042, 'timestamp': '2025-10-02 00:18:38.865290', 'step': 4341, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:38.926848', 'step': 4341, 'epoch': 1}
{'type': 'loss', 'content': 0.06208929792046547, 'timestamp': '2025-10-02 00:18:38.934509', 'step': 4342, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:18:38.996318', 'step': 4342, 'epoch': 1}
{'type': 'loss', 'content': 0.07755237072706223, 'timestamp': '2025-10-02 00:18:38.999724', 'step': 4343, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:39.068794', 'step': 4343, 'epoch': 1}
{'type': 'loss', 'content': 0.14835691452026367, 'timestamp': '2025-10-02 00:18:39.076004', 'step': 4344, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:39.138440', 'step': 4344, 'epoch': 1}
{'type': 'loss', 'content': 0.0656128004193306, 'timestamp': '2025-10-02 00:18:39.141588', 'step': 4345, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:39.204028', 'step': 4345, 'epoch': 1}
{'type': 'loss', 'content': 0.1743718534708023, 'timestamp': '2025-10-02 00:18:39.207200', 'step': 4346, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:39.263962', 'step': 4346, 'epoch': 1}
{'type': 'loss', 'content': 0.09515012055635452, 'timestamp': '2025-10-02 00:18:39.268491', 'step': 4347, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:39.326481', 'step': 4347, 'epoch': 1}
{'type': 'loss', 'content': 0.035580456256866455, 'timestamp': '2025-10-02 00:18:39.337100', 'step': 4348, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:39.405795', 'step': 4348, 'epoch': 1}
{'type': 'loss', 'content': 0.07351355999708176, 'timestamp': '2025-10-02 00:18:39.416252', 'step': 4349, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:39.477732', 'step': 4349, 'epoch': 1}
{'type': 'loss', 'content': 0.05458497256040573, 'timestamp': '2025-10-02 00:18:39.481038', 'step': 4350, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:39.543705', 'step': 4350, 'epoch': 1}
{'type': 'loss', 'content': 0.09869987517595291, 'timestamp': '2025-10-02 00:18:39.547595', 'step': 4351, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:18:39.617537', 'step': 4351, 'epoch': 1}
{'type': 'loss', 'content': 0.039232250303030014, 'timestamp': '2025-10-02 00:18:39.629049', 'step': 4352, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:18:39.712763', 'step': 4352, 'epoch': 1}
{'type': 'loss', 'content': 0.06588302552700043, 'timestamp': '2025-10-02 00:18:39.726373', 'step': 4353, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:39.786106', 'step': 4353, 'epoch': 1}
{'type': 'loss', 'content': 0.053917787969112396, 'timestamp': '2025-10-02 00:18:39.790350', 'step': 4354, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:18:39.888106', 'step': 4354, 'epoch': 1}
{'type': 'loss', 'content': 0.04679447039961815, 'timestamp': '2025-10-02 00:18:39.902132', 'step': 4355, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:39.972355', 'step': 4355, 'epoch': 1}
{'type': 'loss', 'content': 0.05503624677658081, 'timestamp': '2025-10-02 00:18:39.980176', 'step': 4356, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:40.047430', 'step': 4356, 'epoch': 1}
{'type': 'loss', 'content': 0.1309552639722824, 'timestamp': '2025-10-02 00:18:40.051448', 'step': 4357, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:40.118744', 'step': 4357, 'epoch': 1}
{'type': 'loss', 'content': 0.020144358277320862, 'timestamp': '2025-10-02 00:18:40.124796', 'step': 4358, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:18:40.211980', 'step': 4358, 'epoch': 1}
{'type': 'loss', 'content': 0.03579510375857353, 'timestamp': '2025-10-02 00:18:40.224554', 'step': 4359, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-10-02 00:18:40.309159', 'step': 4359, 'epoch': 1}
{'type': 'loss', 'content': 0.03517172485589981, 'timestamp': '2025-10-02 00:18:40.323906', 'step': 4360, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:40.386847', 'step': 4360, 'epoch': 1}
{'type': 'loss', 'content': 0.07856934517621994, 'timestamp': '2025-10-02 00:18:40.390690', 'step': 4361, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:40.454135', 'step': 4361, 'epoch': 1}
{'type': 'loss', 'content': 0.052853215485811234, 'timestamp': '2025-10-02 00:18:40.461937', 'step': 4362, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:40.525414', 'step': 4362, 'epoch': 1}
{'type': 'loss', 'content': 0.1399354636669159, 'timestamp': '2025-10-02 00:18:40.529658', 'step': 4363, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:40.591412', 'step': 4363, 'epoch': 1}
{'type': 'loss', 'content': 0.03240962326526642, 'timestamp': '2025-10-02 00:18:40.602152', 'step': 4364, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:18:40.664821', 'step': 4364, 'epoch': 1}
{'type': 'loss', 'content': 0.18793319165706635, 'timestamp': '2025-10-02 00:18:40.668073', 'step': 4365, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:40.725402', 'step': 4365, 'epoch': 1}
{'type': 'loss', 'content': 0.04679686203598976, 'timestamp': '2025-10-02 00:18:40.730029', 'step': 4366, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:40.796065', 'step': 4366, 'epoch': 1}
{'type': 'loss', 'content': 0.19194431602954865, 'timestamp': '2025-10-02 00:18:40.799735', 'step': 4367, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:18:40.877987', 'step': 4367, 'epoch': 1}
{'type': 'loss', 'content': 0.0475916787981987, 'timestamp': '2025-10-02 00:18:40.889514', 'step': 4368, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:40.956696', 'step': 4368, 'epoch': 1}
{'type': 'loss', 'content': 0.3281177282333374, 'timestamp': '2025-10-02 00:18:40.960111', 'step': 4369, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:18:41.040799', 'step': 4369, 'epoch': 1}
{'type': 'loss', 'content': 0.07713641971349716, 'timestamp': '2025-10-02 00:18:41.051505', 'step': 4370, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:18:41.116243', 'step': 4370, 'epoch': 1}
{'type': 'loss', 'content': 0.04677141085267067, 'timestamp': '2025-10-02 00:18:41.127301', 'step': 4371, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:41.198279', 'step': 4371, 'epoch': 1}
{'type': 'loss', 'content': 0.0738847479224205, 'timestamp': '2025-10-02 00:18:41.210026', 'step': 4372, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:41.274296', 'step': 4372, 'epoch': 1}
{'type': 'loss', 'content': 0.06612637639045715, 'timestamp': '2025-10-02 00:18:41.285495', 'step': 4373, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:41.349111', 'step': 4373, 'epoch': 1}
{'type': 'loss', 'content': 0.0758618488907814, 'timestamp': '2025-10-02 00:18:41.357997', 'step': 4374, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:41.429796', 'step': 4374, 'epoch': 1}
{'type': 'loss', 'content': 0.10390085726976395, 'timestamp': '2025-10-02 00:18:41.435565', 'step': 4375, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:41.498411', 'step': 4375, 'epoch': 1}
{'type': 'loss', 'content': 0.037964046001434326, 'timestamp': '2025-10-02 00:18:41.512823', 'step': 4376, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:18:41.583218', 'step': 4376, 'epoch': 1}
{'type': 'loss', 'content': 0.12575101852416992, 'timestamp': '2025-10-02 00:18:41.586332', 'step': 4377, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:41.650418', 'step': 4377, 'epoch': 1}
{'type': 'loss', 'content': 0.043600745499134064, 'timestamp': '2025-10-02 00:18:41.653393', 'step': 4378, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:41.716130', 'step': 4378, 'epoch': 1}
{'type': 'loss', 'content': 0.09466694295406342, 'timestamp': '2025-10-02 00:18:41.719598', 'step': 4379, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:41.776884', 'step': 4379, 'epoch': 1}
{'type': 'loss', 'content': 0.027671895921230316, 'timestamp': '2025-10-02 00:18:41.787144', 'step': 4380, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:41.849305', 'step': 4380, 'epoch': 1}
{'type': 'loss', 'content': 0.08595909178256989, 'timestamp': '2025-10-02 00:18:41.852874', 'step': 4381, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:18:41.910410', 'step': 4381, 'epoch': 1}
{'type': 'loss', 'content': 0.1714412122964859, 'timestamp': '2025-10-02 00:18:41.921460', 'step': 4382, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:41.978256', 'step': 4382, 'epoch': 1}
{'type': 'loss', 'content': 0.08933219313621521, 'timestamp': '2025-10-02 00:18:41.981980', 'step': 4383, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:42.038402', 'step': 4383, 'epoch': 1}
{'type': 'loss', 'content': 0.27307337522506714, 'timestamp': '2025-10-02 00:18:42.045585', 'step': 4384, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:18:42.101495', 'step': 4384, 'epoch': 1}
{'type': 'loss', 'content': 0.131108358502388, 'timestamp': '2025-10-02 00:18:42.110592', 'step': 4385, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:42.176373', 'step': 4385, 'epoch': 1}
{'type': 'loss', 'content': 0.03847294673323631, 'timestamp': '2025-10-02 00:18:42.179127', 'step': 4386, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:42.245686', 'step': 4386, 'epoch': 1}
{'type': 'loss', 'content': 0.10196947306394577, 'timestamp': '2025-10-02 00:18:42.249239', 'step': 4387, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:42.307065', 'step': 4387, 'epoch': 1}
{'type': 'loss', 'content': 0.0538611076772213, 'timestamp': '2025-10-02 00:18:42.313840', 'step': 4388, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:42.378913', 'step': 4388, 'epoch': 1}
{'type': 'loss', 'content': 0.04199293628334999, 'timestamp': '2025-10-02 00:18:42.389428', 'step': 4389, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:42.451294', 'step': 4389, 'epoch': 1}
{'type': 'loss', 'content': 0.10560327023267746, 'timestamp': '2025-10-02 00:18:42.457253', 'step': 4390, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:42.513164', 'step': 4390, 'epoch': 1}
{'type': 'loss', 'content': 0.05299507826566696, 'timestamp': '2025-10-02 00:18:42.522963', 'step': 4391, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:42.582540', 'step': 4391, 'epoch': 1}
{'type': 'loss', 'content': 0.09335175156593323, 'timestamp': '2025-10-02 00:18:42.589403', 'step': 4392, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:42.644457', 'step': 4392, 'epoch': 1}
{'type': 'loss', 'content': 0.06882425397634506, 'timestamp': '2025-10-02 00:18:42.646858', 'step': 4393, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:42.708358', 'step': 4393, 'epoch': 1}
{'type': 'loss', 'content': 0.034451983869075775, 'timestamp': '2025-10-02 00:18:42.717828', 'step': 4394, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:42.774933', 'step': 4394, 'epoch': 1}
{'type': 'loss', 'content': 0.12168209254741669, 'timestamp': '2025-10-02 00:18:42.778486', 'step': 4395, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:42.843060', 'step': 4395, 'epoch': 1}
{'type': 'loss', 'content': 0.06470950692892075, 'timestamp': '2025-10-02 00:18:42.851288', 'step': 4396, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:42.905676', 'step': 4396, 'epoch': 1}
{'type': 'loss', 'content': 0.0329471230506897, 'timestamp': '2025-10-02 00:18:42.916130', 'step': 4397, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:42.971882', 'step': 4397, 'epoch': 1}
{'type': 'loss', 'content': 0.07927004992961884, 'timestamp': '2025-10-02 00:18:42.975531', 'step': 4398, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:43.034617', 'step': 4398, 'epoch': 1}
{'type': 'loss', 'content': 0.03796452283859253, 'timestamp': '2025-10-02 00:18:43.037717', 'step': 4399, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:18:43.095243', 'step': 4399, 'epoch': 1}
{'type': 'loss', 'content': 0.13561120629310608, 'timestamp': '2025-10-02 00:18:43.102212', 'step': 4400, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:18:43.158891', 'step': 4400, 'epoch': 1}
{'type': 'loss', 'content': 0.12120275944471359, 'timestamp': '2025-10-02 00:18:43.161235', 'step': 4401, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:43.220744', 'step': 4401, 'epoch': 1}
{'type': 'loss', 'content': 0.18302029371261597, 'timestamp': '2025-10-02 00:18:43.223234', 'step': 4402, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:43.279949', 'step': 4402, 'epoch': 1}
{'type': 'loss', 'content': 0.086147241294384, 'timestamp': '2025-10-02 00:18:43.283354', 'step': 4403, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:43.348252', 'step': 4403, 'epoch': 1}
{'type': 'loss', 'content': 0.14330586791038513, 'timestamp': '2025-10-02 00:18:43.355439', 'step': 4404, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:43.410649', 'step': 4404, 'epoch': 1}
{'type': 'loss', 'content': 0.06002004072070122, 'timestamp': '2025-10-02 00:18:43.420041', 'step': 4405, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:43.479693', 'step': 4405, 'epoch': 1}
{'type': 'loss', 'content': 0.0657101720571518, 'timestamp': '2025-10-02 00:18:43.482452', 'step': 4406, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:43.539176', 'step': 4406, 'epoch': 1}
{'type': 'loss', 'content': 0.05580679327249527, 'timestamp': '2025-10-02 00:18:43.548558', 'step': 4407, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:43.605746', 'step': 4407, 'epoch': 1}
{'type': 'loss', 'content': 0.06657518446445465, 'timestamp': '2025-10-02 00:18:43.612281', 'step': 4408, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:43.669515', 'step': 4408, 'epoch': 1}
{'type': 'loss', 'content': 0.04880131036043167, 'timestamp': '2025-10-02 00:18:43.672849', 'step': 4409, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:18:43.728900', 'step': 4409, 'epoch': 1}
{'type': 'loss', 'content': 0.12791310250759125, 'timestamp': '2025-10-02 00:18:43.733270', 'step': 4410, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:43.795015', 'step': 4410, 'epoch': 1}
{'type': 'loss', 'content': 0.09128621220588684, 'timestamp': '2025-10-02 00:18:43.804822', 'step': 4411, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:43.862207', 'step': 4411, 'epoch': 1}
{'type': 'loss', 'content': 0.08720620721578598, 'timestamp': '2025-10-02 00:18:43.868979', 'step': 4412, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:43.923774', 'step': 4412, 'epoch': 1}
{'type': 'loss', 'content': 0.1824037730693817, 'timestamp': '2025-10-02 00:18:43.927243', 'step': 4413, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:43.985280', 'step': 4413, 'epoch': 1}
{'type': 'loss', 'content': 0.07716401666402817, 'timestamp': '2025-10-02 00:18:43.995136', 'step': 4414, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:44.058184', 'step': 4414, 'epoch': 1}
{'type': 'loss', 'content': 0.05072195455431938, 'timestamp': '2025-10-02 00:18:44.060760', 'step': 4415, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:44.116261', 'step': 4415, 'epoch': 1}
{'type': 'loss', 'content': 0.10037321597337723, 'timestamp': '2025-10-02 00:18:44.123024', 'step': 4416, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:18:44.190782', 'step': 4416, 'epoch': 1}
{'type': 'loss', 'content': 0.034750815480947495, 'timestamp': '2025-10-02 00:18:44.204034', 'step': 4417, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:44.260734', 'step': 4417, 'epoch': 1}
{'type': 'loss', 'content': 0.058271996676921844, 'timestamp': '2025-10-02 00:18:44.263907', 'step': 4418, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:18:44.320451', 'step': 4418, 'epoch': 1}
{'type': 'loss', 'content': 0.16542954742908478, 'timestamp': '2025-10-02 00:18:44.323499', 'step': 4419, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:44.379641', 'step': 4419, 'epoch': 1}
{'type': 'loss', 'content': 0.050414782017469406, 'timestamp': '2025-10-02 00:18:44.387933', 'step': 4420, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:18:44.463039', 'step': 4420, 'epoch': 1}
{'type': 'loss', 'content': 0.0433398112654686, 'timestamp': '2025-10-02 00:18:44.477632', 'step': 4421, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 560], 'flops': 11200068058304.0}, 'timestamp': '2025-10-02 00:18:44.562474', 'step': 4421, 'epoch': 1}
{'type': 'loss', 'content': 0.01029943861067295, 'timestamp': '2025-10-02 00:18:44.577876', 'step': 4422, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:44.636911', 'step': 4422, 'epoch': 1}
{'type': 'loss', 'content': 0.07786157727241516, 'timestamp': '2025-10-02 00:18:44.640580', 'step': 4423, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:18:44.695910', 'step': 4423, 'epoch': 1}
{'type': 'loss', 'content': 0.19218809902668, 'timestamp': '2025-10-02 00:18:44.703093', 'step': 4424, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:44.761139', 'step': 4424, 'epoch': 1}
{'type': 'loss', 'content': 0.10064983367919922, 'timestamp': '2025-10-02 00:18:44.763678', 'step': 4425, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:44.817778', 'step': 4425, 'epoch': 1}
{'type': 'loss', 'content': 0.04296348989009857, 'timestamp': '2025-10-02 00:18:44.827372', 'step': 4426, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:18:44.888886', 'step': 4426, 'epoch': 1}
{'type': 'loss', 'content': 0.04598928987979889, 'timestamp': '2025-10-02 00:18:44.899775', 'step': 4427, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:44.959186', 'step': 4427, 'epoch': 1}
{'type': 'loss', 'content': 0.01879151165485382, 'timestamp': '2025-10-02 00:18:44.970395', 'step': 4428, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:45.023898', 'step': 4428, 'epoch': 1}
{'type': 'loss', 'content': 0.03260544314980507, 'timestamp': '2025-10-02 00:18:45.029689', 'step': 4429, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:45.086359', 'step': 4429, 'epoch': 1}
{'type': 'loss', 'content': 0.07614241540431976, 'timestamp': '2025-10-02 00:18:45.094017', 'step': 4430, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:45.148007', 'step': 4430, 'epoch': 1}
{'type': 'loss', 'content': 0.08940336853265762, 'timestamp': '2025-10-02 00:18:45.150708', 'step': 4431, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:45.205029', 'step': 4431, 'epoch': 1}
{'type': 'loss', 'content': 0.10596206784248352, 'timestamp': '2025-10-02 00:18:45.211350', 'step': 4432, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:18:45.263905', 'step': 4432, 'epoch': 1}
{'type': 'loss', 'content': 0.12092115730047226, 'timestamp': '2025-10-02 00:18:45.266351', 'step': 4433, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:45.320085', 'step': 4433, 'epoch': 1}
{'type': 'loss', 'content': 0.18316839635372162, 'timestamp': '2025-10-02 00:18:45.322710', 'step': 4434, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:45.377428', 'step': 4434, 'epoch': 1}
{'type': 'loss', 'content': 0.05431978404521942, 'timestamp': '2025-10-02 00:18:45.379715', 'step': 4435, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:45.433426', 'step': 4435, 'epoch': 1}
{'type': 'loss', 'content': 0.061310216784477234, 'timestamp': '2025-10-02 00:18:45.440104', 'step': 4436, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:45.493003', 'step': 4436, 'epoch': 1}
{'type': 'loss', 'content': 0.10320492833852768, 'timestamp': '2025-10-02 00:18:45.495611', 'step': 4437, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:45.549601', 'step': 4437, 'epoch': 1}
{'type': 'loss', 'content': 0.03802688792347908, 'timestamp': '2025-10-02 00:18:45.555614', 'step': 4438, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:45.609425', 'step': 4438, 'epoch': 1}
{'type': 'loss', 'content': 0.16093231737613678, 'timestamp': '2025-10-02 00:18:45.612262', 'step': 4439, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:45.669734', 'step': 4439, 'epoch': 1}
{'type': 'loss', 'content': 0.06665380299091339, 'timestamp': '2025-10-02 00:18:45.678807', 'step': 4440, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:45.733164', 'step': 4440, 'epoch': 1}
{'type': 'loss', 'content': 0.04107388108968735, 'timestamp': '2025-10-02 00:18:45.740921', 'step': 4441, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:45.795933', 'step': 4441, 'epoch': 1}
{'type': 'loss', 'content': 0.030669301748275757, 'timestamp': '2025-10-02 00:18:45.805543', 'step': 4442, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:45.859937', 'step': 4442, 'epoch': 1}
{'type': 'loss', 'content': 0.05074351653456688, 'timestamp': '2025-10-02 00:18:45.862236', 'step': 4443, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:45.915608', 'step': 4443, 'epoch': 1}
{'type': 'loss', 'content': 0.1911865919828415, 'timestamp': '2025-10-02 00:18:45.921889', 'step': 4444, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:45.975382', 'step': 4444, 'epoch': 1}
{'type': 'loss', 'content': 0.0729786679148674, 'timestamp': '2025-10-02 00:18:45.977831', 'step': 4445, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:46.033615', 'step': 4445, 'epoch': 1}
{'type': 'loss', 'content': 0.035224512219429016, 'timestamp': '2025-10-02 00:18:46.043392', 'step': 4446, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:18:46.096932', 'step': 4446, 'epoch': 1}
{'type': 'loss', 'content': 0.18494655191898346, 'timestamp': '2025-10-02 00:18:46.099400', 'step': 4447, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:46.153170', 'step': 4447, 'epoch': 1}
{'type': 'loss', 'content': 0.08207479864358902, 'timestamp': '2025-10-02 00:18:46.158986', 'step': 4448, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:18:46.211977', 'step': 4448, 'epoch': 1}
{'type': 'loss', 'content': 0.17006374895572662, 'timestamp': '2025-10-02 00:18:46.214390', 'step': 4449, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:18:46.268252', 'step': 4449, 'epoch': 1}
{'type': 'loss', 'content': 0.1608896255493164, 'timestamp': '2025-10-02 00:18:46.270729', 'step': 4450, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:46.324599', 'step': 4450, 'epoch': 1}
{'type': 'loss', 'content': 0.07933976501226425, 'timestamp': '2025-10-02 00:18:46.332077', 'step': 4451, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:46.386792', 'step': 4451, 'epoch': 1}
{'type': 'loss', 'content': 0.17713233828544617, 'timestamp': '2025-10-02 00:18:46.392927', 'step': 4452, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:46.446760', 'step': 4452, 'epoch': 1}
{'type': 'loss', 'content': 0.1547539234161377, 'timestamp': '2025-10-02 00:18:46.456317', 'step': 4453, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:46.509764', 'step': 4453, 'epoch': 1}
{'type': 'loss', 'content': 0.1615525782108307, 'timestamp': '2025-10-02 00:18:46.512414', 'step': 4454, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:46.566610', 'step': 4454, 'epoch': 1}
{'type': 'loss', 'content': 0.0861496552824974, 'timestamp': '2025-10-02 00:18:46.568933', 'step': 4455, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:46.622867', 'step': 4455, 'epoch': 1}
{'type': 'loss', 'content': 0.07945634424686432, 'timestamp': '2025-10-02 00:18:46.628764', 'step': 4456, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:46.682242', 'step': 4456, 'epoch': 1}
{'type': 'loss', 'content': 0.12785956263542175, 'timestamp': '2025-10-02 00:18:46.684568', 'step': 4457, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:46.743412', 'step': 4457, 'epoch': 1}
{'type': 'loss', 'content': 0.019380904734134674, 'timestamp': '2025-10-02 00:18:46.753852', 'step': 4458, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:46.809321', 'step': 4458, 'epoch': 1}
{'type': 'loss', 'content': 0.039719149470329285, 'timestamp': '2025-10-02 00:18:46.818882', 'step': 4459, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:46.874411', 'step': 4459, 'epoch': 1}
{'type': 'loss', 'content': 0.041243746876716614, 'timestamp': '2025-10-02 00:18:46.880409', 'step': 4460, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:46.934191', 'step': 4460, 'epoch': 1}
{'type': 'loss', 'content': 0.06753148138523102, 'timestamp': '2025-10-02 00:18:46.939446', 'step': 4461, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:46.998725', 'step': 4461, 'epoch': 1}
{'type': 'loss', 'content': 0.033764827996492386, 'timestamp': '2025-10-02 00:18:47.009196', 'step': 4462, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:47.064542', 'step': 4462, 'epoch': 1}
{'type': 'loss', 'content': 0.025451377034187317, 'timestamp': '2025-10-02 00:18:47.074338', 'step': 4463, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:18:47.128167', 'step': 4463, 'epoch': 1}
{'type': 'loss', 'content': 0.16960838437080383, 'timestamp': '2025-10-02 00:18:47.134440', 'step': 4464, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:47.188394', 'step': 4464, 'epoch': 1}
{'type': 'loss', 'content': 0.049592580646276474, 'timestamp': '2025-10-02 00:18:47.198201', 'step': 4465, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:47.252543', 'step': 4465, 'epoch': 1}
{'type': 'loss', 'content': 0.0779934749007225, 'timestamp': '2025-10-02 00:18:47.262140', 'step': 4466, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:47.316548', 'step': 4466, 'epoch': 1}
{'type': 'loss', 'content': 0.011274600401520729, 'timestamp': '2025-10-02 00:18:47.322741', 'step': 4467, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:47.376610', 'step': 4467, 'epoch': 1}
{'type': 'loss', 'content': 0.12064595520496368, 'timestamp': '2025-10-02 00:18:47.383326', 'step': 4468, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:47.437011', 'step': 4468, 'epoch': 1}
{'type': 'loss', 'content': 0.03540954366326332, 'timestamp': '2025-10-02 00:18:47.439380', 'step': 4469, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:47.493656', 'step': 4469, 'epoch': 1}
{'type': 'loss', 'content': 0.21922719478607178, 'timestamp': '2025-10-02 00:18:47.496230', 'step': 4470, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:18:47.557957', 'step': 4470, 'epoch': 1}
{'type': 'loss', 'content': 0.039374709129333496, 'timestamp': '2025-10-02 00:18:47.568854', 'step': 4471, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:47.623426', 'step': 4471, 'epoch': 1}
{'type': 'loss', 'content': 0.0594831146299839, 'timestamp': '2025-10-02 00:18:47.631750', 'step': 4472, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:47.686590', 'step': 4472, 'epoch': 1}
{'type': 'loss', 'content': 0.1194266751408577, 'timestamp': '2025-10-02 00:18:47.689548', 'step': 4473, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:47.743584', 'step': 4473, 'epoch': 1}
{'type': 'loss', 'content': 0.22588689625263214, 'timestamp': '2025-10-02 00:18:47.746394', 'step': 4474, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:47.801094', 'step': 4474, 'epoch': 1}
{'type': 'loss', 'content': 0.08090421557426453, 'timestamp': '2025-10-02 00:18:47.807012', 'step': 4475, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:18:47.883829', 'step': 4475, 'epoch': 1}
{'type': 'loss', 'content': 0.028229476884007454, 'timestamp': '2025-10-02 00:18:47.898657', 'step': 4476, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:47.952078', 'step': 4476, 'epoch': 1}
{'type': 'loss', 'content': 0.07977932691574097, 'timestamp': '2025-10-02 00:18:47.954611', 'step': 4477, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:48.008256', 'step': 4477, 'epoch': 1}
{'type': 'loss', 'content': 0.18546997010707855, 'timestamp': '2025-10-02 00:18:48.010692', 'step': 4478, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:48.064496', 'step': 4478, 'epoch': 1}
{'type': 'loss', 'content': 0.05026395246386528, 'timestamp': '2025-10-02 00:18:48.072045', 'step': 4479, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:48.126794', 'step': 4479, 'epoch': 1}
{'type': 'loss', 'content': 0.2470138818025589, 'timestamp': '2025-10-02 00:18:48.132752', 'step': 4480, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:18:48.198323', 'step': 4480, 'epoch': 1}
{'type': 'loss', 'content': 0.08576551079750061, 'timestamp': '2025-10-02 00:18:48.211579', 'step': 4481, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:48.266217', 'step': 4481, 'epoch': 1}
{'type': 'loss', 'content': 0.06925296783447266, 'timestamp': '2025-10-02 00:18:48.275981', 'step': 4482, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:48.330642', 'step': 4482, 'epoch': 1}
{'type': 'loss', 'content': 0.02516770362854004, 'timestamp': '2025-10-02 00:18:48.332963', 'step': 4483, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:48.386560', 'step': 4483, 'epoch': 1}
{'type': 'loss', 'content': 0.045166898518800735, 'timestamp': '2025-10-02 00:18:48.392615', 'step': 4484, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:48.446580', 'step': 4484, 'epoch': 1}
{'type': 'loss', 'content': 0.09730024635791779, 'timestamp': '2025-10-02 00:18:48.449158', 'step': 4485, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:48.502589', 'step': 4485, 'epoch': 1}
{'type': 'loss', 'content': 0.2299099713563919, 'timestamp': '2025-10-02 00:18:48.504924', 'step': 4486, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:48.559424', 'step': 4486, 'epoch': 1}
{'type': 'loss', 'content': 0.12642978131771088, 'timestamp': '2025-10-02 00:18:48.561832', 'step': 4487, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:48.616602', 'step': 4487, 'epoch': 1}
{'type': 'loss', 'content': 0.05724699795246124, 'timestamp': '2025-10-02 00:18:48.627176', 'step': 4488, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:48.680621', 'step': 4488, 'epoch': 1}
{'type': 'loss', 'content': 0.16117650270462036, 'timestamp': '2025-10-02 00:18:48.683127', 'step': 4489, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:48.738948', 'step': 4489, 'epoch': 1}
{'type': 'loss', 'content': 0.027976926416158676, 'timestamp': '2025-10-02 00:18:48.748754', 'step': 4490, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:18:48.803661', 'step': 4490, 'epoch': 1}
{'type': 'loss', 'content': 0.17146813869476318, 'timestamp': '2025-10-02 00:18:48.806121', 'step': 4491, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:48.860013', 'step': 4491, 'epoch': 1}
{'type': 'loss', 'content': 0.039719827473163605, 'timestamp': '2025-10-02 00:18:48.868445', 'step': 4492, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:48.921679', 'step': 4492, 'epoch': 1}
{'type': 'loss', 'content': 0.1514510214328766, 'timestamp': '2025-10-02 00:18:48.924242', 'step': 4493, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:48.978133', 'step': 4493, 'epoch': 1}
{'type': 'loss', 'content': 0.08664350211620331, 'timestamp': '2025-10-02 00:18:48.980474', 'step': 4494, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:49.038556', 'step': 4494, 'epoch': 1}
{'type': 'loss', 'content': 0.1403920203447342, 'timestamp': '2025-10-02 00:18:49.048966', 'step': 4495, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:49.104910', 'step': 4495, 'epoch': 1}
{'type': 'loss', 'content': 0.0679759755730629, 'timestamp': '2025-10-02 00:18:49.111225', 'step': 4496, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:49.164396', 'step': 4496, 'epoch': 1}
{'type': 'loss', 'content': 0.17003758251667023, 'timestamp': '2025-10-02 00:18:49.166879', 'step': 4497, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:49.220671', 'step': 4497, 'epoch': 1}
{'type': 'loss', 'content': 0.042990706861019135, 'timestamp': '2025-10-02 00:18:49.226664', 'step': 4498, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:49.281120', 'step': 4498, 'epoch': 1}
{'type': 'loss', 'content': 0.18889549374580383, 'timestamp': '2025-10-02 00:18:49.286977', 'step': 4499, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:49.340687', 'step': 4499, 'epoch': 1}
{'type': 'loss', 'content': 0.06211184710264206, 'timestamp': '2025-10-02 00:18:49.346617', 'step': 4500, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 4500', 'timestamp': '2025-10-02 00:18:49.762372', 'step': 4500, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:49.818713', 'step': 4500, 'epoch': 1}
{'type': 'loss', 'content': 0.11062737554311752, 'timestamp': '2025-10-02 00:18:49.821012', 'step': 4501, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:49.876582', 'step': 4501, 'epoch': 1}
{'type': 'loss', 'content': 0.14644622802734375, 'timestamp': '2025-10-02 00:18:49.883451', 'step': 4502, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:49.937731', 'step': 4502, 'epoch': 1}
{'type': 'loss', 'content': 0.18313200771808624, 'timestamp': '2025-10-02 00:18:49.940019', 'step': 4503, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:49.994443', 'step': 4503, 'epoch': 1}
{'type': 'loss', 'content': 0.0674266517162323, 'timestamp': '2025-10-02 00:18:50.000663', 'step': 4504, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:50.054918', 'step': 4504, 'epoch': 1}
{'type': 'loss', 'content': 0.04881473258137703, 'timestamp': '2025-10-02 00:18:50.062241', 'step': 4505, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:18:50.116142', 'step': 4505, 'epoch': 1}
{'type': 'loss', 'content': 0.21092067658901215, 'timestamp': '2025-10-02 00:18:50.118595', 'step': 4506, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:50.173855', 'step': 4506, 'epoch': 1}
{'type': 'loss', 'content': 0.06445129960775375, 'timestamp': '2025-10-02 00:18:50.179673', 'step': 4507, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:50.234456', 'step': 4507, 'epoch': 1}
{'type': 'loss', 'content': 0.03776252642273903, 'timestamp': '2025-10-02 00:18:50.240485', 'step': 4508, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:50.293478', 'step': 4508, 'epoch': 1}
{'type': 'loss', 'content': 0.03257041424512863, 'timestamp': '2025-10-02 00:18:50.299385', 'step': 4509, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:50.353203', 'step': 4509, 'epoch': 1}
{'type': 'loss', 'content': 0.10462608933448792, 'timestamp': '2025-10-02 00:18:50.356063', 'step': 4510, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:18:50.417260', 'step': 4510, 'epoch': 1}
{'type': 'loss', 'content': 0.05747847631573677, 'timestamp': '2025-10-02 00:18:50.428156', 'step': 4511, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:50.482017', 'step': 4511, 'epoch': 1}
{'type': 'loss', 'content': 0.15550804138183594, 'timestamp': '2025-10-02 00:18:50.488066', 'step': 4512, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:50.540589', 'step': 4512, 'epoch': 1}
{'type': 'loss', 'content': 0.19581419229507446, 'timestamp': '2025-10-02 00:18:50.543106', 'step': 4513, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:50.597458', 'step': 4513, 'epoch': 1}
{'type': 'loss', 'content': 0.042225636541843414, 'timestamp': '2025-10-02 00:18:50.604931', 'step': 4514, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:50.658658', 'step': 4514, 'epoch': 1}
{'type': 'loss', 'content': 0.12325592339038849, 'timestamp': '2025-10-02 00:18:50.660919', 'step': 4515, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:50.714583', 'step': 4515, 'epoch': 1}
{'type': 'loss', 'content': 0.019722824916243553, 'timestamp': '2025-10-02 00:18:50.720435', 'step': 4516, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:50.773918', 'step': 4516, 'epoch': 1}
{'type': 'loss', 'content': 0.08015820384025574, 'timestamp': '2025-10-02 00:18:50.783319', 'step': 4517, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:50.837052', 'step': 4517, 'epoch': 1}
{'type': 'loss', 'content': 0.10139783471822739, 'timestamp': '2025-10-02 00:18:50.839526', 'step': 4518, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:50.893072', 'step': 4518, 'epoch': 1}
{'type': 'loss', 'content': 0.15256671607494354, 'timestamp': '2025-10-02 00:18:50.895409', 'step': 4519, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:50.950009', 'step': 4519, 'epoch': 1}
{'type': 'loss', 'content': 0.012017824687063694, 'timestamp': '2025-10-02 00:18:50.956117', 'step': 4520, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:51.010646', 'step': 4520, 'epoch': 1}
{'type': 'loss', 'content': 0.09767688065767288, 'timestamp': '2025-10-02 00:18:51.015231', 'step': 4521, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:51.070164', 'step': 4521, 'epoch': 1}
{'type': 'loss', 'content': 0.03051276132464409, 'timestamp': '2025-10-02 00:18:51.072710', 'step': 4522, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:51.127824', 'step': 4522, 'epoch': 1}
{'type': 'loss', 'content': 0.24527300894260406, 'timestamp': '2025-10-02 00:18:51.130238', 'step': 4523, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:51.189219', 'step': 4523, 'epoch': 1}
{'type': 'loss', 'content': 0.1776060312986374, 'timestamp': '2025-10-02 00:18:51.201205', 'step': 4524, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:18:51.255802', 'step': 4524, 'epoch': 1}
{'type': 'loss', 'content': 0.13946549594402313, 'timestamp': '2025-10-02 00:18:51.258309', 'step': 4525, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:51.312263', 'step': 4525, 'epoch': 1}
{'type': 'loss', 'content': 0.05125734210014343, 'timestamp': '2025-10-02 00:18:51.319690', 'step': 4526, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:51.375561', 'step': 4526, 'epoch': 1}
{'type': 'loss', 'content': 0.08556336909532547, 'timestamp': '2025-10-02 00:18:51.377817', 'step': 4527, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:18:51.439231', 'step': 4527, 'epoch': 1}
{'type': 'loss', 'content': 0.05962902680039406, 'timestamp': '2025-10-02 00:18:51.450837', 'step': 4528, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:18:51.510375', 'step': 4528, 'epoch': 1}
{'type': 'loss', 'content': 0.04805682972073555, 'timestamp': '2025-10-02 00:18:51.521849', 'step': 4529, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:51.576463', 'step': 4529, 'epoch': 1}
{'type': 'loss', 'content': 0.12015330046415329, 'timestamp': '2025-10-02 00:18:51.578892', 'step': 4530, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:51.635324', 'step': 4530, 'epoch': 1}
{'type': 'loss', 'content': 0.11084706336259842, 'timestamp': '2025-10-02 00:18:51.637345', 'step': 4531, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:51.692190', 'step': 4531, 'epoch': 1}
{'type': 'loss', 'content': 0.0747847780585289, 'timestamp': '2025-10-02 00:18:51.701931', 'step': 4532, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:51.757189', 'step': 4532, 'epoch': 1}
{'type': 'loss', 'content': 0.04643787816166878, 'timestamp': '2025-10-02 00:18:51.763877', 'step': 4533, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:18:51.818113', 'step': 4533, 'epoch': 1}
{'type': 'loss', 'content': 0.11785174906253815, 'timestamp': '2025-10-02 00:18:51.820891', 'step': 4534, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:51.874735', 'step': 4534, 'epoch': 1}
{'type': 'loss', 'content': 0.09584561735391617, 'timestamp': '2025-10-02 00:18:51.877285', 'step': 4535, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:18:51.930738', 'step': 4535, 'epoch': 1}
{'type': 'loss', 'content': 0.1556379646062851, 'timestamp': '2025-10-02 00:18:51.937553', 'step': 4536, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:51.994796', 'step': 4536, 'epoch': 1}
{'type': 'loss', 'content': 0.046879008412361145, 'timestamp': '2025-10-02 00:18:52.006108', 'step': 4537, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:52.061365', 'step': 4537, 'epoch': 1}
{'type': 'loss', 'content': 0.1107635498046875, 'timestamp': '2025-10-02 00:18:52.066794', 'step': 4538, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:52.129159', 'step': 4538, 'epoch': 1}
{'type': 'loss', 'content': 0.03201912343502045, 'timestamp': '2025-10-02 00:18:52.135084', 'step': 4539, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:52.192718', 'step': 4539, 'epoch': 1}
{'type': 'loss', 'content': 0.04361944645643234, 'timestamp': '2025-10-02 00:18:52.200793', 'step': 4540, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:52.254067', 'step': 4540, 'epoch': 1}
{'type': 'loss', 'content': 0.06625469774007797, 'timestamp': '2025-10-02 00:18:52.261111', 'step': 4541, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:52.323265', 'step': 4541, 'epoch': 1}
{'type': 'loss', 'content': 0.05931715667247772, 'timestamp': '2025-10-02 00:18:52.333695', 'step': 4542, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:52.392534', 'step': 4542, 'epoch': 1}
{'type': 'loss', 'content': 0.03807518631219864, 'timestamp': '2025-10-02 00:18:52.398398', 'step': 4543, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:18:52.453467', 'step': 4543, 'epoch': 1}
{'type': 'loss', 'content': 0.12070664018392563, 'timestamp': '2025-10-02 00:18:52.460438', 'step': 4544, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:52.515063', 'step': 4544, 'epoch': 1}
{'type': 'loss', 'content': 0.08822640031576157, 'timestamp': '2025-10-02 00:18:52.517236', 'step': 4545, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:18:52.570623', 'step': 4545, 'epoch': 1}
{'type': 'loss', 'content': 0.20348264276981354, 'timestamp': '2025-10-02 00:18:52.572666', 'step': 4546, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:52.630757', 'step': 4546, 'epoch': 1}
{'type': 'loss', 'content': 0.021626116707921028, 'timestamp': '2025-10-02 00:18:52.641184', 'step': 4547, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:52.694711', 'step': 4547, 'epoch': 1}
{'type': 'loss', 'content': 0.059662070125341415, 'timestamp': '2025-10-02 00:18:52.700538', 'step': 4548, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:52.752891', 'step': 4548, 'epoch': 1}
{'type': 'loss', 'content': 0.1868388056755066, 'timestamp': '2025-10-02 00:18:52.755331', 'step': 4549, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:52.809537', 'step': 4549, 'epoch': 1}
{'type': 'loss', 'content': 0.06355207413434982, 'timestamp': '2025-10-02 00:18:52.818840', 'step': 4550, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:52.873837', 'step': 4550, 'epoch': 1}
{'type': 'loss', 'content': 0.06086822971701622, 'timestamp': '2025-10-02 00:18:52.879490', 'step': 4551, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:52.933502', 'step': 4551, 'epoch': 1}
{'type': 'loss', 'content': 0.02471424825489521, 'timestamp': '2025-10-02 00:18:52.940541', 'step': 4552, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:18:53.013161', 'step': 4552, 'epoch': 1}
{'type': 'loss', 'content': 0.0062083289958536625, 'timestamp': '2025-10-02 00:18:53.028050', 'step': 4553, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:53.081393', 'step': 4553, 'epoch': 1}
{'type': 'loss', 'content': 0.0563485361635685, 'timestamp': '2025-10-02 00:18:53.084336', 'step': 4554, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:53.146884', 'step': 4554, 'epoch': 1}
{'type': 'loss', 'content': 0.10168934613466263, 'timestamp': '2025-10-02 00:18:53.156266', 'step': 4555, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:18:53.211574', 'step': 4555, 'epoch': 1}
{'type': 'loss', 'content': 0.17190666496753693, 'timestamp': '2025-10-02 00:18:53.218270', 'step': 4556, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:53.273045', 'step': 4556, 'epoch': 1}
{'type': 'loss', 'content': 0.08011692017316818, 'timestamp': '2025-10-02 00:18:53.275677', 'step': 4557, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:53.331236', 'step': 4557, 'epoch': 1}
{'type': 'loss', 'content': 0.022597046568989754, 'timestamp': '2025-10-02 00:18:53.338989', 'step': 4558, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:18:53.414759', 'step': 4558, 'epoch': 1}
{'type': 'loss', 'content': 0.04172307625412941, 'timestamp': '2025-10-02 00:18:53.425887', 'step': 4559, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:53.482306', 'step': 4559, 'epoch': 1}
{'type': 'loss', 'content': 0.02968783490359783, 'timestamp': '2025-10-02 00:18:53.489239', 'step': 4560, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:53.544307', 'step': 4560, 'epoch': 1}
{'type': 'loss', 'content': 0.06275754421949387, 'timestamp': '2025-10-02 00:18:53.550250', 'step': 4561, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:53.607550', 'step': 4561, 'epoch': 1}
{'type': 'loss', 'content': 0.15583905577659607, 'timestamp': '2025-10-02 00:18:53.617041', 'step': 4562, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:18:53.692586', 'step': 4562, 'epoch': 1}
{'type': 'loss', 'content': 0.15082095563411713, 'timestamp': '2025-10-02 00:18:53.703301', 'step': 4563, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:18:53.759056', 'step': 4563, 'epoch': 1}
{'type': 'loss', 'content': 0.18294718861579895, 'timestamp': '2025-10-02 00:18:53.765444', 'step': 4564, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:53.820250', 'step': 4564, 'epoch': 1}
{'type': 'loss', 'content': 0.1710858941078186, 'timestamp': '2025-10-02 00:18:53.822482', 'step': 4565, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:53.877885', 'step': 4565, 'epoch': 1}
{'type': 'loss', 'content': 0.0843384638428688, 'timestamp': '2025-10-02 00:18:53.887651', 'step': 4566, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:53.943126', 'step': 4566, 'epoch': 1}
{'type': 'loss', 'content': 0.21486550569534302, 'timestamp': '2025-10-02 00:18:53.946262', 'step': 4567, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:54.000703', 'step': 4567, 'epoch': 1}
{'type': 'loss', 'content': 0.1390884965658188, 'timestamp': '2025-10-02 00:18:54.007171', 'step': 4568, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:54.062392', 'step': 4568, 'epoch': 1}
{'type': 'loss', 'content': 0.11478804796934128, 'timestamp': '2025-10-02 00:18:54.064589', 'step': 4569, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:54.121259', 'step': 4569, 'epoch': 1}
{'type': 'loss', 'content': 0.08979571610689163, 'timestamp': '2025-10-02 00:18:54.124111', 'step': 4570, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:18:54.179987', 'step': 4570, 'epoch': 1}
{'type': 'loss', 'content': 0.13719551265239716, 'timestamp': '2025-10-02 00:18:54.182848', 'step': 4571, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:54.237253', 'step': 4571, 'epoch': 1}
{'type': 'loss', 'content': 0.08375218510627747, 'timestamp': '2025-10-02 00:18:54.243067', 'step': 4572, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:54.297772', 'step': 4572, 'epoch': 1}
{'type': 'loss', 'content': 0.12174408882856369, 'timestamp': '2025-10-02 00:18:54.300936', 'step': 4573, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:18:54.357239', 'step': 4573, 'epoch': 1}
{'type': 'loss', 'content': 0.20621199905872345, 'timestamp': '2025-10-02 00:18:54.360375', 'step': 4574, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:54.415235', 'step': 4574, 'epoch': 1}
{'type': 'loss', 'content': 0.18612223863601685, 'timestamp': '2025-10-02 00:18:54.418267', 'step': 4575, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:54.473773', 'step': 4575, 'epoch': 1}
{'type': 'loss', 'content': 0.2577337324619293, 'timestamp': '2025-10-02 00:18:54.479795', 'step': 4576, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:18:54.536398', 'step': 4576, 'epoch': 1}
{'type': 'loss', 'content': 0.140818253159523, 'timestamp': '2025-10-02 00:18:54.539972', 'step': 4577, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:54.595628', 'step': 4577, 'epoch': 1}
{'type': 'loss', 'content': 0.061845485121011734, 'timestamp': '2025-10-02 00:18:54.598012', 'step': 4578, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:54.653724', 'step': 4578, 'epoch': 1}
{'type': 'loss', 'content': 0.08695391565561295, 'timestamp': '2025-10-02 00:18:54.656693', 'step': 4579, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:18:54.710858', 'step': 4579, 'epoch': 1}
{'type': 'loss', 'content': 0.10876542329788208, 'timestamp': '2025-10-02 00:18:54.717237', 'step': 4580, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:54.771034', 'step': 4580, 'epoch': 1}
{'type': 'loss', 'content': 0.08538702875375748, 'timestamp': '2025-10-02 00:18:54.774221', 'step': 4581, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:18:54.828701', 'step': 4581, 'epoch': 1}
{'type': 'loss', 'content': 0.18051376938819885, 'timestamp': '2025-10-02 00:18:54.831397', 'step': 4582, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:54.887124', 'step': 4582, 'epoch': 1}
{'type': 'loss', 'content': 0.21485787630081177, 'timestamp': '2025-10-02 00:18:54.889676', 'step': 4583, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:54.946071', 'step': 4583, 'epoch': 1}
{'type': 'loss', 'content': 0.07725629210472107, 'timestamp': '2025-10-02 00:18:54.952408', 'step': 4584, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:55.007671', 'step': 4584, 'epoch': 1}
{'type': 'loss', 'content': 0.01794786937534809, 'timestamp': '2025-10-02 00:18:55.018134', 'step': 4585, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:55.072965', 'step': 4585, 'epoch': 1}
{'type': 'loss', 'content': 0.06389376521110535, 'timestamp': '2025-10-02 00:18:55.075520', 'step': 4586, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:55.129353', 'step': 4586, 'epoch': 1}
{'type': 'loss', 'content': 0.23707979917526245, 'timestamp': '2025-10-02 00:18:55.131669', 'step': 4587, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:55.186976', 'step': 4587, 'epoch': 1}
{'type': 'loss', 'content': 0.07673176378011703, 'timestamp': '2025-10-02 00:18:55.192897', 'step': 4588, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:55.246623', 'step': 4588, 'epoch': 1}
{'type': 'loss', 'content': 0.03196680545806885, 'timestamp': '2025-10-02 00:18:55.256311', 'step': 4589, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:55.313697', 'step': 4589, 'epoch': 1}
{'type': 'loss', 'content': 0.07593255490064621, 'timestamp': '2025-10-02 00:18:55.319816', 'step': 4590, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:55.375851', 'step': 4590, 'epoch': 1}
{'type': 'loss', 'content': 0.029406750574707985, 'timestamp': '2025-10-02 00:18:55.377934', 'step': 4591, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:55.432750', 'step': 4591, 'epoch': 1}
{'type': 'loss', 'content': 0.17955276370048523, 'timestamp': '2025-10-02 00:18:55.438814', 'step': 4592, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:55.493254', 'step': 4592, 'epoch': 1}
{'type': 'loss', 'content': 0.11160417646169662, 'timestamp': '2025-10-02 00:18:55.495744', 'step': 4593, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:55.550827', 'step': 4593, 'epoch': 1}
{'type': 'loss', 'content': 0.03486643731594086, 'timestamp': '2025-10-02 00:18:55.558741', 'step': 4594, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:18:55.613069', 'step': 4594, 'epoch': 1}
{'type': 'loss', 'content': 0.21029473841190338, 'timestamp': '2025-10-02 00:18:55.616089', 'step': 4595, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:55.675613', 'step': 4595, 'epoch': 1}
{'type': 'loss', 'content': 0.04886947199702263, 'timestamp': '2025-10-02 00:18:55.686844', 'step': 4596, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:55.740996', 'step': 4596, 'epoch': 1}
{'type': 'loss', 'content': 0.09375996887683868, 'timestamp': '2025-10-02 00:18:55.750776', 'step': 4597, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:55.804983', 'step': 4597, 'epoch': 1}
{'type': 'loss', 'content': 0.05284913256764412, 'timestamp': '2025-10-02 00:18:55.807796', 'step': 4598, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:55.862345', 'step': 4598, 'epoch': 1}
{'type': 'loss', 'content': 0.04321358725428581, 'timestamp': '2025-10-02 00:18:55.868472', 'step': 4599, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:55.921896', 'step': 4599, 'epoch': 1}
{'type': 'loss', 'content': 0.17787407338619232, 'timestamp': '2025-10-02 00:18:55.927789', 'step': 4600, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:55.985614', 'step': 4600, 'epoch': 1}
{'type': 'loss', 'content': 0.04921593517065048, 'timestamp': '2025-10-02 00:18:55.996828', 'step': 4601, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:18:56.058832', 'step': 4601, 'epoch': 1}
{'type': 'loss', 'content': 0.041047848761081696, 'timestamp': '2025-10-02 00:18:56.069553', 'step': 4602, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:56.123351', 'step': 4602, 'epoch': 1}
{'type': 'loss', 'content': 0.20270490646362305, 'timestamp': '2025-10-02 00:18:56.125913', 'step': 4603, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:56.180679', 'step': 4603, 'epoch': 1}
{'type': 'loss', 'content': 0.05057070776820183, 'timestamp': '2025-10-02 00:18:56.191274', 'step': 4604, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:56.248174', 'step': 4604, 'epoch': 1}
{'type': 'loss', 'content': 0.03166976198554039, 'timestamp': '2025-10-02 00:18:56.259426', 'step': 4605, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:18:56.320022', 'step': 4605, 'epoch': 1}
{'type': 'loss', 'content': 0.058794207870960236, 'timestamp': '2025-10-02 00:18:56.322520', 'step': 4606, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:56.376340', 'step': 4606, 'epoch': 1}
{'type': 'loss', 'content': 0.06781909614801407, 'timestamp': '2025-10-02 00:18:56.378909', 'step': 4607, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:56.432561', 'step': 4607, 'epoch': 1}
{'type': 'loss', 'content': 0.09383993595838547, 'timestamp': '2025-10-02 00:18:56.438312', 'step': 4608, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:56.491889', 'step': 4608, 'epoch': 1}
{'type': 'loss', 'content': 0.08404219895601273, 'timestamp': '2025-10-02 00:18:56.499614', 'step': 4609, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:56.571416', 'step': 4609, 'epoch': 1}
{'type': 'loss', 'content': 0.21565933525562286, 'timestamp': '2025-10-02 00:18:56.574275', 'step': 4610, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:18:56.651681', 'step': 4610, 'epoch': 1}
{'type': 'loss', 'content': 0.04836205765604973, 'timestamp': '2025-10-02 00:18:56.665417', 'step': 4611, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:56.721869', 'step': 4611, 'epoch': 1}
{'type': 'loss', 'content': 0.17788398265838623, 'timestamp': '2025-10-02 00:18:56.728415', 'step': 4612, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:56.782638', 'step': 4612, 'epoch': 1}
{'type': 'loss', 'content': 0.04712126404047012, 'timestamp': '2025-10-02 00:18:56.787238', 'step': 4613, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:56.840306', 'step': 4613, 'epoch': 1}
{'type': 'loss', 'content': 0.2693162262439728, 'timestamp': '2025-10-02 00:18:56.842585', 'step': 4614, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:18:56.897430', 'step': 4614, 'epoch': 1}
{'type': 'loss', 'content': 0.06324605643749237, 'timestamp': '2025-10-02 00:18:56.902695', 'step': 4615, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:56.967568', 'step': 4615, 'epoch': 1}
{'type': 'loss', 'content': 0.02456999570131302, 'timestamp': '2025-10-02 00:18:56.977976', 'step': 4616, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:57.031186', 'step': 4616, 'epoch': 1}
{'type': 'loss', 'content': 0.15334054827690125, 'timestamp': '2025-10-02 00:18:57.034671', 'step': 4617, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:57.088788', 'step': 4617, 'epoch': 1}
{'type': 'loss', 'content': 0.062467556446790695, 'timestamp': '2025-10-02 00:18:57.098399', 'step': 4618, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:57.152611', 'step': 4618, 'epoch': 1}
{'type': 'loss', 'content': 0.07106520235538483, 'timestamp': '2025-10-02 00:18:57.156046', 'step': 4619, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:18:57.217919', 'step': 4619, 'epoch': 1}
{'type': 'loss', 'content': 0.06557565927505493, 'timestamp': '2025-10-02 00:18:57.229808', 'step': 4620, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:57.282719', 'step': 4620, 'epoch': 1}
{'type': 'loss', 'content': 0.157701313495636, 'timestamp': '2025-10-02 00:18:57.285087', 'step': 4621, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:57.340775', 'step': 4621, 'epoch': 1}
{'type': 'loss', 'content': 0.19020506739616394, 'timestamp': '2025-10-02 00:18:57.348518', 'step': 4622, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:18:57.407217', 'step': 4622, 'epoch': 1}
{'type': 'loss', 'content': 0.0722125917673111, 'timestamp': '2025-10-02 00:18:57.417648', 'step': 4623, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:57.472524', 'step': 4623, 'epoch': 1}
{'type': 'loss', 'content': 0.07540647685527802, 'timestamp': '2025-10-02 00:18:57.478627', 'step': 4624, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:57.532117', 'step': 4624, 'epoch': 1}
{'type': 'loss', 'content': 0.06752053648233414, 'timestamp': '2025-10-02 00:18:57.537907', 'step': 4625, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:57.592372', 'step': 4625, 'epoch': 1}
{'type': 'loss', 'content': 0.06582693010568619, 'timestamp': '2025-10-02 00:18:57.595109', 'step': 4626, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:57.650192', 'step': 4626, 'epoch': 1}
{'type': 'loss', 'content': 0.05965346843004227, 'timestamp': '2025-10-02 00:18:57.659955', 'step': 4627, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:18:57.714502', 'step': 4627, 'epoch': 1}
{'type': 'loss', 'content': 0.12239629775285721, 'timestamp': '2025-10-02 00:18:57.720336', 'step': 4628, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:18:57.776268', 'step': 4628, 'epoch': 1}
{'type': 'loss', 'content': 0.15619394183158875, 'timestamp': '2025-10-02 00:18:57.778502', 'step': 4629, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:57.832412', 'step': 4629, 'epoch': 1}
{'type': 'loss', 'content': 0.07952498644590378, 'timestamp': '2025-10-02 00:18:57.834934', 'step': 4630, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:57.888193', 'step': 4630, 'epoch': 1}
{'type': 'loss', 'content': 0.18402628600597382, 'timestamp': '2025-10-02 00:18:57.890621', 'step': 4631, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:57.944672', 'step': 4631, 'epoch': 1}
{'type': 'loss', 'content': 0.053069595247507095, 'timestamp': '2025-10-02 00:18:57.950625', 'step': 4632, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:18:58.004264', 'step': 4632, 'epoch': 1}
{'type': 'loss', 'content': 0.06888709217309952, 'timestamp': '2025-10-02 00:18:58.013673', 'step': 4633, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:58.068361', 'step': 4633, 'epoch': 1}
{'type': 'loss', 'content': 0.10399413853883743, 'timestamp': '2025-10-02 00:18:58.070840', 'step': 4634, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:58.124716', 'step': 4634, 'epoch': 1}
{'type': 'loss', 'content': 0.08315431326627731, 'timestamp': '2025-10-02 00:18:58.127294', 'step': 4635, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:58.181370', 'step': 4635, 'epoch': 1}
{'type': 'loss', 'content': 0.17216110229492188, 'timestamp': '2025-10-02 00:18:58.187288', 'step': 4636, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:18:58.248615', 'step': 4636, 'epoch': 1}
{'type': 'loss', 'content': 0.022596491500735283, 'timestamp': '2025-10-02 00:18:58.260593', 'step': 4637, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:58.314622', 'step': 4637, 'epoch': 1}
{'type': 'loss', 'content': 0.1712021827697754, 'timestamp': '2025-10-02 00:18:58.317119', 'step': 4638, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:58.371367', 'step': 4638, 'epoch': 1}
{'type': 'loss', 'content': 0.07174941897392273, 'timestamp': '2025-10-02 00:18:58.373838', 'step': 4639, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:58.428111', 'step': 4639, 'epoch': 1}
{'type': 'loss', 'content': 0.0728951096534729, 'timestamp': '2025-10-02 00:18:58.434751', 'step': 4640, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:18:58.495843', 'step': 4640, 'epoch': 1}
{'type': 'loss', 'content': 0.07517760246992111, 'timestamp': '2025-10-02 00:18:58.507812', 'step': 4641, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:58.561653', 'step': 4641, 'epoch': 1}
{'type': 'loss', 'content': 0.09090396016836166, 'timestamp': '2025-10-02 00:18:58.563914', 'step': 4642, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:58.618363', 'step': 4642, 'epoch': 1}
{'type': 'loss', 'content': 0.0869300588965416, 'timestamp': '2025-10-02 00:18:58.620988', 'step': 4643, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:18:58.675656', 'step': 4643, 'epoch': 1}
{'type': 'loss', 'content': 0.1354724019765854, 'timestamp': '2025-10-02 00:18:58.681789', 'step': 4644, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:58.735284', 'step': 4644, 'epoch': 1}
{'type': 'loss', 'content': 0.04455109313130379, 'timestamp': '2025-10-02 00:18:58.737531', 'step': 4645, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:18:58.791283', 'step': 4645, 'epoch': 1}
{'type': 'loss', 'content': 0.14556650817394257, 'timestamp': '2025-10-02 00:18:58.793469', 'step': 4646, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:18:58.848242', 'step': 4646, 'epoch': 1}
{'type': 'loss', 'content': 0.11336078494787216, 'timestamp': '2025-10-02 00:18:58.850573', 'step': 4647, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:18:58.904023', 'step': 4647, 'epoch': 1}
{'type': 'loss', 'content': 0.05500822514295578, 'timestamp': '2025-10-02 00:18:58.912295', 'step': 4648, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:58.965749', 'step': 4648, 'epoch': 1}
{'type': 'loss', 'content': 0.08099592477083206, 'timestamp': '2025-10-02 00:18:58.976157', 'step': 4649, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:18:59.029753', 'step': 4649, 'epoch': 1}
{'type': 'loss', 'content': 0.21938185393810272, 'timestamp': '2025-10-02 00:18:59.032290', 'step': 4650, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:59.087030', 'step': 4650, 'epoch': 1}
{'type': 'loss', 'content': 0.0666092187166214, 'timestamp': '2025-10-02 00:18:59.089521', 'step': 4651, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:18:59.158552', 'step': 4651, 'epoch': 1}
{'type': 'loss', 'content': 0.03829590603709221, 'timestamp': '2025-10-02 00:18:59.171963', 'step': 4652, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:18:59.225238', 'step': 4652, 'epoch': 1}
{'type': 'loss', 'content': 0.12443874776363373, 'timestamp': '2025-10-02 00:18:59.227474', 'step': 4653, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:59.282783', 'step': 4653, 'epoch': 1}
{'type': 'loss', 'content': 0.033393315970897675, 'timestamp': '2025-10-02 00:18:59.292592', 'step': 4654, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:59.346693', 'step': 4654, 'epoch': 1}
{'type': 'loss', 'content': 0.1119019091129303, 'timestamp': '2025-10-02 00:18:59.349159', 'step': 4655, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:59.403718', 'step': 4655, 'epoch': 1}
{'type': 'loss', 'content': 0.02285425364971161, 'timestamp': '2025-10-02 00:18:59.410477', 'step': 4656, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:59.463881', 'step': 4656, 'epoch': 1}
{'type': 'loss', 'content': 0.04465511068701744, 'timestamp': '2025-10-02 00:18:59.466431', 'step': 4657, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:18:59.520135', 'step': 4657, 'epoch': 1}
{'type': 'loss', 'content': 0.15618085861206055, 'timestamp': '2025-10-02 00:18:59.522756', 'step': 4658, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:59.576943', 'step': 4658, 'epoch': 1}
{'type': 'loss', 'content': 0.055383797734975815, 'timestamp': '2025-10-02 00:18:59.579469', 'step': 4659, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:18:59.633270', 'step': 4659, 'epoch': 1}
{'type': 'loss', 'content': 0.05559994652867317, 'timestamp': '2025-10-02 00:18:59.639555', 'step': 4660, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:18:59.693557', 'step': 4660, 'epoch': 1}
{'type': 'loss', 'content': 0.0717867836356163, 'timestamp': '2025-10-02 00:18:59.704014', 'step': 4661, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:18:59.758091', 'step': 4661, 'epoch': 1}
{'type': 'loss', 'content': 0.050860121846199036, 'timestamp': '2025-10-02 00:18:59.760405', 'step': 4662, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:59.814040', 'step': 4662, 'epoch': 1}
{'type': 'loss', 'content': 0.057351887226104736, 'timestamp': '2025-10-02 00:18:59.819848', 'step': 4663, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:18:59.873913', 'step': 4663, 'epoch': 1}
{'type': 'loss', 'content': 0.12047947198152542, 'timestamp': '2025-10-02 00:18:59.879957', 'step': 4664, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:18:59.934259', 'step': 4664, 'epoch': 1}
{'type': 'loss', 'content': 0.010641466826200485, 'timestamp': '2025-10-02 00:18:59.940146', 'step': 4665, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:18:59.993636', 'step': 4665, 'epoch': 1}
{'type': 'loss', 'content': 0.1796911656856537, 'timestamp': '2025-10-02 00:18:59.996858', 'step': 4666, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:00.050645', 'step': 4666, 'epoch': 1}
{'type': 'loss', 'content': 0.09488582611083984, 'timestamp': '2025-10-02 00:19:00.053054', 'step': 4667, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:00.107495', 'step': 4667, 'epoch': 1}
{'type': 'loss', 'content': 0.07043391466140747, 'timestamp': '2025-10-02 00:19:00.114021', 'step': 4668, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:00.168410', 'step': 4668, 'epoch': 1}
{'type': 'loss', 'content': 0.09450370818376541, 'timestamp': '2025-10-02 00:19:00.175893', 'step': 4669, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:00.230848', 'step': 4669, 'epoch': 1}
{'type': 'loss', 'content': 0.18591395020484924, 'timestamp': '2025-10-02 00:19:00.232953', 'step': 4670, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:19:00.288265', 'step': 4670, 'epoch': 1}
{'type': 'loss', 'content': 0.13292349874973297, 'timestamp': '2025-10-02 00:19:00.290846', 'step': 4671, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:00.344368', 'step': 4671, 'epoch': 1}
{'type': 'loss', 'content': 0.1854952871799469, 'timestamp': '2025-10-02 00:19:00.350764', 'step': 4672, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:00.404981', 'step': 4672, 'epoch': 1}
{'type': 'loss', 'content': 0.06750302016735077, 'timestamp': '2025-10-02 00:19:00.410722', 'step': 4673, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:00.464656', 'step': 4673, 'epoch': 1}
{'type': 'loss', 'content': 0.13023614883422852, 'timestamp': '2025-10-02 00:19:00.470538', 'step': 4674, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:00.525078', 'step': 4674, 'epoch': 1}
{'type': 'loss', 'content': 0.08963175863027573, 'timestamp': '2025-10-02 00:19:00.534349', 'step': 4675, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:00.588306', 'step': 4675, 'epoch': 1}
{'type': 'loss', 'content': 0.03357269987463951, 'timestamp': '2025-10-02 00:19:00.596643', 'step': 4676, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:00.650096', 'step': 4676, 'epoch': 1}
{'type': 'loss', 'content': 0.23228703439235687, 'timestamp': '2025-10-02 00:19:00.652241', 'step': 4677, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:00.706428', 'step': 4677, 'epoch': 1}
{'type': 'loss', 'content': 0.02918311394751072, 'timestamp': '2025-10-02 00:19:00.715786', 'step': 4678, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:00.770751', 'step': 4678, 'epoch': 1}
{'type': 'loss', 'content': 0.12100715935230255, 'timestamp': '2025-10-02 00:19:00.773214', 'step': 4679, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:00.828578', 'step': 4679, 'epoch': 1}
{'type': 'loss', 'content': 0.10372743010520935, 'timestamp': '2025-10-02 00:19:00.835096', 'step': 4680, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:00.888915', 'step': 4680, 'epoch': 1}
{'type': 'loss', 'content': 0.12003517150878906, 'timestamp': '2025-10-02 00:19:00.891524', 'step': 4681, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:19:00.945508', 'step': 4681, 'epoch': 1}
{'type': 'loss', 'content': 0.18606290221214294, 'timestamp': '2025-10-02 00:19:00.947988', 'step': 4682, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:01.001496', 'step': 4682, 'epoch': 1}
{'type': 'loss', 'content': 0.21389645338058472, 'timestamp': '2025-10-02 00:19:01.004588', 'step': 4683, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:19:01.059521', 'step': 4683, 'epoch': 1}
{'type': 'loss', 'content': 0.10080459713935852, 'timestamp': '2025-10-02 00:19:01.065676', 'step': 4684, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:19:01.122123', 'step': 4684, 'epoch': 1}
{'type': 'loss', 'content': 0.042729802429676056, 'timestamp': '2025-10-02 00:19:01.133343', 'step': 4685, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:01.188607', 'step': 4685, 'epoch': 1}
{'type': 'loss', 'content': 0.046448759734630585, 'timestamp': '2025-10-02 00:19:01.196004', 'step': 4686, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:01.249655', 'step': 4686, 'epoch': 1}
{'type': 'loss', 'content': 0.18152004480361938, 'timestamp': '2025-10-02 00:19:01.252024', 'step': 4687, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:01.306429', 'step': 4687, 'epoch': 1}
{'type': 'loss', 'content': 0.0657985731959343, 'timestamp': '2025-10-02 00:19:01.314732', 'step': 4688, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:19:01.369245', 'step': 4688, 'epoch': 1}
{'type': 'loss', 'content': 0.05665048584342003, 'timestamp': '2025-10-02 00:19:01.371921', 'step': 4689, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:01.425190', 'step': 4689, 'epoch': 1}
{'type': 'loss', 'content': 0.09745337814092636, 'timestamp': '2025-10-02 00:19:01.427806', 'step': 4690, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:19:01.486280', 'step': 4690, 'epoch': 1}
{'type': 'loss', 'content': 0.021920418366789818, 'timestamp': '2025-10-02 00:19:01.496686', 'step': 4691, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:01.551142', 'step': 4691, 'epoch': 1}
{'type': 'loss', 'content': 0.061544716358184814, 'timestamp': '2025-10-02 00:19:01.561295', 'step': 4692, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:01.615689', 'step': 4692, 'epoch': 1}
{'type': 'loss', 'content': 0.046507321298122406, 'timestamp': '2025-10-02 00:19:01.618468', 'step': 4693, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:01.672745', 'step': 4693, 'epoch': 1}
{'type': 'loss', 'content': 0.03111550398170948, 'timestamp': '2025-10-02 00:19:01.680122', 'step': 4694, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:19:01.735661', 'step': 4694, 'epoch': 1}
{'type': 'loss', 'content': 0.13209345936775208, 'timestamp': '2025-10-02 00:19:01.738120', 'step': 4695, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:19:01.791374', 'step': 4695, 'epoch': 1}
{'type': 'loss', 'content': 0.18389707803726196, 'timestamp': '2025-10-02 00:19:01.797351', 'step': 4696, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:19:01.850647', 'step': 4696, 'epoch': 1}
{'type': 'loss', 'content': 0.16428224742412567, 'timestamp': '2025-10-02 00:19:01.853814', 'step': 4697, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:01.908735', 'step': 4697, 'epoch': 1}
{'type': 'loss', 'content': 0.057275380939245224, 'timestamp': '2025-10-02 00:19:01.916329', 'step': 4698, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:01.971255', 'step': 4698, 'epoch': 1}
{'type': 'loss', 'content': 0.06113026291131973, 'timestamp': '2025-10-02 00:19:01.980472', 'step': 4699, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:19:02.039159', 'step': 4699, 'epoch': 1}
{'type': 'loss', 'content': 0.009912606328725815, 'timestamp': '2025-10-02 00:19:02.050376', 'step': 4700, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:02.104070', 'step': 4700, 'epoch': 1}
{'type': 'loss', 'content': 0.023119889199733734, 'timestamp': '2025-10-02 00:19:02.111510', 'step': 4701, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:02.165063', 'step': 4701, 'epoch': 1}
{'type': 'loss', 'content': 0.13015729188919067, 'timestamp': '2025-10-02 00:19:02.167415', 'step': 4702, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:02.221848', 'step': 4702, 'epoch': 1}
{'type': 'loss', 'content': 0.13310201466083527, 'timestamp': '2025-10-02 00:19:02.227586', 'step': 4703, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:02.282360', 'step': 4703, 'epoch': 1}
{'type': 'loss', 'content': 0.10295933485031128, 'timestamp': '2025-10-02 00:19:02.288794', 'step': 4704, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:02.342569', 'step': 4704, 'epoch': 1}
{'type': 'loss', 'content': 0.033011969178915024, 'timestamp': '2025-10-02 00:19:02.353068', 'step': 4705, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:02.407123', 'step': 4705, 'epoch': 1}
{'type': 'loss', 'content': 0.04555204138159752, 'timestamp': '2025-10-02 00:19:02.409587', 'step': 4706, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:02.463538', 'step': 4706, 'epoch': 1}
{'type': 'loss', 'content': 0.12673939764499664, 'timestamp': '2025-10-02 00:19:02.465890', 'step': 4707, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:02.520280', 'step': 4707, 'epoch': 1}
{'type': 'loss', 'content': 0.2461201548576355, 'timestamp': '2025-10-02 00:19:02.526303', 'step': 4708, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:02.580454', 'step': 4708, 'epoch': 1}
{'type': 'loss', 'content': 0.09168736636638641, 'timestamp': '2025-10-02 00:19:02.582690', 'step': 4709, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:02.636131', 'step': 4709, 'epoch': 1}
{'type': 'loss', 'content': 0.052743472158908844, 'timestamp': '2025-10-02 00:19:02.645502', 'step': 4710, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:02.701048', 'step': 4710, 'epoch': 1}
{'type': 'loss', 'content': 0.027709422633051872, 'timestamp': '2025-10-02 00:19:02.706926', 'step': 4711, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:02.772616', 'step': 4711, 'epoch': 1}
{'type': 'loss', 'content': 0.03908867388963699, 'timestamp': '2025-10-02 00:19:02.784117', 'step': 4712, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:02.839790', 'step': 4712, 'epoch': 1}
{'type': 'loss', 'content': 0.03582802787423134, 'timestamp': '2025-10-02 00:19:02.843201', 'step': 4713, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:02.901140', 'step': 4713, 'epoch': 1}
{'type': 'loss', 'content': 0.05454452335834503, 'timestamp': '2025-10-02 00:19:02.908685', 'step': 4714, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:02.966056', 'step': 4714, 'epoch': 1}
{'type': 'loss', 'content': 0.11060534417629242, 'timestamp': '2025-10-02 00:19:02.971803', 'step': 4715, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:03.028109', 'step': 4715, 'epoch': 1}
{'type': 'loss', 'content': 0.08003420382738113, 'timestamp': '2025-10-02 00:19:03.038705', 'step': 4716, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:19:03.092573', 'step': 4716, 'epoch': 1}
{'type': 'loss', 'content': 0.09085389971733093, 'timestamp': '2025-10-02 00:19:03.096455', 'step': 4717, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:19:03.152030', 'step': 4717, 'epoch': 1}
{'type': 'loss', 'content': 0.19527557492256165, 'timestamp': '2025-10-02 00:19:03.154330', 'step': 4718, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:19:03.210721', 'step': 4718, 'epoch': 1}
{'type': 'loss', 'content': 0.1367122381925583, 'timestamp': '2025-10-02 00:19:03.213774', 'step': 4719, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:03.269559', 'step': 4719, 'epoch': 1}
{'type': 'loss', 'content': 0.14768199622631073, 'timestamp': '2025-10-02 00:19:03.276264', 'step': 4720, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:03.331516', 'step': 4720, 'epoch': 1}
{'type': 'loss', 'content': 0.09172815084457397, 'timestamp': '2025-10-02 00:19:03.337376', 'step': 4721, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:19:03.413605', 'step': 4721, 'epoch': 1}
{'type': 'loss', 'content': 0.0536397323012352, 'timestamp': '2025-10-02 00:19:03.427304', 'step': 4722, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:03.483536', 'step': 4722, 'epoch': 1}
{'type': 'loss', 'content': 0.12308058887720108, 'timestamp': '2025-10-02 00:19:03.486629', 'step': 4723, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:19:03.542638', 'step': 4723, 'epoch': 1}
{'type': 'loss', 'content': 0.09750226140022278, 'timestamp': '2025-10-02 00:19:03.549129', 'step': 4724, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:19:03.607909', 'step': 4724, 'epoch': 1}
{'type': 'loss', 'content': 0.06668662279844284, 'timestamp': '2025-10-02 00:19:03.619097', 'step': 4725, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:03.673802', 'step': 4725, 'epoch': 1}
{'type': 'loss', 'content': 0.1723848581314087, 'timestamp': '2025-10-02 00:19:03.677463', 'step': 4726, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:03.734427', 'step': 4726, 'epoch': 1}
{'type': 'loss', 'content': 0.042994070798158646, 'timestamp': '2025-10-02 00:19:03.737905', 'step': 4727, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:19:03.802414', 'step': 4727, 'epoch': 1}
{'type': 'loss', 'content': 0.014666670002043247, 'timestamp': '2025-10-02 00:19:03.814293', 'step': 4728, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:03.873749', 'step': 4728, 'epoch': 1}
{'type': 'loss', 'content': 0.035529375076293945, 'timestamp': '2025-10-02 00:19:03.877694', 'step': 4729, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:03.936201', 'step': 4729, 'epoch': 1}
{'type': 'loss', 'content': 0.08145000785589218, 'timestamp': '2025-10-02 00:19:03.942141', 'step': 4730, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:19:03.997441', 'step': 4730, 'epoch': 1}
{'type': 'loss', 'content': 0.1259937435388565, 'timestamp': '2025-10-02 00:19:04.000745', 'step': 4731, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:04.059018', 'step': 4731, 'epoch': 1}
{'type': 'loss', 'content': 0.07557160407304764, 'timestamp': '2025-10-02 00:19:04.065500', 'step': 4732, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:04.121506', 'step': 4732, 'epoch': 1}
{'type': 'loss', 'content': 0.11361125856637955, 'timestamp': '2025-10-02 00:19:04.123881', 'step': 4733, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:04.177195', 'step': 4733, 'epoch': 1}
{'type': 'loss', 'content': 0.18730729818344116, 'timestamp': '2025-10-02 00:19:04.179749', 'step': 4734, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:04.233717', 'step': 4734, 'epoch': 1}
{'type': 'loss', 'content': 0.11076316237449646, 'timestamp': '2025-10-02 00:19:04.236180', 'step': 4735, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:04.290123', 'step': 4735, 'epoch': 1}
{'type': 'loss', 'content': 0.10678061097860336, 'timestamp': '2025-10-02 00:19:04.296127', 'step': 4736, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:04.349953', 'step': 4736, 'epoch': 1}
{'type': 'loss', 'content': 0.055231839418411255, 'timestamp': '2025-10-02 00:19:04.357428', 'step': 4737, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:04.412253', 'step': 4737, 'epoch': 1}
{'type': 'loss', 'content': 0.06240186467766762, 'timestamp': '2025-10-02 00:19:04.417909', 'step': 4738, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:04.472742', 'step': 4738, 'epoch': 1}
{'type': 'loss', 'content': 0.10915832966566086, 'timestamp': '2025-10-02 00:19:04.475429', 'step': 4739, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:19:04.533659', 'step': 4739, 'epoch': 1}
{'type': 'loss', 'content': 0.05963291600346565, 'timestamp': '2025-10-02 00:19:04.544832', 'step': 4740, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:04.606138', 'step': 4740, 'epoch': 1}
{'type': 'loss', 'content': 0.026831746101379395, 'timestamp': '2025-10-02 00:19:04.617662', 'step': 4741, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:04.674433', 'step': 4741, 'epoch': 1}
{'type': 'loss', 'content': 0.04279151186347008, 'timestamp': '2025-10-02 00:19:04.680557', 'step': 4742, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:04.736709', 'step': 4742, 'epoch': 1}
{'type': 'loss', 'content': 0.01968090981245041, 'timestamp': '2025-10-02 00:19:04.746070', 'step': 4743, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:04.801835', 'step': 4743, 'epoch': 1}
{'type': 'loss', 'content': 0.08902250975370407, 'timestamp': '2025-10-02 00:19:04.808875', 'step': 4744, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:04.864628', 'step': 4744, 'epoch': 1}
{'type': 'loss', 'content': 0.04875065013766289, 'timestamp': '2025-10-02 00:19:04.873328', 'step': 4745, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:04.928953', 'step': 4745, 'epoch': 1}
{'type': 'loss', 'content': 0.13566356897354126, 'timestamp': '2025-10-02 00:19:04.931822', 'step': 4746, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:04.987045', 'step': 4746, 'epoch': 1}
{'type': 'loss', 'content': 0.04781588539481163, 'timestamp': '2025-10-02 00:19:04.989883', 'step': 4747, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:05.046197', 'step': 4747, 'epoch': 1}
{'type': 'loss', 'content': 0.027147842571139336, 'timestamp': '2025-10-02 00:19:05.053003', 'step': 4748, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:05.107024', 'step': 4748, 'epoch': 1}
{'type': 'loss', 'content': 0.26840248703956604, 'timestamp': '2025-10-02 00:19:05.109626', 'step': 4749, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:05.164444', 'step': 4749, 'epoch': 1}
{'type': 'loss', 'content': 0.02167396806180477, 'timestamp': '2025-10-02 00:19:05.173146', 'step': 4750, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:05.228941', 'step': 4750, 'epoch': 1}
{'type': 'loss', 'content': 0.022659827023744583, 'timestamp': '2025-10-02 00:19:05.238104', 'step': 4751, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:05.294677', 'step': 4751, 'epoch': 1}
{'type': 'loss', 'content': 0.06683063507080078, 'timestamp': '2025-10-02 00:19:05.301525', 'step': 4752, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:05.356653', 'step': 4752, 'epoch': 1}
{'type': 'loss', 'content': 0.18789854645729065, 'timestamp': '2025-10-02 00:19:05.359007', 'step': 4753, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:05.415427', 'step': 4753, 'epoch': 1}
{'type': 'loss', 'content': 0.10544085502624512, 'timestamp': '2025-10-02 00:19:05.417559', 'step': 4754, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:05.472574', 'step': 4754, 'epoch': 1}
{'type': 'loss', 'content': 0.04249407723546028, 'timestamp': '2025-10-02 00:19:05.482314', 'step': 4755, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:05.536999', 'step': 4755, 'epoch': 1}
{'type': 'loss', 'content': 0.05648259073495865, 'timestamp': '2025-10-02 00:19:05.543419', 'step': 4756, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:19:05.600021', 'step': 4756, 'epoch': 1}
{'type': 'loss', 'content': 0.07589706778526306, 'timestamp': '2025-10-02 00:19:05.611218', 'step': 4757, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:19:05.666268', 'step': 4757, 'epoch': 1}
{'type': 'loss', 'content': 0.07276823371648788, 'timestamp': '2025-10-02 00:19:05.668780', 'step': 4758, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:05.724804', 'step': 4758, 'epoch': 1}
{'type': 'loss', 'content': 0.14825980365276337, 'timestamp': '2025-10-02 00:19:05.733724', 'step': 4759, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:05.787786', 'step': 4759, 'epoch': 1}
{'type': 'loss', 'content': 0.10928349941968918, 'timestamp': '2025-10-02 00:19:05.794147', 'step': 4760, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:19:05.859500', 'step': 4760, 'epoch': 1}
{'type': 'loss', 'content': 0.01998400129377842, 'timestamp': '2025-10-02 00:19:05.872739', 'step': 4761, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:05.927261', 'step': 4761, 'epoch': 1}
{'type': 'loss', 'content': 0.02545822784304619, 'timestamp': '2025-10-02 00:19:05.930773', 'step': 4762, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:05.985358', 'step': 4762, 'epoch': 1}
{'type': 'loss', 'content': 0.01931719109416008, 'timestamp': '2025-10-02 00:19:05.994833', 'step': 4763, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:06.049373', 'step': 4763, 'epoch': 1}
{'type': 'loss', 'content': 0.02962738834321499, 'timestamp': '2025-10-02 00:19:06.059688', 'step': 4764, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:06.112857', 'step': 4764, 'epoch': 1}
{'type': 'loss', 'content': 0.12713798880577087, 'timestamp': '2025-10-02 00:19:06.118773', 'step': 4765, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:06.172531', 'step': 4765, 'epoch': 1}
{'type': 'loss', 'content': 0.2101738154888153, 'timestamp': '2025-10-02 00:19:06.174960', 'step': 4766, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:06.229266', 'step': 4766, 'epoch': 1}
{'type': 'loss', 'content': 0.06689387559890747, 'timestamp': '2025-10-02 00:19:06.231864', 'step': 4767, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:06.286151', 'step': 4767, 'epoch': 1}
{'type': 'loss', 'content': 0.040767740458250046, 'timestamp': '2025-10-02 00:19:06.296357', 'step': 4768, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:06.349678', 'step': 4768, 'epoch': 1}
{'type': 'loss', 'content': 0.14238649606704712, 'timestamp': '2025-10-02 00:19:06.352218', 'step': 4769, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:06.405565', 'step': 4769, 'epoch': 1}
{'type': 'loss', 'content': 0.14161691069602966, 'timestamp': '2025-10-02 00:19:06.408336', 'step': 4770, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:06.461897', 'step': 4770, 'epoch': 1}
{'type': 'loss', 'content': 0.087239108979702, 'timestamp': '2025-10-02 00:19:06.469376', 'step': 4771, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:06.523867', 'step': 4771, 'epoch': 1}
{'type': 'loss', 'content': 0.034817107021808624, 'timestamp': '2025-10-02 00:19:06.530375', 'step': 4772, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:06.584337', 'step': 4772, 'epoch': 1}
{'type': 'loss', 'content': 0.036529477685689926, 'timestamp': '2025-10-02 00:19:06.587521', 'step': 4773, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:06.641280', 'step': 4773, 'epoch': 1}
{'type': 'loss', 'content': 0.10575778782367706, 'timestamp': '2025-10-02 00:19:06.643893', 'step': 4774, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:19:06.697520', 'step': 4774, 'epoch': 1}
{'type': 'loss', 'content': 0.11069204658269882, 'timestamp': '2025-10-02 00:19:06.700275', 'step': 4775, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:06.753571', 'step': 4775, 'epoch': 1}
{'type': 'loss', 'content': 0.1775519847869873, 'timestamp': '2025-10-02 00:19:06.759644', 'step': 4776, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:19:06.812662', 'step': 4776, 'epoch': 1}
{'type': 'loss', 'content': 0.12291916459798813, 'timestamp': '2025-10-02 00:19:06.815139', 'step': 4777, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:06.869930', 'step': 4777, 'epoch': 1}
{'type': 'loss', 'content': 0.0669146478176117, 'timestamp': '2025-10-02 00:19:06.879731', 'step': 4778, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:06.935662', 'step': 4778, 'epoch': 1}
{'type': 'loss', 'content': 0.020143546164035797, 'timestamp': '2025-10-02 00:19:06.945485', 'step': 4779, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:06.999842', 'step': 4779, 'epoch': 1}
{'type': 'loss', 'content': 0.1349240094423294, 'timestamp': '2025-10-02 00:19:07.007974', 'step': 4780, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:07.062619', 'step': 4780, 'epoch': 1}
{'type': 'loss', 'content': 0.06195235252380371, 'timestamp': '2025-10-02 00:19:07.065197', 'step': 4781, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:07.119488', 'step': 4781, 'epoch': 1}
{'type': 'loss', 'content': 0.03041815757751465, 'timestamp': '2025-10-02 00:19:07.129118', 'step': 4782, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:07.183472', 'step': 4782, 'epoch': 1}
{'type': 'loss', 'content': 0.11719769984483719, 'timestamp': '2025-10-02 00:19:07.186496', 'step': 4783, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:07.240976', 'step': 4783, 'epoch': 1}
{'type': 'loss', 'content': 0.0640248954296112, 'timestamp': '2025-10-02 00:19:07.249383', 'step': 4784, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:07.303291', 'step': 4784, 'epoch': 1}
{'type': 'loss', 'content': 0.04989352077245712, 'timestamp': '2025-10-02 00:19:07.312895', 'step': 4785, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:07.373481', 'step': 4785, 'epoch': 1}
{'type': 'loss', 'content': 0.02760150097310543, 'timestamp': '2025-10-02 00:19:07.384225', 'step': 4786, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:07.439586', 'step': 4786, 'epoch': 1}
{'type': 'loss', 'content': 0.07403778284788132, 'timestamp': '2025-10-02 00:19:07.441997', 'step': 4787, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:07.495499', 'step': 4787, 'epoch': 1}
{'type': 'loss', 'content': 0.15486454963684082, 'timestamp': '2025-10-02 00:19:07.501507', 'step': 4788, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:19:07.555544', 'step': 4788, 'epoch': 1}
{'type': 'loss', 'content': 0.12319016456604004, 'timestamp': '2025-10-02 00:19:07.558286', 'step': 4789, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:19:07.616561', 'step': 4789, 'epoch': 1}
{'type': 'loss', 'content': 0.07246365398168564, 'timestamp': '2025-10-02 00:19:07.627014', 'step': 4790, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:19:07.681131', 'step': 4790, 'epoch': 1}
{'type': 'loss', 'content': 0.07344770431518555, 'timestamp': '2025-10-02 00:19:07.683661', 'step': 4791, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:07.739283', 'step': 4791, 'epoch': 1}
{'type': 'loss', 'content': 0.05114288255572319, 'timestamp': '2025-10-02 00:19:07.745343', 'step': 4792, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:19:07.806063', 'step': 4792, 'epoch': 1}
{'type': 'loss', 'content': 0.04638334736227989, 'timestamp': '2025-10-02 00:19:07.818050', 'step': 4793, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:19:07.877318', 'step': 4793, 'epoch': 1}
{'type': 'loss', 'content': 0.02314271405339241, 'timestamp': '2025-10-02 00:19:07.887776', 'step': 4794, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:07.941683', 'step': 4794, 'epoch': 1}
{'type': 'loss', 'content': 0.11624002456665039, 'timestamp': '2025-10-02 00:19:07.947513', 'step': 4795, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:08.001868', 'step': 4795, 'epoch': 1}
{'type': 'loss', 'content': 0.11624431610107422, 'timestamp': '2025-10-02 00:19:08.008375', 'step': 4796, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:08.061265', 'step': 4796, 'epoch': 1}
{'type': 'loss', 'content': 0.09449297934770584, 'timestamp': '2025-10-02 00:19:08.063745', 'step': 4797, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:08.117819', 'step': 4797, 'epoch': 1}
{'type': 'loss', 'content': 0.18581587076187134, 'timestamp': '2025-10-02 00:19:08.120355', 'step': 4798, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:08.174092', 'step': 4798, 'epoch': 1}
{'type': 'loss', 'content': 0.1001083254814148, 'timestamp': '2025-10-02 00:19:08.176411', 'step': 4799, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:08.228787', 'step': 4799, 'epoch': 1}
{'type': 'loss', 'content': 0.09725557267665863, 'timestamp': '2025-10-02 00:19:08.234928', 'step': 4800, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:08.287939', 'step': 4800, 'epoch': 1}
{'type': 'loss', 'content': 0.0728030577301979, 'timestamp': '2025-10-02 00:19:08.297635', 'step': 4801, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:08.351522', 'step': 4801, 'epoch': 1}
{'type': 'loss', 'content': 0.0801171362400055, 'timestamp': '2025-10-02 00:19:08.353932', 'step': 4802, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:08.409260', 'step': 4802, 'epoch': 1}
{'type': 'loss', 'content': 0.03041689656674862, 'timestamp': '2025-10-02 00:19:08.419085', 'step': 4803, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:08.473541', 'step': 4803, 'epoch': 1}
{'type': 'loss', 'content': 0.0804290845990181, 'timestamp': '2025-10-02 00:19:08.480595', 'step': 4804, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:08.533173', 'step': 4804, 'epoch': 1}
{'type': 'loss', 'content': 0.09021010249853134, 'timestamp': '2025-10-02 00:19:08.535573', 'step': 4805, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:08.588867', 'step': 4805, 'epoch': 1}
{'type': 'loss', 'content': 0.068433478474617, 'timestamp': '2025-10-02 00:19:08.595005', 'step': 4806, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:08.648146', 'step': 4806, 'epoch': 1}
{'type': 'loss', 'content': 0.03218955174088478, 'timestamp': '2025-10-02 00:19:08.650610', 'step': 4807, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:19:08.703678', 'step': 4807, 'epoch': 1}
{'type': 'loss', 'content': 0.1152631938457489, 'timestamp': '2025-10-02 00:19:08.710125', 'step': 4808, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:19:08.767247', 'step': 4808, 'epoch': 1}
{'type': 'loss', 'content': 0.06672243028879166, 'timestamp': '2025-10-02 00:19:08.778440', 'step': 4809, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:08.832405', 'step': 4809, 'epoch': 1}
{'type': 'loss', 'content': 0.1642618030309677, 'timestamp': '2025-10-02 00:19:08.834440', 'step': 4810, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:19:08.904523', 'step': 4810, 'epoch': 1}
{'type': 'loss', 'content': 0.0308714397251606, 'timestamp': '2025-10-02 00:19:08.917416', 'step': 4811, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:08.971843', 'step': 4811, 'epoch': 1}
{'type': 'loss', 'content': 0.021121826022863388, 'timestamp': '2025-10-02 00:19:08.982212', 'step': 4812, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:09.043891', 'step': 4812, 'epoch': 1}
{'type': 'loss', 'content': 0.060179732739925385, 'timestamp': '2025-10-02 00:19:09.055415', 'step': 4813, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:09.111506', 'step': 4813, 'epoch': 1}
{'type': 'loss', 'content': 0.062458205968141556, 'timestamp': '2025-10-02 00:19:09.114668', 'step': 4814, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:19:09.168402', 'step': 4814, 'epoch': 1}
{'type': 'loss', 'content': 0.06595750898122787, 'timestamp': '2025-10-02 00:19:09.170348', 'step': 4815, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:09.223889', 'step': 4815, 'epoch': 1}
{'type': 'loss', 'content': 0.044310685247182846, 'timestamp': '2025-10-02 00:19:09.230659', 'step': 4816, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:09.284238', 'step': 4816, 'epoch': 1}
{'type': 'loss', 'content': 0.25275665521621704, 'timestamp': '2025-10-02 00:19:09.286922', 'step': 4817, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:09.341012', 'step': 4817, 'epoch': 1}
{'type': 'loss', 'content': 0.05532978102564812, 'timestamp': '2025-10-02 00:19:09.343299', 'step': 4818, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:09.397402', 'step': 4818, 'epoch': 1}
{'type': 'loss', 'content': 0.046565942466259, 'timestamp': '2025-10-02 00:19:09.403288', 'step': 4819, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:09.457392', 'step': 4819, 'epoch': 1}
{'type': 'loss', 'content': 0.04985679313540459, 'timestamp': '2025-10-02 00:19:09.467976', 'step': 4820, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:09.522177', 'step': 4820, 'epoch': 1}
{'type': 'loss', 'content': 0.07600865513086319, 'timestamp': '2025-10-02 00:19:09.524479', 'step': 4821, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:09.585127', 'step': 4821, 'epoch': 1}
{'type': 'loss', 'content': 0.08109259605407715, 'timestamp': '2025-10-02 00:19:09.595819', 'step': 4822, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:09.652057', 'step': 4822, 'epoch': 1}
{'type': 'loss', 'content': 0.012324516661465168, 'timestamp': '2025-10-02 00:19:09.659847', 'step': 4823, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:09.716926', 'step': 4823, 'epoch': 1}
{'type': 'loss', 'content': 0.07701461017131805, 'timestamp': '2025-10-02 00:19:09.722949', 'step': 4824, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:19:09.775677', 'step': 4824, 'epoch': 1}
{'type': 'loss', 'content': 0.16088105738162994, 'timestamp': '2025-10-02 00:19:09.777958', 'step': 4825, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:09.831450', 'step': 4825, 'epoch': 1}
{'type': 'loss', 'content': 0.18357287347316742, 'timestamp': '2025-10-02 00:19:09.833811', 'step': 4826, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:09.888079', 'step': 4826, 'epoch': 1}
{'type': 'loss', 'content': 0.10714376717805862, 'timestamp': '2025-10-02 00:19:09.890533', 'step': 4827, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:09.943936', 'step': 4827, 'epoch': 1}
{'type': 'loss', 'content': 0.06795010715723038, 'timestamp': '2025-10-02 00:19:09.950866', 'step': 4828, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:19:10.011196', 'step': 4828, 'epoch': 1}
{'type': 'loss', 'content': 0.042790837585926056, 'timestamp': '2025-10-02 00:19:10.023227', 'step': 4829, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:10.078960', 'step': 4829, 'epoch': 1}
{'type': 'loss', 'content': 0.015824630856513977, 'timestamp': '2025-10-02 00:19:10.085174', 'step': 4830, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:19:10.144573', 'step': 4830, 'epoch': 1}
{'type': 'loss', 'content': 0.033987049013376236, 'timestamp': '2025-10-02 00:19:10.155029', 'step': 4831, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:10.208750', 'step': 4831, 'epoch': 1}
{'type': 'loss', 'content': 0.2014084756374359, 'timestamp': '2025-10-02 00:19:10.214759', 'step': 4832, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:10.267798', 'step': 4832, 'epoch': 1}
{'type': 'loss', 'content': 0.02150360681116581, 'timestamp': '2025-10-02 00:19:10.275609', 'step': 4833, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:10.330392', 'step': 4833, 'epoch': 1}
{'type': 'loss', 'content': 0.0478830486536026, 'timestamp': '2025-10-02 00:19:10.336534', 'step': 4834, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:10.390343', 'step': 4834, 'epoch': 1}
{'type': 'loss', 'content': 0.11412152647972107, 'timestamp': '2025-10-02 00:19:10.399917', 'step': 4835, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:10.456547', 'step': 4835, 'epoch': 1}
{'type': 'loss', 'content': 0.024589166045188904, 'timestamp': '2025-10-02 00:19:10.467137', 'step': 4836, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:10.522682', 'step': 4836, 'epoch': 1}
{'type': 'loss', 'content': 0.1620633900165558, 'timestamp': '2025-10-02 00:19:10.525388', 'step': 4837, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:10.581886', 'step': 4837, 'epoch': 1}
{'type': 'loss', 'content': 0.08702369034290314, 'timestamp': '2025-10-02 00:19:10.584629', 'step': 4838, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:10.639460', 'step': 4838, 'epoch': 1}
{'type': 'loss', 'content': 0.25371846556663513, 'timestamp': '2025-10-02 00:19:10.641653', 'step': 4839, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:10.698408', 'step': 4839, 'epoch': 1}
{'type': 'loss', 'content': 0.10654094070196152, 'timestamp': '2025-10-02 00:19:10.704393', 'step': 4840, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:10.758245', 'step': 4840, 'epoch': 1}
{'type': 'loss', 'content': 0.02542009763419628, 'timestamp': '2025-10-02 00:19:10.768148', 'step': 4841, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:10.823344', 'step': 4841, 'epoch': 1}
{'type': 'loss', 'content': 0.018661431968212128, 'timestamp': '2025-10-02 00:19:10.832876', 'step': 4842, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:10.889562', 'step': 4842, 'epoch': 1}
{'type': 'loss', 'content': 0.0752006247639656, 'timestamp': '2025-10-02 00:19:10.893826', 'step': 4843, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:10.956670', 'step': 4843, 'epoch': 1}
{'type': 'loss', 'content': 0.06094730272889137, 'timestamp': '2025-10-02 00:19:10.968208', 'step': 4844, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:11.023356', 'step': 4844, 'epoch': 1}
{'type': 'loss', 'content': 0.055738743394613266, 'timestamp': '2025-10-02 00:19:11.026950', 'step': 4845, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:11.084341', 'step': 4845, 'epoch': 1}
{'type': 'loss', 'content': 0.04271889850497246, 'timestamp': '2025-10-02 00:19:11.092335', 'step': 4846, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:11.147711', 'step': 4846, 'epoch': 1}
{'type': 'loss', 'content': 0.03900456055998802, 'timestamp': '2025-10-02 00:19:11.150351', 'step': 4847, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:11.206239', 'step': 4847, 'epoch': 1}
{'type': 'loss', 'content': 0.03375347703695297, 'timestamp': '2025-10-02 00:19:11.213040', 'step': 4848, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:11.275517', 'step': 4848, 'epoch': 1}
{'type': 'loss', 'content': 0.09388222545385361, 'timestamp': '2025-10-02 00:19:11.287076', 'step': 4849, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:11.342838', 'step': 4849, 'epoch': 1}
{'type': 'loss', 'content': 0.1654251664876938, 'timestamp': '2025-10-02 00:19:11.349044', 'step': 4850, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:11.404600', 'step': 4850, 'epoch': 1}
{'type': 'loss', 'content': 0.12532366812229156, 'timestamp': '2025-10-02 00:19:11.407434', 'step': 4851, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:11.462433', 'step': 4851, 'epoch': 1}
{'type': 'loss', 'content': 0.12974847853183746, 'timestamp': '2025-10-02 00:19:11.468300', 'step': 4852, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:11.523826', 'step': 4852, 'epoch': 1}
{'type': 'loss', 'content': 0.040847424417734146, 'timestamp': '2025-10-02 00:19:11.533903', 'step': 4853, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:11.589335', 'step': 4853, 'epoch': 1}
{'type': 'loss', 'content': 0.07510749250650406, 'timestamp': '2025-10-02 00:19:11.598907', 'step': 4854, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:11.653110', 'step': 4854, 'epoch': 1}
{'type': 'loss', 'content': 0.09512931853532791, 'timestamp': '2025-10-02 00:19:11.656615', 'step': 4855, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:11.711940', 'step': 4855, 'epoch': 1}
{'type': 'loss', 'content': 0.08615689724683762, 'timestamp': '2025-10-02 00:19:11.718487', 'step': 4856, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:11.772686', 'step': 4856, 'epoch': 1}
{'type': 'loss', 'content': 0.06686659157276154, 'timestamp': '2025-10-02 00:19:11.775234', 'step': 4857, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:11.830817', 'step': 4857, 'epoch': 1}
{'type': 'loss', 'content': 0.052410341799259186, 'timestamp': '2025-10-02 00:19:11.833794', 'step': 4858, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:11.890046', 'step': 4858, 'epoch': 1}
{'type': 'loss', 'content': 0.05130572244524956, 'timestamp': '2025-10-02 00:19:11.896176', 'step': 4859, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:19:11.958087', 'step': 4859, 'epoch': 1}
{'type': 'loss', 'content': 0.0395994558930397, 'timestamp': '2025-10-02 00:19:11.969796', 'step': 4860, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:12.026538', 'step': 4860, 'epoch': 1}
{'type': 'loss', 'content': 0.28877726197242737, 'timestamp': '2025-10-02 00:19:12.029545', 'step': 4861, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:12.086785', 'step': 4861, 'epoch': 1}
{'type': 'loss', 'content': 0.11731265485286713, 'timestamp': '2025-10-02 00:19:12.093031', 'step': 4862, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:12.149057', 'step': 4862, 'epoch': 1}
{'type': 'loss', 'content': 0.24694333970546722, 'timestamp': '2025-10-02 00:19:12.152043', 'step': 4863, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:12.207893', 'step': 4863, 'epoch': 1}
{'type': 'loss', 'content': 0.15512849390506744, 'timestamp': '2025-10-02 00:19:12.214613', 'step': 4864, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:12.268937', 'step': 4864, 'epoch': 1}
{'type': 'loss', 'content': 0.182348370552063, 'timestamp': '2025-10-02 00:19:12.271222', 'step': 4865, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:12.328336', 'step': 4865, 'epoch': 1}
{'type': 'loss', 'content': 0.10966173559427261, 'timestamp': '2025-10-02 00:19:12.334477', 'step': 4866, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:12.390241', 'step': 4866, 'epoch': 1}
{'type': 'loss', 'content': 0.15106473863124847, 'timestamp': '2025-10-02 00:19:12.393300', 'step': 4867, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:12.448770', 'step': 4867, 'epoch': 1}
{'type': 'loss', 'content': 0.16435661911964417, 'timestamp': '2025-10-02 00:19:12.459163', 'step': 4868, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:19:12.515024', 'step': 4868, 'epoch': 1}
{'type': 'loss', 'content': 0.09567557275295258, 'timestamp': '2025-10-02 00:19:12.517843', 'step': 4869, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:12.573360', 'step': 4869, 'epoch': 1}
{'type': 'loss', 'content': 0.0364832878112793, 'timestamp': '2025-10-02 00:19:12.576359', 'step': 4870, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:19:12.649643', 'step': 4870, 'epoch': 1}
{'type': 'loss', 'content': 0.025326237082481384, 'timestamp': '2025-10-02 00:19:12.663120', 'step': 4871, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:12.717505', 'step': 4871, 'epoch': 1}
{'type': 'loss', 'content': 0.16186398267745972, 'timestamp': '2025-10-02 00:19:12.723797', 'step': 4872, 'epoch': 1}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:19:39.652135', 'step': 4872, 'epoch': 1}
{'type': 'pplx', 'content': 94.77376548444846, 'timestamp': '2025-10-02 00:19:39.655874', 'step': 4872, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:39.715626', 'step': 4872, 'epoch': 1}
{'type': 'loss', 'content': 0.07117818295955658, 'timestamp': '2025-10-02 00:19:39.727164', 'step': 4873, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:39.785367', 'step': 4873, 'epoch': 1}
{'type': 'loss', 'content': 0.08196601271629333, 'timestamp': '2025-10-02 00:19:39.790608', 'step': 4874, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:39.846567', 'step': 4874, 'epoch': 1}
{'type': 'loss', 'content': 0.17936483025550842, 'timestamp': '2025-10-02 00:19:39.849951', 'step': 4875, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:39.907265', 'step': 4875, 'epoch': 1}
{'type': 'loss', 'content': 0.17939645051956177, 'timestamp': '2025-10-02 00:19:39.913924', 'step': 4876, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:19:39.970157', 'step': 4876, 'epoch': 1}
{'type': 'loss', 'content': 0.07411184906959534, 'timestamp': '2025-10-02 00:19:39.973462', 'step': 4877, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:40.033932', 'step': 4877, 'epoch': 1}
{'type': 'loss', 'content': 0.05521271005272865, 'timestamp': '2025-10-02 00:19:40.037100', 'step': 4878, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:19:40.093600', 'step': 4878, 'epoch': 1}
{'type': 'loss', 'content': 0.10468734800815582, 'timestamp': '2025-10-02 00:19:40.097531', 'step': 4879, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:40.153163', 'step': 4879, 'epoch': 1}
{'type': 'loss', 'content': 0.09942160546779633, 'timestamp': '2025-10-02 00:19:40.159643', 'step': 4880, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:40.215812', 'step': 4880, 'epoch': 1}
{'type': 'loss', 'content': 0.08506658673286438, 'timestamp': '2025-10-02 00:19:40.225244', 'step': 4881, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:40.281138', 'step': 4881, 'epoch': 1}
{'type': 'loss', 'content': 0.01772996410727501, 'timestamp': '2025-10-02 00:19:40.288746', 'step': 4882, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:40.352997', 'step': 4882, 'epoch': 1}
{'type': 'loss', 'content': 0.01158969197422266, 'timestamp': '2025-10-02 00:19:40.363496', 'step': 4883, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:40.421306', 'step': 4883, 'epoch': 1}
{'type': 'loss', 'content': 0.0922812670469284, 'timestamp': '2025-10-02 00:19:40.427887', 'step': 4884, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:40.483216', 'step': 4884, 'epoch': 1}
{'type': 'loss', 'content': 0.057821497321128845, 'timestamp': '2025-10-02 00:19:40.485459', 'step': 4885, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:19:40.539439', 'step': 4885, 'epoch': 1}
{'type': 'loss', 'content': 0.12160670012235641, 'timestamp': '2025-10-02 00:19:40.541847', 'step': 4886, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:19:40.597755', 'step': 4886, 'epoch': 1}
{'type': 'loss', 'content': 0.1613399237394333, 'timestamp': '2025-10-02 00:19:40.599991', 'step': 4887, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:40.656539', 'step': 4887, 'epoch': 1}
{'type': 'loss', 'content': 0.08505688607692719, 'timestamp': '2025-10-02 00:19:40.664594', 'step': 4888, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:19:40.718641', 'step': 4888, 'epoch': 1}
{'type': 'loss', 'content': 0.10786021500825882, 'timestamp': '2025-10-02 00:19:40.721089', 'step': 4889, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:40.776423', 'step': 4889, 'epoch': 1}
{'type': 'loss', 'content': 0.18468286097049713, 'timestamp': '2025-10-02 00:19:40.778763', 'step': 4890, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:19:40.846080', 'step': 4890, 'epoch': 1}
{'type': 'loss', 'content': 0.03641389310359955, 'timestamp': '2025-10-02 00:19:40.858040', 'step': 4891, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:40.913687', 'step': 4891, 'epoch': 1}
{'type': 'loss', 'content': 0.1305655688047409, 'timestamp': '2025-10-02 00:19:40.919583', 'step': 4892, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:40.972332', 'step': 4892, 'epoch': 1}
{'type': 'loss', 'content': 0.06837472319602966, 'timestamp': '2025-10-02 00:19:40.979925', 'step': 4893, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:41.034399', 'step': 4893, 'epoch': 1}
{'type': 'loss', 'content': 0.06992336362600327, 'timestamp': '2025-10-02 00:19:41.036479', 'step': 4894, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:41.091665', 'step': 4894, 'epoch': 1}
{'type': 'loss', 'content': 0.06526512652635574, 'timestamp': '2025-10-02 00:19:41.101224', 'step': 4895, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:41.155220', 'step': 4895, 'epoch': 1}
{'type': 'loss', 'content': 0.08588658273220062, 'timestamp': '2025-10-02 00:19:41.161311', 'step': 4896, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:41.214932', 'step': 4896, 'epoch': 1}
{'type': 'loss', 'content': 0.01949124038219452, 'timestamp': '2025-10-02 00:19:41.224407', 'step': 4897, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:41.279544', 'step': 4897, 'epoch': 1}
{'type': 'loss', 'content': 0.17188896238803864, 'timestamp': '2025-10-02 00:19:41.282719', 'step': 4898, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:41.337216', 'step': 4898, 'epoch': 1}
{'type': 'loss', 'content': 0.19266854226589203, 'timestamp': '2025-10-02 00:19:41.340245', 'step': 4899, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:41.394139', 'step': 4899, 'epoch': 1}
{'type': 'loss', 'content': 0.11696849018335342, 'timestamp': '2025-10-02 00:19:41.399713', 'step': 4900, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:41.453302', 'step': 4900, 'epoch': 1}
{'type': 'loss', 'content': 0.07807962596416473, 'timestamp': '2025-10-02 00:19:41.459176', 'step': 4901, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:41.514591', 'step': 4901, 'epoch': 1}
{'type': 'loss', 'content': 0.06355811655521393, 'timestamp': '2025-10-02 00:19:41.524111', 'step': 4902, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:41.578604', 'step': 4902, 'epoch': 1}
{'type': 'loss', 'content': 0.04509584233164787, 'timestamp': '2025-10-02 00:19:41.584534', 'step': 4903, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:41.639744', 'step': 4903, 'epoch': 1}
{'type': 'loss', 'content': 0.11456969380378723, 'timestamp': '2025-10-02 00:19:41.645559', 'step': 4904, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:41.699467', 'step': 4904, 'epoch': 1}
{'type': 'loss', 'content': 0.050281330943107605, 'timestamp': '2025-10-02 00:19:41.701634', 'step': 4905, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:41.756085', 'step': 4905, 'epoch': 1}
{'type': 'loss', 'content': 0.12387819588184357, 'timestamp': '2025-10-02 00:19:41.759189', 'step': 4906, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:41.815540', 'step': 4906, 'epoch': 1}
{'type': 'loss', 'content': 0.022649215534329414, 'timestamp': '2025-10-02 00:19:41.821299', 'step': 4907, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:41.875708', 'step': 4907, 'epoch': 1}
{'type': 'loss', 'content': 0.2726588547229767, 'timestamp': '2025-10-02 00:19:41.881410', 'step': 4908, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:41.936066', 'step': 4908, 'epoch': 1}
{'type': 'loss', 'content': 0.17284375429153442, 'timestamp': '2025-10-02 00:19:41.938373', 'step': 4909, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:41.993483', 'step': 4909, 'epoch': 1}
{'type': 'loss', 'content': 0.08618966490030289, 'timestamp': '2025-10-02 00:19:42.001133', 'step': 4910, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:42.055348', 'step': 4910, 'epoch': 1}
{'type': 'loss', 'content': 0.0479372963309288, 'timestamp': '2025-10-02 00:19:42.057496', 'step': 4911, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:42.112134', 'step': 4911, 'epoch': 1}
{'type': 'loss', 'content': 0.0460309199988842, 'timestamp': '2025-10-02 00:19:42.118743', 'step': 4912, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:19:42.177729', 'step': 4912, 'epoch': 1}
{'type': 'loss', 'content': 0.02236228622496128, 'timestamp': '2025-10-02 00:19:42.188734', 'step': 4913, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:42.244556', 'step': 4913, 'epoch': 1}
{'type': 'loss', 'content': 0.2700190246105194, 'timestamp': '2025-10-02 00:19:42.248014', 'step': 4914, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:42.303043', 'step': 4914, 'epoch': 1}
{'type': 'loss', 'content': 0.037072960287332535, 'timestamp': '2025-10-02 00:19:42.305519', 'step': 4915, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:19:42.368071', 'step': 4915, 'epoch': 1}
{'type': 'loss', 'content': 0.03297815099358559, 'timestamp': '2025-10-02 00:19:42.379706', 'step': 4916, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:42.433251', 'step': 4916, 'epoch': 1}
{'type': 'loss', 'content': 0.2783982753753662, 'timestamp': '2025-10-02 00:19:42.435413', 'step': 4917, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:42.489857', 'step': 4917, 'epoch': 1}
{'type': 'loss', 'content': 0.08277089893817902, 'timestamp': '2025-10-02 00:19:42.492429', 'step': 4918, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:42.546863', 'step': 4918, 'epoch': 1}
{'type': 'loss', 'content': 0.031061606481671333, 'timestamp': '2025-10-02 00:19:42.549226', 'step': 4919, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:42.603399', 'step': 4919, 'epoch': 1}
{'type': 'loss', 'content': 0.060026198625564575, 'timestamp': '2025-10-02 00:19:42.609345', 'step': 4920, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:42.663039', 'step': 4920, 'epoch': 1}
{'type': 'loss', 'content': 0.11345873028039932, 'timestamp': '2025-10-02 00:19:42.665367', 'step': 4921, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:42.719488', 'step': 4921, 'epoch': 1}
{'type': 'loss', 'content': 0.07058562338352203, 'timestamp': '2025-10-02 00:19:42.722319', 'step': 4922, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:42.776173', 'step': 4922, 'epoch': 1}
{'type': 'loss', 'content': 0.1284489929676056, 'timestamp': '2025-10-02 00:19:42.778488', 'step': 4923, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:19:42.835191', 'step': 4923, 'epoch': 1}
{'type': 'loss', 'content': 0.14868105947971344, 'timestamp': '2025-10-02 00:19:42.840893', 'step': 4924, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:42.894443', 'step': 4924, 'epoch': 1}
{'type': 'loss', 'content': 0.048525482416152954, 'timestamp': '2025-10-02 00:19:42.904085', 'step': 4925, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:42.959407', 'step': 4925, 'epoch': 1}
{'type': 'loss', 'content': 0.1562991440296173, 'timestamp': '2025-10-02 00:19:42.961932', 'step': 4926, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:43.016269', 'step': 4926, 'epoch': 1}
{'type': 'loss', 'content': 0.11706312745809555, 'timestamp': '2025-10-02 00:19:43.021943', 'step': 4927, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:43.076300', 'step': 4927, 'epoch': 1}
{'type': 'loss', 'content': 0.06465984135866165, 'timestamp': '2025-10-02 00:19:43.083046', 'step': 4928, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:43.137539', 'step': 4928, 'epoch': 1}
{'type': 'loss', 'content': 0.126204714179039, 'timestamp': '2025-10-02 00:19:43.139888', 'step': 4929, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:43.193548', 'step': 4929, 'epoch': 1}
{'type': 'loss', 'content': 0.1907493621110916, 'timestamp': '2025-10-02 00:19:43.196246', 'step': 4930, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:43.250594', 'step': 4930, 'epoch': 1}
{'type': 'loss', 'content': 0.10183387994766235, 'timestamp': '2025-10-02 00:19:43.255670', 'step': 4931, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:43.310115', 'step': 4931, 'epoch': 1}
{'type': 'loss', 'content': 0.034956805408000946, 'timestamp': '2025-10-02 00:19:43.316722', 'step': 4932, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:43.371282', 'step': 4932, 'epoch': 1}
{'type': 'loss', 'content': 0.05721874162554741, 'timestamp': '2025-10-02 00:19:43.374218', 'step': 4933, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:43.428649', 'step': 4933, 'epoch': 1}
{'type': 'loss', 'content': 0.19892846047878265, 'timestamp': '2025-10-02 00:19:43.430895', 'step': 4934, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:43.485241', 'step': 4934, 'epoch': 1}
{'type': 'loss', 'content': 0.07389947772026062, 'timestamp': '2025-10-02 00:19:43.494617', 'step': 4935, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:19:43.557317', 'step': 4935, 'epoch': 1}
{'type': 'loss', 'content': 0.021306805312633514, 'timestamp': '2025-10-02 00:19:43.568774', 'step': 4936, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:19:43.629671', 'step': 4936, 'epoch': 1}
{'type': 'loss', 'content': 0.05248947814106941, 'timestamp': '2025-10-02 00:19:43.641196', 'step': 4937, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:43.696087', 'step': 4937, 'epoch': 1}
{'type': 'loss', 'content': 0.10026029497385025, 'timestamp': '2025-10-02 00:19:43.698265', 'step': 4938, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:43.752483', 'step': 4938, 'epoch': 1}
{'type': 'loss', 'content': 0.111467644572258, 'timestamp': '2025-10-02 00:19:43.759825', 'step': 4939, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:43.816323', 'step': 4939, 'epoch': 1}
{'type': 'loss', 'content': 0.03192288428544998, 'timestamp': '2025-10-02 00:19:43.826646', 'step': 4940, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:43.880706', 'step': 4940, 'epoch': 1}
{'type': 'loss', 'content': 0.08003809303045273, 'timestamp': '2025-10-02 00:19:43.886606', 'step': 4941, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:43.941138', 'step': 4941, 'epoch': 1}
{'type': 'loss', 'content': 0.04989497736096382, 'timestamp': '2025-10-02 00:19:43.943643', 'step': 4942, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:43.998213', 'step': 4942, 'epoch': 1}
{'type': 'loss', 'content': 0.050101350992918015, 'timestamp': '2025-10-02 00:19:44.003984', 'step': 4943, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:44.059121', 'step': 4943, 'epoch': 1}
{'type': 'loss', 'content': 0.03063453547656536, 'timestamp': '2025-10-02 00:19:44.065753', 'step': 4944, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:44.120194', 'step': 4944, 'epoch': 1}
{'type': 'loss', 'content': 0.0655626654624939, 'timestamp': '2025-10-02 00:19:44.129601', 'step': 4945, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:44.183588', 'step': 4945, 'epoch': 1}
{'type': 'loss', 'content': 0.07654920220375061, 'timestamp': '2025-10-02 00:19:44.189565', 'step': 4946, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:44.245193', 'step': 4946, 'epoch': 1}
{'type': 'loss', 'content': 0.10759232938289642, 'timestamp': '2025-10-02 00:19:44.247567', 'step': 4947, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:44.302113', 'step': 4947, 'epoch': 1}
{'type': 'loss', 'content': 0.1843317449092865, 'timestamp': '2025-10-02 00:19:44.308385', 'step': 4948, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:44.363309', 'step': 4948, 'epoch': 1}
{'type': 'loss', 'content': 0.07912709563970566, 'timestamp': '2025-10-02 00:19:44.373582', 'step': 4949, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:44.429058', 'step': 4949, 'epoch': 1}
{'type': 'loss', 'content': 0.12074161320924759, 'timestamp': '2025-10-02 00:19:44.431375', 'step': 4950, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:44.486365', 'step': 4950, 'epoch': 1}
{'type': 'loss', 'content': 0.028203899040818214, 'timestamp': '2025-10-02 00:19:44.495737', 'step': 4951, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:44.550171', 'step': 4951, 'epoch': 1}
{'type': 'loss', 'content': 0.04954370856285095, 'timestamp': '2025-10-02 00:19:44.558355', 'step': 4952, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:44.611782', 'step': 4952, 'epoch': 1}
{'type': 'loss', 'content': 0.07043113559484482, 'timestamp': '2025-10-02 00:19:44.614112', 'step': 4953, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:44.668383', 'step': 4953, 'epoch': 1}
{'type': 'loss', 'content': 0.17441630363464355, 'timestamp': '2025-10-02 00:19:44.670838', 'step': 4954, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:44.726652', 'step': 4954, 'epoch': 1}
{'type': 'loss', 'content': 0.07397481799125671, 'timestamp': '2025-10-02 00:19:44.736180', 'step': 4955, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:19:44.791057', 'step': 4955, 'epoch': 1}
{'type': 'loss', 'content': 0.08242367208003998, 'timestamp': '2025-10-02 00:19:44.796960', 'step': 4956, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:44.852992', 'step': 4956, 'epoch': 1}
{'type': 'loss', 'content': 0.1422659009695053, 'timestamp': '2025-10-02 00:19:44.855398', 'step': 4957, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:44.909604', 'step': 4957, 'epoch': 1}
{'type': 'loss', 'content': 0.08475682884454727, 'timestamp': '2025-10-02 00:19:44.911806', 'step': 4958, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:44.966389', 'step': 4958, 'epoch': 1}
{'type': 'loss', 'content': 0.03231709450483322, 'timestamp': '2025-10-02 00:19:44.972299', 'step': 4959, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:19:45.026797', 'step': 4959, 'epoch': 1}
{'type': 'loss', 'content': 0.21809880435466766, 'timestamp': '2025-10-02 00:19:45.032925', 'step': 4960, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:45.086172', 'step': 4960, 'epoch': 1}
{'type': 'loss', 'content': 0.14284169673919678, 'timestamp': '2025-10-02 00:19:45.088522', 'step': 4961, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:45.145181', 'step': 4961, 'epoch': 1}
{'type': 'loss', 'content': 0.030530588701367378, 'timestamp': '2025-10-02 00:19:45.150988', 'step': 4962, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:45.206158', 'step': 4962, 'epoch': 1}
{'type': 'loss', 'content': 0.18179458379745483, 'timestamp': '2025-10-02 00:19:45.208270', 'step': 4963, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:19:45.262771', 'step': 4963, 'epoch': 1}
{'type': 'loss', 'content': 0.0832873284816742, 'timestamp': '2025-10-02 00:19:45.269305', 'step': 4964, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:45.323348', 'step': 4964, 'epoch': 1}
{'type': 'loss', 'content': 0.11917947977781296, 'timestamp': '2025-10-02 00:19:45.330984', 'step': 4965, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:19:45.394219', 'step': 4965, 'epoch': 1}
{'type': 'loss', 'content': 0.07327581942081451, 'timestamp': '2025-10-02 00:19:45.405068', 'step': 4966, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:45.467052', 'step': 4966, 'epoch': 1}
{'type': 'loss', 'content': 0.07738404721021652, 'timestamp': '2025-10-02 00:19:45.477542', 'step': 4967, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:45.544705', 'step': 4967, 'epoch': 1}
{'type': 'loss', 'content': 0.07174556702375412, 'timestamp': '2025-10-02 00:19:45.551426', 'step': 4968, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:45.605451', 'step': 4968, 'epoch': 1}
{'type': 'loss', 'content': 0.13085755705833435, 'timestamp': '2025-10-02 00:19:45.611340', 'step': 4969, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:45.665684', 'step': 4969, 'epoch': 1}
{'type': 'loss', 'content': 0.049780406057834625, 'timestamp': '2025-10-02 00:19:45.668732', 'step': 4970, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:45.723169', 'step': 4970, 'epoch': 1}
{'type': 'loss', 'content': 0.030997006222605705, 'timestamp': '2025-10-02 00:19:45.725536', 'step': 4971, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:45.780403', 'step': 4971, 'epoch': 1}
{'type': 'loss', 'content': 0.059992894530296326, 'timestamp': '2025-10-02 00:19:45.788467', 'step': 4972, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:45.843805', 'step': 4972, 'epoch': 1}
{'type': 'loss', 'content': 0.03401652351021767, 'timestamp': '2025-10-02 00:19:45.853096', 'step': 4973, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:19:45.908328', 'step': 4973, 'epoch': 1}
{'type': 'loss', 'content': 0.257236123085022, 'timestamp': '2025-10-02 00:19:45.911075', 'step': 4974, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:45.965663', 'step': 4974, 'epoch': 1}
{'type': 'loss', 'content': 0.07359713315963745, 'timestamp': '2025-10-02 00:19:45.971496', 'step': 4975, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:46.026641', 'step': 4975, 'epoch': 1}
{'type': 'loss', 'content': 0.056829120963811874, 'timestamp': '2025-10-02 00:19:46.034798', 'step': 4976, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:19:46.095282', 'step': 4976, 'epoch': 1}
{'type': 'loss', 'content': 0.03294455260038376, 'timestamp': '2025-10-02 00:19:46.107030', 'step': 4977, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:46.168282', 'step': 4977, 'epoch': 1}
{'type': 'loss', 'content': 0.08761385083198547, 'timestamp': '2025-10-02 00:19:46.178781', 'step': 4978, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:19:46.232853', 'step': 4978, 'epoch': 1}
{'type': 'loss', 'content': 0.13241833448410034, 'timestamp': '2025-10-02 00:19:46.235111', 'step': 4979, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:46.289707', 'step': 4979, 'epoch': 1}
{'type': 'loss', 'content': 0.0509396530687809, 'timestamp': '2025-10-02 00:19:46.299846', 'step': 4980, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:46.354631', 'step': 4980, 'epoch': 1}
{'type': 'loss', 'content': 0.028086090460419655, 'timestamp': '2025-10-02 00:19:46.357248', 'step': 4981, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:46.412864', 'step': 4981, 'epoch': 1}
{'type': 'loss', 'content': 0.04869738221168518, 'timestamp': '2025-10-02 00:19:46.415235', 'step': 4982, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:46.470310', 'step': 4982, 'epoch': 1}
{'type': 'loss', 'content': 0.06057152524590492, 'timestamp': '2025-10-02 00:19:46.479688', 'step': 4983, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:19:46.547576', 'step': 4983, 'epoch': 1}
{'type': 'loss', 'content': 0.05667191743850708, 'timestamp': '2025-10-02 00:19:46.560313', 'step': 4984, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:46.614989', 'step': 4984, 'epoch': 1}
{'type': 'loss', 'content': 0.022617727518081665, 'timestamp': '2025-10-02 00:19:46.617352', 'step': 4985, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:46.671533', 'step': 4985, 'epoch': 1}
{'type': 'loss', 'content': 0.07265390455722809, 'timestamp': '2025-10-02 00:19:46.674338', 'step': 4986, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:46.729337', 'step': 4986, 'epoch': 1}
{'type': 'loss', 'content': 0.13133443892002106, 'timestamp': '2025-10-02 00:19:46.736876', 'step': 4987, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:19:46.790857', 'step': 4987, 'epoch': 1}
{'type': 'loss', 'content': 0.09252271801233292, 'timestamp': '2025-10-02 00:19:46.796667', 'step': 4988, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:46.856628', 'step': 4988, 'epoch': 1}
{'type': 'loss', 'content': 0.016767891123890877, 'timestamp': '2025-10-02 00:19:46.867990', 'step': 4989, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:46.922736', 'step': 4989, 'epoch': 1}
{'type': 'loss', 'content': 0.1761016845703125, 'timestamp': '2025-10-02 00:19:46.925589', 'step': 4990, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:46.980249', 'step': 4990, 'epoch': 1}
{'type': 'loss', 'content': 0.04159298911690712, 'timestamp': '2025-10-02 00:19:46.982744', 'step': 4991, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:47.036918', 'step': 4991, 'epoch': 1}
{'type': 'loss', 'content': 0.09836522489786148, 'timestamp': '2025-10-02 00:19:47.042869', 'step': 4992, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:19:47.096651', 'step': 4992, 'epoch': 1}
{'type': 'loss', 'content': 0.09397923946380615, 'timestamp': '2025-10-02 00:19:47.098924', 'step': 4993, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:19:47.153195', 'step': 4993, 'epoch': 1}
{'type': 'loss', 'content': 0.0650748759508133, 'timestamp': '2025-10-02 00:19:47.155352', 'step': 4994, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:47.209373', 'step': 4994, 'epoch': 1}
{'type': 'loss', 'content': 0.0679214671254158, 'timestamp': '2025-10-02 00:19:47.212558', 'step': 4995, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:19:47.266334', 'step': 4995, 'epoch': 1}
{'type': 'loss', 'content': 0.14365524053573608, 'timestamp': '2025-10-02 00:19:47.272412', 'step': 4996, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:47.332494', 'step': 4996, 'epoch': 1}
{'type': 'loss', 'content': 0.036582402884960175, 'timestamp': '2025-10-02 00:19:47.343844', 'step': 4997, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:47.398198', 'step': 4997, 'epoch': 1}
{'type': 'loss', 'content': 0.06737300753593445, 'timestamp': '2025-10-02 00:19:47.400502', 'step': 4998, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:47.455392', 'step': 4998, 'epoch': 1}
{'type': 'loss', 'content': 0.07853762805461884, 'timestamp': '2025-10-02 00:19:47.457924', 'step': 4999, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:47.512262', 'step': 4999, 'epoch': 1}
{'type': 'loss', 'content': 0.014782872051000595, 'timestamp': '2025-10-02 00:19:47.522413', 'step': 5000, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 5000', 'timestamp': '2025-10-02 00:19:48.086425', 'step': 5000, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:48.145057', 'step': 5000, 'epoch': 1}
{'type': 'loss', 'content': 0.08300724625587463, 'timestamp': '2025-10-02 00:19:48.147548', 'step': 5001, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:48.202522', 'step': 5001, 'epoch': 1}
{'type': 'loss', 'content': 0.13197052478790283, 'timestamp': '2025-10-02 00:19:48.205320', 'step': 5002, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:48.259753', 'step': 5002, 'epoch': 1}
{'type': 'loss', 'content': 0.05562390387058258, 'timestamp': '2025-10-02 00:19:48.277646', 'step': 5003, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:19:48.345384', 'step': 5003, 'epoch': 1}
{'type': 'loss', 'content': 0.0633184015750885, 'timestamp': '2025-10-02 00:19:48.352128', 'step': 5004, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:48.407952', 'step': 5004, 'epoch': 1}
{'type': 'loss', 'content': 0.044443558901548386, 'timestamp': '2025-10-02 00:19:48.418215', 'step': 5005, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:48.480422', 'step': 5005, 'epoch': 1}
{'type': 'loss', 'content': 0.014251122251152992, 'timestamp': '2025-10-02 00:19:48.490922', 'step': 5006, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:48.546062', 'step': 5006, 'epoch': 1}
{'type': 'loss', 'content': 0.038358110934495926, 'timestamp': '2025-10-02 00:19:48.551745', 'step': 5007, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:48.606326', 'step': 5007, 'epoch': 1}
{'type': 'loss', 'content': 0.07611080259084702, 'timestamp': '2025-10-02 00:19:48.616489', 'step': 5008, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:48.672292', 'step': 5008, 'epoch': 1}
{'type': 'loss', 'content': 0.11076623201370239, 'timestamp': '2025-10-02 00:19:48.674678', 'step': 5009, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:48.729260', 'step': 5009, 'epoch': 1}
{'type': 'loss', 'content': 0.12935972213745117, 'timestamp': '2025-10-02 00:19:48.731760', 'step': 5010, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:19:48.788116', 'step': 5010, 'epoch': 1}
{'type': 'loss', 'content': 0.0867774561047554, 'timestamp': '2025-10-02 00:19:48.790615', 'step': 5011, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:48.845327', 'step': 5011, 'epoch': 1}
{'type': 'loss', 'content': 0.11538764089345932, 'timestamp': '2025-10-02 00:19:48.851465', 'step': 5012, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:48.906084', 'step': 5012, 'epoch': 1}
{'type': 'loss', 'content': 0.03477860987186432, 'timestamp': '2025-10-02 00:19:48.908774', 'step': 5013, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:48.964326', 'step': 5013, 'epoch': 1}
{'type': 'loss', 'content': 0.015759997069835663, 'timestamp': '2025-10-02 00:19:48.967360', 'step': 5014, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:19:49.024307', 'step': 5014, 'epoch': 1}
{'type': 'loss', 'content': 0.10667888075113297, 'timestamp': '2025-10-02 00:19:49.027113', 'step': 5015, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:49.082988', 'step': 5015, 'epoch': 1}
{'type': 'loss', 'content': 0.10554324835538864, 'timestamp': '2025-10-02 00:19:49.089269', 'step': 5016, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:19:49.144374', 'step': 5016, 'epoch': 1}
{'type': 'loss', 'content': 0.07068140059709549, 'timestamp': '2025-10-02 00:19:49.146952', 'step': 5017, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:49.203068', 'step': 5017, 'epoch': 1}
{'type': 'loss', 'content': 0.07862042635679245, 'timestamp': '2025-10-02 00:19:49.208858', 'step': 5018, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:49.266099', 'step': 5018, 'epoch': 1}
{'type': 'loss', 'content': 0.032493483275175095, 'timestamp': '2025-10-02 00:19:49.275428', 'step': 5019, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:19:49.329516', 'step': 5019, 'epoch': 1}
{'type': 'loss', 'content': 0.10628721863031387, 'timestamp': '2025-10-02 00:19:49.335750', 'step': 5020, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:19:49.390627', 'step': 5020, 'epoch': 1}
{'type': 'loss', 'content': 0.04462520033121109, 'timestamp': '2025-10-02 00:19:49.393289', 'step': 5021, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:49.449949', 'step': 5021, 'epoch': 1}
{'type': 'loss', 'content': 0.15150916576385498, 'timestamp': '2025-10-02 00:19:49.452411', 'step': 5022, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:19:49.512708', 'step': 5022, 'epoch': 1}
{'type': 'loss', 'content': 0.1029181033372879, 'timestamp': '2025-10-02 00:19:49.522904', 'step': 5023, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:49.579329', 'step': 5023, 'epoch': 1}
{'type': 'loss', 'content': 0.08444838970899582, 'timestamp': '2025-10-02 00:19:49.585906', 'step': 5024, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:49.641388', 'step': 5024, 'epoch': 1}
{'type': 'loss', 'content': 0.11862898617982864, 'timestamp': '2025-10-02 00:19:49.643496', 'step': 5025, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:49.700376', 'step': 5025, 'epoch': 1}
{'type': 'loss', 'content': 0.067174531519413, 'timestamp': '2025-10-02 00:19:49.703334', 'step': 5026, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:49.758599', 'step': 5026, 'epoch': 1}
{'type': 'loss', 'content': 0.12480948120355606, 'timestamp': '2025-10-02 00:19:49.767904', 'step': 5027, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:49.826654', 'step': 5027, 'epoch': 1}
{'type': 'loss', 'content': 0.018922356888651848, 'timestamp': '2025-10-02 00:19:49.836793', 'step': 5028, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:49.894116', 'step': 5028, 'epoch': 1}
{'type': 'loss', 'content': 0.06376492232084274, 'timestamp': '2025-10-02 00:19:49.904357', 'step': 5029, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:49.959063', 'step': 5029, 'epoch': 1}
{'type': 'loss', 'content': 0.11401456594467163, 'timestamp': '2025-10-02 00:19:49.964687', 'step': 5030, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:19:50.032170', 'step': 5030, 'epoch': 1}
{'type': 'loss', 'content': 0.09525804966688156, 'timestamp': '2025-10-02 00:19:50.035153', 'step': 5031, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:50.088690', 'step': 5031, 'epoch': 1}
{'type': 'loss', 'content': 0.28781822323799133, 'timestamp': '2025-10-02 00:19:50.094888', 'step': 5032, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:50.155167', 'step': 5032, 'epoch': 1}
{'type': 'loss', 'content': 0.03040318377315998, 'timestamp': '2025-10-02 00:19:50.166510', 'step': 5033, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:50.223904', 'step': 5033, 'epoch': 1}
{'type': 'loss', 'content': 0.015526008792221546, 'timestamp': '2025-10-02 00:19:50.233301', 'step': 5034, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:50.289659', 'step': 5034, 'epoch': 1}
{'type': 'loss', 'content': 0.11482980102300644, 'timestamp': '2025-10-02 00:19:50.292047', 'step': 5035, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:50.347237', 'step': 5035, 'epoch': 1}
{'type': 'loss', 'content': 0.1662723869085312, 'timestamp': '2025-10-02 00:19:50.353224', 'step': 5036, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:50.407370', 'step': 5036, 'epoch': 1}
{'type': 'loss', 'content': 0.12254002690315247, 'timestamp': '2025-10-02 00:19:50.413336', 'step': 5037, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:50.468305', 'step': 5037, 'epoch': 1}
{'type': 'loss', 'content': 0.05629896745085716, 'timestamp': '2025-10-02 00:19:50.475809', 'step': 5038, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:50.540371', 'step': 5038, 'epoch': 1}
{'type': 'loss', 'content': 0.06925676763057709, 'timestamp': '2025-10-02 00:19:50.550863', 'step': 5039, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:19:50.605651', 'step': 5039, 'epoch': 1}
{'type': 'loss', 'content': 0.06637995690107346, 'timestamp': '2025-10-02 00:19:50.612275', 'step': 5040, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:50.666860', 'step': 5040, 'epoch': 1}
{'type': 'loss', 'content': 0.050121478736400604, 'timestamp': '2025-10-02 00:19:50.672805', 'step': 5041, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:50.729496', 'step': 5041, 'epoch': 1}
{'type': 'loss', 'content': 0.06313057988882065, 'timestamp': '2025-10-02 00:19:50.732258', 'step': 5042, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:50.786932', 'step': 5042, 'epoch': 1}
{'type': 'loss', 'content': 0.08886340260505676, 'timestamp': '2025-10-02 00:19:50.789699', 'step': 5043, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:19:50.845375', 'step': 5043, 'epoch': 1}
{'type': 'loss', 'content': 0.1264439970254898, 'timestamp': '2025-10-02 00:19:50.851066', 'step': 5044, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:19:50.914625', 'step': 5044, 'epoch': 1}
{'type': 'loss', 'content': 0.011917829513549805, 'timestamp': '2025-10-02 00:19:50.925665', 'step': 5045, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:50.980934', 'step': 5045, 'epoch': 1}
{'type': 'loss', 'content': 0.1771487444639206, 'timestamp': '2025-10-02 00:19:50.983384', 'step': 5046, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:51.037281', 'step': 5046, 'epoch': 1}
{'type': 'loss', 'content': 0.13112062215805054, 'timestamp': '2025-10-02 00:19:51.039762', 'step': 5047, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:19:51.099849', 'step': 5047, 'epoch': 1}
{'type': 'loss', 'content': 0.08237878233194351, 'timestamp': '2025-10-02 00:19:51.105783', 'step': 5048, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:51.159988', 'step': 5048, 'epoch': 1}
{'type': 'loss', 'content': 0.02734275348484516, 'timestamp': '2025-10-02 00:19:51.169419', 'step': 5049, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:51.224000', 'step': 5049, 'epoch': 1}
{'type': 'loss', 'content': 0.044004861265420914, 'timestamp': '2025-10-02 00:19:51.229866', 'step': 5050, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:51.284285', 'step': 5050, 'epoch': 1}
{'type': 'loss', 'content': 0.1961451917886734, 'timestamp': '2025-10-02 00:19:51.286868', 'step': 5051, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:51.340789', 'step': 5051, 'epoch': 1}
{'type': 'loss', 'content': 0.10038372129201889, 'timestamp': '2025-10-02 00:19:51.346557', 'step': 5052, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:19:51.414346', 'step': 5052, 'epoch': 1}
{'type': 'loss', 'content': 0.03857627883553505, 'timestamp': '2025-10-02 00:19:51.427734', 'step': 5053, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:51.482231', 'step': 5053, 'epoch': 1}
{'type': 'loss', 'content': 0.13303552567958832, 'timestamp': '2025-10-02 00:19:51.484311', 'step': 5054, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:51.545022', 'step': 5054, 'epoch': 1}
{'type': 'loss', 'content': 0.029506299644708633, 'timestamp': '2025-10-02 00:19:51.555515', 'step': 5055, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:51.611595', 'step': 5055, 'epoch': 1}
{'type': 'loss', 'content': 0.10430918633937836, 'timestamp': '2025-10-02 00:19:51.618603', 'step': 5056, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:51.672421', 'step': 5056, 'epoch': 1}
{'type': 'loss', 'content': 0.14508113265037537, 'timestamp': '2025-10-02 00:19:51.674730', 'step': 5057, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:51.729027', 'step': 5057, 'epoch': 1}
{'type': 'loss', 'content': 0.106427863240242, 'timestamp': '2025-10-02 00:19:51.731226', 'step': 5058, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:19:51.785213', 'step': 5058, 'epoch': 1}
{'type': 'loss', 'content': 0.09089956432580948, 'timestamp': '2025-10-02 00:19:51.787270', 'step': 5059, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:51.842937', 'step': 5059, 'epoch': 1}
{'type': 'loss', 'content': 0.09533189982175827, 'timestamp': '2025-10-02 00:19:51.848814', 'step': 5060, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:51.902653', 'step': 5060, 'epoch': 1}
{'type': 'loss', 'content': 0.08521220088005066, 'timestamp': '2025-10-02 00:19:51.904987', 'step': 5061, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:19:51.959416', 'step': 5061, 'epoch': 1}
{'type': 'loss', 'content': 0.2853853106498718, 'timestamp': '2025-10-02 00:19:51.962096', 'step': 5062, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:52.017184', 'step': 5062, 'epoch': 1}
{'type': 'loss', 'content': 0.05250011757016182, 'timestamp': '2025-10-02 00:19:52.023404', 'step': 5063, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:52.093961', 'step': 5063, 'epoch': 1}
{'type': 'loss', 'content': 0.14281339943408966, 'timestamp': '2025-10-02 00:19:52.099919', 'step': 5064, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:52.160805', 'step': 5064, 'epoch': 1}
{'type': 'loss', 'content': 0.033054694533348083, 'timestamp': '2025-10-02 00:19:52.172143', 'step': 5065, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:52.225760', 'step': 5065, 'epoch': 1}
{'type': 'loss', 'content': 0.15125210583209991, 'timestamp': '2025-10-02 00:19:52.228529', 'step': 5066, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:52.283015', 'step': 5066, 'epoch': 1}
{'type': 'loss', 'content': 0.059409186244010925, 'timestamp': '2025-10-02 00:19:52.288869', 'step': 5067, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:19:52.348392', 'step': 5067, 'epoch': 1}
{'type': 'loss', 'content': 0.03400826081633568, 'timestamp': '2025-10-02 00:19:52.359365', 'step': 5068, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:52.413284', 'step': 5068, 'epoch': 1}
{'type': 'loss', 'content': 0.09155821055173874, 'timestamp': '2025-10-02 00:19:52.415485', 'step': 5069, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:52.470248', 'step': 5069, 'epoch': 1}
{'type': 'loss', 'content': 0.21441924571990967, 'timestamp': '2025-10-02 00:19:52.476093', 'step': 5070, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:52.531925', 'step': 5070, 'epoch': 1}
{'type': 'loss', 'content': 0.060188744217157364, 'timestamp': '2025-10-02 00:19:52.537836', 'step': 5071, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:52.592430', 'step': 5071, 'epoch': 1}
{'type': 'loss', 'content': 0.09587634354829788, 'timestamp': '2025-10-02 00:19:52.598066', 'step': 5072, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:52.655114', 'step': 5072, 'epoch': 1}
{'type': 'loss', 'content': 0.12404415011405945, 'timestamp': '2025-10-02 00:19:52.657462', 'step': 5073, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:52.712224', 'step': 5073, 'epoch': 1}
{'type': 'loss', 'content': 0.05085480958223343, 'timestamp': '2025-10-02 00:19:52.714935', 'step': 5074, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:19:52.783984', 'step': 5074, 'epoch': 1}
{'type': 'loss', 'content': 0.0619894377887249, 'timestamp': '2025-10-02 00:19:52.796277', 'step': 5075, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:19:52.850852', 'step': 5075, 'epoch': 1}
{'type': 'loss', 'content': 0.13033126294612885, 'timestamp': '2025-10-02 00:19:52.856490', 'step': 5076, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:52.910049', 'step': 5076, 'epoch': 1}
{'type': 'loss', 'content': 0.08000468462705612, 'timestamp': '2025-10-02 00:19:52.920558', 'step': 5077, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:52.981214', 'step': 5077, 'epoch': 1}
{'type': 'loss', 'content': 0.05949602276086807, 'timestamp': '2025-10-02 00:19:52.991792', 'step': 5078, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:53.046494', 'step': 5078, 'epoch': 1}
{'type': 'loss', 'content': 0.12855064868927002, 'timestamp': '2025-10-02 00:19:53.048970', 'step': 5079, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:53.102421', 'step': 5079, 'epoch': 1}
{'type': 'loss', 'content': 0.24210353195667267, 'timestamp': '2025-10-02 00:19:53.108001', 'step': 5080, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:53.161001', 'step': 5080, 'epoch': 1}
{'type': 'loss', 'content': 0.1107044443488121, 'timestamp': '2025-10-02 00:19:53.163333', 'step': 5081, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:19:53.228694', 'step': 5081, 'epoch': 1}
{'type': 'loss', 'content': 0.0914812833070755, 'timestamp': '2025-10-02 00:19:53.231000', 'step': 5082, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:53.284745', 'step': 5082, 'epoch': 1}
{'type': 'loss', 'content': 0.05461712181568146, 'timestamp': '2025-10-02 00:19:53.287373', 'step': 5083, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:53.342421', 'step': 5083, 'epoch': 1}
{'type': 'loss', 'content': 0.26961153745651245, 'timestamp': '2025-10-02 00:19:53.348168', 'step': 5084, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:19:53.401936', 'step': 5084, 'epoch': 1}
{'type': 'loss', 'content': 0.11870525032281876, 'timestamp': '2025-10-02 00:19:53.404470', 'step': 5085, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:53.458385', 'step': 5085, 'epoch': 1}
{'type': 'loss', 'content': 0.08313202857971191, 'timestamp': '2025-10-02 00:19:53.464146', 'step': 5086, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:53.520110', 'step': 5086, 'epoch': 1}
{'type': 'loss', 'content': 0.20203256607055664, 'timestamp': '2025-10-02 00:19:53.522434', 'step': 5087, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:53.577473', 'step': 5087, 'epoch': 1}
{'type': 'loss', 'content': 0.0809478834271431, 'timestamp': '2025-10-02 00:19:53.585552', 'step': 5088, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:53.640876', 'step': 5088, 'epoch': 1}
{'type': 'loss', 'content': 0.06106869503855705, 'timestamp': '2025-10-02 00:19:53.643376', 'step': 5089, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:53.697994', 'step': 5089, 'epoch': 1}
{'type': 'loss', 'content': 0.1404237598180771, 'timestamp': '2025-10-02 00:19:53.700839', 'step': 5090, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:53.755843', 'step': 5090, 'epoch': 1}
{'type': 'loss', 'content': 0.04024538770318031, 'timestamp': '2025-10-02 00:19:53.765037', 'step': 5091, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:53.819331', 'step': 5091, 'epoch': 1}
{'type': 'loss', 'content': 0.12454652786254883, 'timestamp': '2025-10-02 00:19:53.826154', 'step': 5092, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:53.880056', 'step': 5092, 'epoch': 1}
{'type': 'loss', 'content': 0.07962005585432053, 'timestamp': '2025-10-02 00:19:53.882494', 'step': 5093, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:53.937758', 'step': 5093, 'epoch': 1}
{'type': 'loss', 'content': 0.07104197144508362, 'timestamp': '2025-10-02 00:19:53.944746', 'step': 5094, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:54.002352', 'step': 5094, 'epoch': 1}
{'type': 'loss', 'content': 0.14018838107585907, 'timestamp': '2025-10-02 00:19:54.004930', 'step': 5095, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:54.059337', 'step': 5095, 'epoch': 1}
{'type': 'loss', 'content': 0.1322339028120041, 'timestamp': '2025-10-02 00:19:54.065386', 'step': 5096, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:54.119478', 'step': 5096, 'epoch': 1}
{'type': 'loss', 'content': 0.05864395946264267, 'timestamp': '2025-10-02 00:19:54.121887', 'step': 5097, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:19:54.176911', 'step': 5097, 'epoch': 1}
{'type': 'loss', 'content': 0.0923842117190361, 'timestamp': '2025-10-02 00:19:54.178947', 'step': 5098, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:54.233504', 'step': 5098, 'epoch': 1}
{'type': 'loss', 'content': 0.17633526027202606, 'timestamp': '2025-10-02 00:19:54.236555', 'step': 5099, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:54.291215', 'step': 5099, 'epoch': 1}
{'type': 'loss', 'content': 0.12591378390789032, 'timestamp': '2025-10-02 00:19:54.297416', 'step': 5100, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:54.358561', 'step': 5100, 'epoch': 1}
{'type': 'loss', 'content': 0.029268799349665642, 'timestamp': '2025-10-02 00:19:54.369868', 'step': 5101, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:19:54.428595', 'step': 5101, 'epoch': 1}
{'type': 'loss', 'content': 0.23786842823028564, 'timestamp': '2025-10-02 00:19:54.430742', 'step': 5102, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:54.484705', 'step': 5102, 'epoch': 1}
{'type': 'loss', 'content': 0.22370785474777222, 'timestamp': '2025-10-02 00:19:54.491381', 'step': 5103, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:19:54.553002', 'step': 5103, 'epoch': 1}
{'type': 'loss', 'content': 0.1577926129102707, 'timestamp': '2025-10-02 00:19:54.563442', 'step': 5104, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:19:54.619306', 'step': 5104, 'epoch': 1}
{'type': 'loss', 'content': 0.1418924182653427, 'timestamp': '2025-10-02 00:19:54.621926', 'step': 5105, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:54.684338', 'step': 5105, 'epoch': 1}
{'type': 'loss', 'content': 0.05638539046049118, 'timestamp': '2025-10-02 00:19:54.694816', 'step': 5106, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:54.750224', 'step': 5106, 'epoch': 1}
{'type': 'loss', 'content': 0.06691902875900269, 'timestamp': '2025-10-02 00:19:54.752783', 'step': 5107, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:19:54.808327', 'step': 5107, 'epoch': 1}
{'type': 'loss', 'content': 0.1101570576429367, 'timestamp': '2025-10-02 00:19:54.814752', 'step': 5108, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:19:54.874342', 'step': 5108, 'epoch': 1}
{'type': 'loss', 'content': 0.03524108603596687, 'timestamp': '2025-10-02 00:19:54.885729', 'step': 5109, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:54.941594', 'step': 5109, 'epoch': 1}
{'type': 'loss', 'content': 0.020741630345582962, 'timestamp': '2025-10-02 00:19:54.951083', 'step': 5110, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:55.005514', 'step': 5110, 'epoch': 1}
{'type': 'loss', 'content': 0.16225838661193848, 'timestamp': '2025-10-02 00:19:55.007855', 'step': 5111, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:55.061257', 'step': 5111, 'epoch': 1}
{'type': 'loss', 'content': 0.09513863176107407, 'timestamp': '2025-10-02 00:19:55.067186', 'step': 5112, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:19:55.132391', 'step': 5112, 'epoch': 1}
{'type': 'loss', 'content': 0.06767424196004868, 'timestamp': '2025-10-02 00:19:55.143363', 'step': 5113, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:55.201372', 'step': 5113, 'epoch': 1}
{'type': 'loss', 'content': 0.0662807747721672, 'timestamp': '2025-10-02 00:19:55.210948', 'step': 5114, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:19:55.269279', 'step': 5114, 'epoch': 1}
{'type': 'loss', 'content': 0.1319374442100525, 'timestamp': '2025-10-02 00:19:55.272355', 'step': 5115, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:55.326048', 'step': 5115, 'epoch': 1}
{'type': 'loss', 'content': 0.11433921009302139, 'timestamp': '2025-10-02 00:19:55.331908', 'step': 5116, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:19:55.395599', 'step': 5116, 'epoch': 1}
{'type': 'loss', 'content': 0.2546369135379791, 'timestamp': '2025-10-02 00:19:55.397935', 'step': 5117, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:19:55.466646', 'step': 5117, 'epoch': 1}
{'type': 'loss', 'content': 0.04381284490227699, 'timestamp': '2025-10-02 00:19:55.476836', 'step': 5118, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:19:55.536835', 'step': 5118, 'epoch': 1}
{'type': 'loss', 'content': 0.029879221692681313, 'timestamp': '2025-10-02 00:19:55.547012', 'step': 5119, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:55.604827', 'step': 5119, 'epoch': 1}
{'type': 'loss', 'content': 0.054442014545202255, 'timestamp': '2025-10-02 00:19:55.610921', 'step': 5120, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:19:55.696148', 'step': 5120, 'epoch': 1}
{'type': 'loss', 'content': 0.03535047173500061, 'timestamp': '2025-10-02 00:19:55.711225', 'step': 5121, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:55.766744', 'step': 5121, 'epoch': 1}
{'type': 'loss', 'content': 0.09067823737859726, 'timestamp': '2025-10-02 00:19:55.769027', 'step': 5122, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:19:55.824693', 'step': 5122, 'epoch': 1}
{'type': 'loss', 'content': 0.10419581830501556, 'timestamp': '2025-10-02 00:19:55.827760', 'step': 5123, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:55.882097', 'step': 5123, 'epoch': 1}
{'type': 'loss', 'content': 0.08232627063989639, 'timestamp': '2025-10-02 00:19:55.887877', 'step': 5124, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:19:55.941240', 'step': 5124, 'epoch': 1}
{'type': 'loss', 'content': 0.07090148329734802, 'timestamp': '2025-10-02 00:19:55.943768', 'step': 5125, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:56.000083', 'step': 5125, 'epoch': 1}
{'type': 'loss', 'content': 0.0302900280803442, 'timestamp': '2025-10-02 00:19:56.002404', 'step': 5126, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:56.057076', 'step': 5126, 'epoch': 1}
{'type': 'loss', 'content': 0.03794041648507118, 'timestamp': '2025-10-02 00:19:56.062898', 'step': 5127, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:19:56.122029', 'step': 5127, 'epoch': 1}
{'type': 'loss', 'content': 0.13274264335632324, 'timestamp': '2025-10-02 00:19:56.129576', 'step': 5128, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:56.186457', 'step': 5128, 'epoch': 1}
{'type': 'loss', 'content': 0.07264622300863266, 'timestamp': '2025-10-02 00:19:56.196065', 'step': 5129, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:56.250645', 'step': 5129, 'epoch': 1}
{'type': 'loss', 'content': 0.10941580682992935, 'timestamp': '2025-10-02 00:19:56.253386', 'step': 5130, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:56.309738', 'step': 5130, 'epoch': 1}
{'type': 'loss', 'content': 0.11071432381868362, 'timestamp': '2025-10-02 00:19:56.312030', 'step': 5131, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:56.366979', 'step': 5131, 'epoch': 1}
{'type': 'loss', 'content': 0.12818457186222076, 'timestamp': '2025-10-02 00:19:56.372861', 'step': 5132, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:56.426012', 'step': 5132, 'epoch': 1}
{'type': 'loss', 'content': 0.08351778239011765, 'timestamp': '2025-10-02 00:19:56.435474', 'step': 5133, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:19:56.497334', 'step': 5133, 'epoch': 1}
{'type': 'loss', 'content': 0.048978861421346664, 'timestamp': '2025-10-02 00:19:56.507988', 'step': 5134, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:56.561824', 'step': 5134, 'epoch': 1}
{'type': 'loss', 'content': 0.19042181968688965, 'timestamp': '2025-10-02 00:19:56.566402', 'step': 5135, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:56.626017', 'step': 5135, 'epoch': 1}
{'type': 'loss', 'content': 0.05494708567857742, 'timestamp': '2025-10-02 00:19:56.636090', 'step': 5136, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:56.690842', 'step': 5136, 'epoch': 1}
{'type': 'loss', 'content': 0.10941555351018906, 'timestamp': '2025-10-02 00:19:56.694950', 'step': 5137, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:19:56.753441', 'step': 5137, 'epoch': 1}
{'type': 'loss', 'content': 0.15230663120746613, 'timestamp': '2025-10-02 00:19:56.755662', 'step': 5138, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:56.812218', 'step': 5138, 'epoch': 1}
{'type': 'loss', 'content': 0.06718647480010986, 'timestamp': '2025-10-02 00:19:56.827924', 'step': 5139, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:56.887814', 'step': 5139, 'epoch': 1}
{'type': 'loss', 'content': 0.1116516962647438, 'timestamp': '2025-10-02 00:19:56.894356', 'step': 5140, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:56.948037', 'step': 5140, 'epoch': 1}
{'type': 'loss', 'content': 0.0507318377494812, 'timestamp': '2025-10-02 00:19:56.950406', 'step': 5141, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:19:57.012490', 'step': 5141, 'epoch': 1}
{'type': 'loss', 'content': 0.11823811382055283, 'timestamp': '2025-10-02 00:19:57.023313', 'step': 5142, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:57.078176', 'step': 5142, 'epoch': 1}
{'type': 'loss', 'content': 0.10742995142936707, 'timestamp': '2025-10-02 00:19:57.082621', 'step': 5143, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:57.138387', 'step': 5143, 'epoch': 1}
{'type': 'loss', 'content': 0.12991389632225037, 'timestamp': '2025-10-02 00:19:57.144343', 'step': 5144, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:57.197330', 'step': 5144, 'epoch': 1}
{'type': 'loss', 'content': 0.14968302845954895, 'timestamp': '2025-10-02 00:19:57.199954', 'step': 5145, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:19:57.254657', 'step': 5145, 'epoch': 1}
{'type': 'loss', 'content': 0.10735323280096054, 'timestamp': '2025-10-02 00:19:57.264032', 'step': 5146, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:19:57.326060', 'step': 5146, 'epoch': 1}
{'type': 'loss', 'content': 0.12224284559488297, 'timestamp': '2025-10-02 00:19:57.328246', 'step': 5147, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:57.382719', 'step': 5147, 'epoch': 1}
{'type': 'loss', 'content': 0.024357939139008522, 'timestamp': '2025-10-02 00:19:57.389763', 'step': 5148, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:57.450363', 'step': 5148, 'epoch': 1}
{'type': 'loss', 'content': 0.05644376203417778, 'timestamp': '2025-10-02 00:19:57.457944', 'step': 5149, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:57.518877', 'step': 5149, 'epoch': 1}
{'type': 'loss', 'content': 0.07362031936645508, 'timestamp': '2025-10-02 00:19:57.525098', 'step': 5150, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:57.581394', 'step': 5150, 'epoch': 1}
{'type': 'loss', 'content': 0.061365850269794464, 'timestamp': '2025-10-02 00:19:57.584044', 'step': 5151, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:57.638759', 'step': 5151, 'epoch': 1}
{'type': 'loss', 'content': 0.1524456888437271, 'timestamp': '2025-10-02 00:19:57.644484', 'step': 5152, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:57.699457', 'step': 5152, 'epoch': 1}
{'type': 'loss', 'content': 0.07080341875553131, 'timestamp': '2025-10-02 00:19:57.702348', 'step': 5153, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:19:57.765976', 'step': 5153, 'epoch': 1}
{'type': 'loss', 'content': 0.06729938834905624, 'timestamp': '2025-10-02 00:19:57.776658', 'step': 5154, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:19:57.833423', 'step': 5154, 'epoch': 1}
{'type': 'loss', 'content': 0.06713563948869705, 'timestamp': '2025-10-02 00:19:57.838607', 'step': 5155, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:57.895450', 'step': 5155, 'epoch': 1}
{'type': 'loss', 'content': 0.07193511724472046, 'timestamp': '2025-10-02 00:19:57.901903', 'step': 5156, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:57.957381', 'step': 5156, 'epoch': 1}
{'type': 'loss', 'content': 0.057101570069789886, 'timestamp': '2025-10-02 00:19:57.964617', 'step': 5157, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:58.022404', 'step': 5157, 'epoch': 1}
{'type': 'loss', 'content': 0.02879444882273674, 'timestamp': '2025-10-02 00:19:58.025738', 'step': 5158, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:19:58.080953', 'step': 5158, 'epoch': 1}
{'type': 'loss', 'content': 0.05501102656126022, 'timestamp': '2025-10-02 00:19:58.083981', 'step': 5159, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:19:58.139545', 'step': 5159, 'epoch': 1}
{'type': 'loss', 'content': 0.07264041155576706, 'timestamp': '2025-10-02 00:19:58.145525', 'step': 5160, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:58.201131', 'step': 5160, 'epoch': 1}
{'type': 'loss', 'content': 0.04758196324110031, 'timestamp': '2025-10-02 00:19:58.208563', 'step': 5161, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:19:58.267904', 'step': 5161, 'epoch': 1}
{'type': 'loss', 'content': 0.17002031207084656, 'timestamp': '2025-10-02 00:19:58.270341', 'step': 5162, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:58.329711', 'step': 5162, 'epoch': 1}
{'type': 'loss', 'content': 0.05085085332393646, 'timestamp': '2025-10-02 00:19:58.336974', 'step': 5163, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:58.393616', 'step': 5163, 'epoch': 1}
{'type': 'loss', 'content': 0.06692488491535187, 'timestamp': '2025-10-02 00:19:58.400193', 'step': 5164, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:58.456169', 'step': 5164, 'epoch': 1}
{'type': 'loss', 'content': 0.10375393182039261, 'timestamp': '2025-10-02 00:19:58.458401', 'step': 5165, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:19:58.523507', 'step': 5165, 'epoch': 1}
{'type': 'loss', 'content': 0.08418133854866028, 'timestamp': '2025-10-02 00:19:58.534140', 'step': 5166, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:58.589662', 'step': 5166, 'epoch': 1}
{'type': 'loss', 'content': 0.0440790168941021, 'timestamp': '2025-10-02 00:19:58.593018', 'step': 5167, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:58.648749', 'step': 5167, 'epoch': 1}
{'type': 'loss', 'content': 0.04792982339859009, 'timestamp': '2025-10-02 00:19:58.655144', 'step': 5168, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:19:58.709224', 'step': 5168, 'epoch': 1}
{'type': 'loss', 'content': 0.07662468403577805, 'timestamp': '2025-10-02 00:19:58.719377', 'step': 5169, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:58.776794', 'step': 5169, 'epoch': 1}
{'type': 'loss', 'content': 0.0815892368555069, 'timestamp': '2025-10-02 00:19:58.779688', 'step': 5170, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:58.836182', 'step': 5170, 'epoch': 1}
{'type': 'loss', 'content': 0.02551460638642311, 'timestamp': '2025-10-02 00:19:58.843558', 'step': 5171, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:58.899804', 'step': 5171, 'epoch': 1}
{'type': 'loss', 'content': 0.024508729577064514, 'timestamp': '2025-10-02 00:19:58.907708', 'step': 5172, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:58.962133', 'step': 5172, 'epoch': 1}
{'type': 'loss', 'content': 0.15070386230945587, 'timestamp': '2025-10-02 00:19:58.964572', 'step': 5173, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:59.031847', 'step': 5173, 'epoch': 1}
{'type': 'loss', 'content': 0.14409805834293365, 'timestamp': '2025-10-02 00:19:59.038389', 'step': 5174, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:59.095136', 'step': 5174, 'epoch': 1}
{'type': 'loss', 'content': 0.017948132008314133, 'timestamp': '2025-10-02 00:19:59.097587', 'step': 5175, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:59.153893', 'step': 5175, 'epoch': 1}
{'type': 'loss', 'content': 0.03078990802168846, 'timestamp': '2025-10-02 00:19:59.161887', 'step': 5176, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:59.220356', 'step': 5176, 'epoch': 1}
{'type': 'loss', 'content': 0.09355796873569489, 'timestamp': '2025-10-02 00:19:59.222710', 'step': 5177, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:59.278371', 'step': 5177, 'epoch': 1}
{'type': 'loss', 'content': 0.104466512799263, 'timestamp': '2025-10-02 00:19:59.280780', 'step': 5178, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:59.335554', 'step': 5178, 'epoch': 1}
{'type': 'loss', 'content': 0.07556060701608658, 'timestamp': '2025-10-02 00:19:59.338252', 'step': 5179, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:59.393417', 'step': 5179, 'epoch': 1}
{'type': 'loss', 'content': 0.09770306944847107, 'timestamp': '2025-10-02 00:19:59.399176', 'step': 5180, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:19:59.453332', 'step': 5180, 'epoch': 1}
{'type': 'loss', 'content': 0.06111367791891098, 'timestamp': '2025-10-02 00:19:59.455674', 'step': 5181, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:19:59.510677', 'step': 5181, 'epoch': 1}
{'type': 'loss', 'content': 0.1506628692150116, 'timestamp': '2025-10-02 00:19:59.513412', 'step': 5182, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:19:59.567696', 'step': 5182, 'epoch': 1}
{'type': 'loss', 'content': 0.11777233332395554, 'timestamp': '2025-10-02 00:19:59.570558', 'step': 5183, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:19:59.628408', 'step': 5183, 'epoch': 1}
{'type': 'loss', 'content': 0.2203710824251175, 'timestamp': '2025-10-02 00:19:59.634414', 'step': 5184, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:19:59.688788', 'step': 5184, 'epoch': 1}
{'type': 'loss', 'content': 0.03574584424495697, 'timestamp': '2025-10-02 00:19:59.695973', 'step': 5185, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:19:59.755265', 'step': 5185, 'epoch': 1}
{'type': 'loss', 'content': 0.06617124378681183, 'timestamp': '2025-10-02 00:19:59.759290', 'step': 5186, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:19:59.814392', 'step': 5186, 'epoch': 1}
{'type': 'loss', 'content': 0.1385643035173416, 'timestamp': '2025-10-02 00:19:59.816782', 'step': 5187, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:19:59.870903', 'step': 5187, 'epoch': 1}
{'type': 'loss', 'content': 0.09154829382896423, 'timestamp': '2025-10-02 00:19:59.877059', 'step': 5188, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:19:59.931558', 'step': 5188, 'epoch': 1}
{'type': 'loss', 'content': 0.0610821396112442, 'timestamp': '2025-10-02 00:19:59.937044', 'step': 5189, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:19:59.991750', 'step': 5189, 'epoch': 1}
{'type': 'loss', 'content': 0.0837906152009964, 'timestamp': '2025-10-02 00:19:59.994365', 'step': 5190, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:00.048869', 'step': 5190, 'epoch': 1}
{'type': 'loss', 'content': 0.17932350933551788, 'timestamp': '2025-10-02 00:20:00.051303', 'step': 5191, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:00.106392', 'step': 5191, 'epoch': 1}
{'type': 'loss', 'content': 0.10702003538608551, 'timestamp': '2025-10-02 00:20:00.114182', 'step': 5192, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:00.168225', 'step': 5192, 'epoch': 1}
{'type': 'loss', 'content': 0.17949451506137848, 'timestamp': '2025-10-02 00:20:00.170632', 'step': 5193, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:00.225064', 'step': 5193, 'epoch': 1}
{'type': 'loss', 'content': 0.10301913321018219, 'timestamp': '2025-10-02 00:20:00.232408', 'step': 5194, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:00.287618', 'step': 5194, 'epoch': 1}
{'type': 'loss', 'content': 0.08274790644645691, 'timestamp': '2025-10-02 00:20:00.296571', 'step': 5195, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:00.351019', 'step': 5195, 'epoch': 1}
{'type': 'loss', 'content': 0.07629086822271347, 'timestamp': '2025-10-02 00:20:00.360948', 'step': 5196, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:00.415114', 'step': 5196, 'epoch': 1}
{'type': 'loss', 'content': 0.10441039502620697, 'timestamp': '2025-10-02 00:20:00.417386', 'step': 5197, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:00.472425', 'step': 5197, 'epoch': 1}
{'type': 'loss', 'content': 0.02752227522432804, 'timestamp': '2025-10-02 00:20:00.474814', 'step': 5198, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:00.529612', 'step': 5198, 'epoch': 1}
{'type': 'loss', 'content': 0.08607187122106552, 'timestamp': '2025-10-02 00:20:00.531821', 'step': 5199, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:00.587129', 'step': 5199, 'epoch': 1}
{'type': 'loss', 'content': 0.09639164805412292, 'timestamp': '2025-10-02 00:20:00.597190', 'step': 5200, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:00.652924', 'step': 5200, 'epoch': 1}
{'type': 'loss', 'content': 0.05177454277873039, 'timestamp': '2025-10-02 00:20:00.656438', 'step': 5201, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:00.711066', 'step': 5201, 'epoch': 1}
{'type': 'loss', 'content': 0.04608755186200142, 'timestamp': '2025-10-02 00:20:00.714185', 'step': 5202, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:00.768323', 'step': 5202, 'epoch': 1}
{'type': 'loss', 'content': 0.04877755790948868, 'timestamp': '2025-10-02 00:20:00.773954', 'step': 5203, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:00.833751', 'step': 5203, 'epoch': 1}
{'type': 'loss', 'content': 0.1155804693698883, 'timestamp': '2025-10-02 00:20:00.841805', 'step': 5204, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:00.898593', 'step': 5204, 'epoch': 1}
{'type': 'loss', 'content': 0.031984832137823105, 'timestamp': '2025-10-02 00:20:00.901978', 'step': 5205, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:20:00.972211', 'step': 5205, 'epoch': 1}
{'type': 'loss', 'content': 0.014320681802928448, 'timestamp': '2025-10-02 00:20:00.984524', 'step': 5206, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:01.041356', 'step': 5206, 'epoch': 1}
{'type': 'loss', 'content': 0.10066511482000351, 'timestamp': '2025-10-02 00:20:01.043450', 'step': 5207, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:20:01.105186', 'step': 5207, 'epoch': 1}
{'type': 'loss', 'content': 0.03848794475197792, 'timestamp': '2025-10-02 00:20:01.116438', 'step': 5208, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:20:01.176365', 'step': 5208, 'epoch': 1}
{'type': 'loss', 'content': 0.020631251856684685, 'timestamp': '2025-10-02 00:20:01.187725', 'step': 5209, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:01.243605', 'step': 5209, 'epoch': 1}
{'type': 'loss', 'content': 0.09373126924037933, 'timestamp': '2025-10-02 00:20:01.245774', 'step': 5210, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:01.300629', 'step': 5210, 'epoch': 1}
{'type': 'loss', 'content': 0.16329067945480347, 'timestamp': '2025-10-02 00:20:01.303211', 'step': 5211, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:20:01.372862', 'step': 5211, 'epoch': 1}
{'type': 'loss', 'content': 0.01636788621544838, 'timestamp': '2025-10-02 00:20:01.385976', 'step': 5212, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:01.439882', 'step': 5212, 'epoch': 1}
{'type': 'loss', 'content': 0.16546353697776794, 'timestamp': '2025-10-02 00:20:01.442204', 'step': 5213, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:20:01.498249', 'step': 5213, 'epoch': 1}
{'type': 'loss', 'content': 0.08786103874444962, 'timestamp': '2025-10-02 00:20:01.501272', 'step': 5214, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:01.556697', 'step': 5214, 'epoch': 1}
{'type': 'loss', 'content': 0.1415078490972519, 'timestamp': '2025-10-02 00:20:01.560060', 'step': 5215, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:01.614966', 'step': 5215, 'epoch': 1}
{'type': 'loss', 'content': 0.21733920276165009, 'timestamp': '2025-10-02 00:20:01.621031', 'step': 5216, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:01.675632', 'step': 5216, 'epoch': 1}
{'type': 'loss', 'content': 0.08297395706176758, 'timestamp': '2025-10-02 00:20:01.685873', 'step': 5217, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:01.739760', 'step': 5217, 'epoch': 1}
{'type': 'loss', 'content': 0.18647706508636475, 'timestamp': '2025-10-02 00:20:01.742112', 'step': 5218, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:01.796540', 'step': 5218, 'epoch': 1}
{'type': 'loss', 'content': 0.04968118295073509, 'timestamp': '2025-10-02 00:20:01.805894', 'step': 5219, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:01.861592', 'step': 5219, 'epoch': 1}
{'type': 'loss', 'content': 0.04634610936045647, 'timestamp': '2025-10-02 00:20:01.867901', 'step': 5220, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:01.922878', 'step': 5220, 'epoch': 1}
{'type': 'loss', 'content': 0.06125098466873169, 'timestamp': '2025-10-02 00:20:01.928622', 'step': 5221, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:01.987479', 'step': 5221, 'epoch': 1}
{'type': 'loss', 'content': 0.09213818609714508, 'timestamp': '2025-10-02 00:20:01.997687', 'step': 5222, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:02.052000', 'step': 5222, 'epoch': 1}
{'type': 'loss', 'content': 0.08799976110458374, 'timestamp': '2025-10-02 00:20:02.054575', 'step': 5223, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:02.111065', 'step': 5223, 'epoch': 1}
{'type': 'loss', 'content': 0.06205214560031891, 'timestamp': '2025-10-02 00:20:02.117340', 'step': 5224, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:02.171372', 'step': 5224, 'epoch': 1}
{'type': 'loss', 'content': 0.05644017457962036, 'timestamp': '2025-10-02 00:20:02.173651', 'step': 5225, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:02.227868', 'step': 5225, 'epoch': 1}
{'type': 'loss', 'content': 0.10397026687860489, 'timestamp': '2025-10-02 00:20:02.230288', 'step': 5226, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:02.284230', 'step': 5226, 'epoch': 1}
{'type': 'loss', 'content': 0.09927790611982346, 'timestamp': '2025-10-02 00:20:02.287282', 'step': 5227, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:02.341184', 'step': 5227, 'epoch': 1}
{'type': 'loss', 'content': 0.1106419712305069, 'timestamp': '2025-10-02 00:20:02.347177', 'step': 5228, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:02.401582', 'step': 5228, 'epoch': 1}
{'type': 'loss', 'content': 0.14944295585155487, 'timestamp': '2025-10-02 00:20:02.409049', 'step': 5229, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:02.463789', 'step': 5229, 'epoch': 1}
{'type': 'loss', 'content': 0.14322152733802795, 'timestamp': '2025-10-02 00:20:02.466322', 'step': 5230, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:02.521244', 'step': 5230, 'epoch': 1}
{'type': 'loss', 'content': 0.05152241140604019, 'timestamp': '2025-10-02 00:20:02.523635', 'step': 5231, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:20:02.585298', 'step': 5231, 'epoch': 1}
{'type': 'loss', 'content': 0.029254231601953506, 'timestamp': '2025-10-02 00:20:02.596505', 'step': 5232, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:02.650496', 'step': 5232, 'epoch': 1}
{'type': 'loss', 'content': 0.044360026717185974, 'timestamp': '2025-10-02 00:20:02.659869', 'step': 5233, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:02.714652', 'step': 5233, 'epoch': 1}
{'type': 'loss', 'content': 0.04368315264582634, 'timestamp': '2025-10-02 00:20:02.720641', 'step': 5234, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:02.774024', 'step': 5234, 'epoch': 1}
{'type': 'loss', 'content': 0.23271670937538147, 'timestamp': '2025-10-02 00:20:02.777241', 'step': 5235, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:02.832655', 'step': 5235, 'epoch': 1}
{'type': 'loss', 'content': 0.1852133721113205, 'timestamp': '2025-10-02 00:20:02.838773', 'step': 5236, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:02.894875', 'step': 5236, 'epoch': 1}
{'type': 'loss', 'content': 0.05552640184760094, 'timestamp': '2025-10-02 00:20:02.900748', 'step': 5237, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:02.959709', 'step': 5237, 'epoch': 1}
{'type': 'loss', 'content': 0.026039183139801025, 'timestamp': '2025-10-02 00:20:02.969889', 'step': 5238, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:03.024765', 'step': 5238, 'epoch': 1}
{'type': 'loss', 'content': 0.07177924364805222, 'timestamp': '2025-10-02 00:20:03.030475', 'step': 5239, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:03.089492', 'step': 5239, 'epoch': 1}
{'type': 'loss', 'content': 0.1873258650302887, 'timestamp': '2025-10-02 00:20:03.095562', 'step': 5240, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:03.149135', 'step': 5240, 'epoch': 1}
{'type': 'loss', 'content': 0.11397018283605576, 'timestamp': '2025-10-02 00:20:03.151617', 'step': 5241, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:03.206205', 'step': 5241, 'epoch': 1}
{'type': 'loss', 'content': 0.28247272968292236, 'timestamp': '2025-10-02 00:20:03.208586', 'step': 5242, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:03.264024', 'step': 5242, 'epoch': 1}
{'type': 'loss', 'content': 0.07000342756509781, 'timestamp': '2025-10-02 00:20:03.273587', 'step': 5243, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:03.329307', 'step': 5243, 'epoch': 1}
{'type': 'loss', 'content': 0.18373939394950867, 'timestamp': '2025-10-02 00:20:03.335417', 'step': 5244, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:03.389393', 'step': 5244, 'epoch': 1}
{'type': 'loss', 'content': 0.06120762601494789, 'timestamp': '2025-10-02 00:20:03.392267', 'step': 5245, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:03.453086', 'step': 5245, 'epoch': 1}
{'type': 'loss', 'content': 0.014729694463312626, 'timestamp': '2025-10-02 00:20:03.463294', 'step': 5246, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:03.522229', 'step': 5246, 'epoch': 1}
{'type': 'loss', 'content': 0.05595939978957176, 'timestamp': '2025-10-02 00:20:03.532392', 'step': 5247, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:03.587983', 'step': 5247, 'epoch': 1}
{'type': 'loss', 'content': 0.020811766386032104, 'timestamp': '2025-10-02 00:20:03.598108', 'step': 5248, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:03.651777', 'step': 5248, 'epoch': 1}
{'type': 'loss', 'content': 0.15319225192070007, 'timestamp': '2025-10-02 00:20:03.653850', 'step': 5249, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:03.708474', 'step': 5249, 'epoch': 1}
{'type': 'loss', 'content': 0.12359736114740372, 'timestamp': '2025-10-02 00:20:03.714524', 'step': 5250, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:20:03.768385', 'step': 5250, 'epoch': 1}
{'type': 'loss', 'content': 0.1185474842786789, 'timestamp': '2025-10-02 00:20:03.771964', 'step': 5251, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:03.826489', 'step': 5251, 'epoch': 1}
{'type': 'loss', 'content': 0.07351358979940414, 'timestamp': '2025-10-02 00:20:03.832998', 'step': 5252, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:03.886500', 'step': 5252, 'epoch': 1}
{'type': 'loss', 'content': 0.06481403857469559, 'timestamp': '2025-10-02 00:20:03.888677', 'step': 5253, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:03.943634', 'step': 5253, 'epoch': 1}
{'type': 'loss', 'content': 0.10879795253276825, 'timestamp': '2025-10-02 00:20:03.945894', 'step': 5254, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:04.000421', 'step': 5254, 'epoch': 1}
{'type': 'loss', 'content': 0.035136207938194275, 'timestamp': '2025-10-02 00:20:04.006255', 'step': 5255, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:20:04.060441', 'step': 5255, 'epoch': 1}
{'type': 'loss', 'content': 0.14510609209537506, 'timestamp': '2025-10-02 00:20:04.066212', 'step': 5256, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:04.119468', 'step': 5256, 'epoch': 1}
{'type': 'loss', 'content': 0.23000222444534302, 'timestamp': '2025-10-02 00:20:04.121795', 'step': 5257, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:04.176114', 'step': 5257, 'epoch': 1}
{'type': 'loss', 'content': 0.08546113222837448, 'timestamp': '2025-10-02 00:20:04.178914', 'step': 5258, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:04.233117', 'step': 5258, 'epoch': 1}
{'type': 'loss', 'content': 0.13374999165534973, 'timestamp': '2025-10-02 00:20:04.236566', 'step': 5259, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:04.292152', 'step': 5259, 'epoch': 1}
{'type': 'loss', 'content': 0.06614815443754196, 'timestamp': '2025-10-02 00:20:04.297870', 'step': 5260, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:20:04.358731', 'step': 5260, 'epoch': 1}
{'type': 'loss', 'content': 0.020813265815377235, 'timestamp': '2025-10-02 00:20:04.370051', 'step': 5261, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:20:04.424619', 'step': 5261, 'epoch': 1}
{'type': 'loss', 'content': 0.142052561044693, 'timestamp': '2025-10-02 00:20:04.426943', 'step': 5262, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:04.483788', 'step': 5262, 'epoch': 1}
{'type': 'loss', 'content': 0.14761701226234436, 'timestamp': '2025-10-02 00:20:04.485814', 'step': 5263, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:04.540778', 'step': 5263, 'epoch': 1}
{'type': 'loss', 'content': 0.1184988021850586, 'timestamp': '2025-10-02 00:20:04.547378', 'step': 5264, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:20:04.619312', 'step': 5264, 'epoch': 1}
{'type': 'loss', 'content': 0.02628502808511257, 'timestamp': '2025-10-02 00:20:04.633687', 'step': 5265, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:04.687796', 'step': 5265, 'epoch': 1}
{'type': 'loss', 'content': 0.10403256118297577, 'timestamp': '2025-10-02 00:20:04.695256', 'step': 5266, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:04.754187', 'step': 5266, 'epoch': 1}
{'type': 'loss', 'content': 0.11254087835550308, 'timestamp': '2025-10-02 00:20:04.764347', 'step': 5267, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:04.820309', 'step': 5267, 'epoch': 1}
{'type': 'loss', 'content': 0.14878885447978973, 'timestamp': '2025-10-02 00:20:04.830614', 'step': 5268, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:20:04.884578', 'step': 5268, 'epoch': 1}
{'type': 'loss', 'content': 0.07750153541564941, 'timestamp': '2025-10-02 00:20:04.886816', 'step': 5269, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:04.941341', 'step': 5269, 'epoch': 1}
{'type': 'loss', 'content': 0.1027979776263237, 'timestamp': '2025-10-02 00:20:04.943494', 'step': 5270, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:04.998586', 'step': 5270, 'epoch': 1}
{'type': 'loss', 'content': 0.08198726177215576, 'timestamp': '2025-10-02 00:20:05.000864', 'step': 5271, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:20:05.055175', 'step': 5271, 'epoch': 1}
{'type': 'loss', 'content': 0.10738352686166763, 'timestamp': '2025-10-02 00:20:05.061082', 'step': 5272, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:20:05.115037', 'step': 5272, 'epoch': 1}
{'type': 'loss', 'content': 0.07684056460857391, 'timestamp': '2025-10-02 00:20:05.117531', 'step': 5273, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:05.172926', 'step': 5273, 'epoch': 1}
{'type': 'loss', 'content': 0.024383891373872757, 'timestamp': '2025-10-02 00:20:05.180435', 'step': 5274, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:05.239187', 'step': 5274, 'epoch': 1}
{'type': 'loss', 'content': 0.16975177824497223, 'timestamp': '2025-10-02 00:20:05.241467', 'step': 5275, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:05.296020', 'step': 5275, 'epoch': 1}
{'type': 'loss', 'content': 0.07374398410320282, 'timestamp': '2025-10-02 00:20:05.302489', 'step': 5276, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:05.355488', 'step': 5276, 'epoch': 1}
{'type': 'loss', 'content': 0.08236382901668549, 'timestamp': '2025-10-02 00:20:05.357702', 'step': 5277, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:05.412469', 'step': 5277, 'epoch': 1}
{'type': 'loss', 'content': 0.10295984148979187, 'timestamp': '2025-10-02 00:20:05.414650', 'step': 5278, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:05.468400', 'step': 5278, 'epoch': 1}
{'type': 'loss', 'content': 0.145926833152771, 'timestamp': '2025-10-02 00:20:05.471325', 'step': 5279, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:05.525134', 'step': 5279, 'epoch': 1}
{'type': 'loss', 'content': 0.13855597376823425, 'timestamp': '2025-10-02 00:20:05.530903', 'step': 5280, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:05.586172', 'step': 5280, 'epoch': 1}
{'type': 'loss', 'content': 0.055117879062891006, 'timestamp': '2025-10-02 00:20:05.588761', 'step': 5281, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:05.642511', 'step': 5281, 'epoch': 1}
{'type': 'loss', 'content': 0.0455838143825531, 'timestamp': '2025-10-02 00:20:05.649994', 'step': 5282, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:05.704288', 'step': 5282, 'epoch': 1}
{'type': 'loss', 'content': 0.043212585151195526, 'timestamp': '2025-10-02 00:20:05.710107', 'step': 5283, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:05.765378', 'step': 5283, 'epoch': 1}
{'type': 'loss', 'content': 0.10980581492185593, 'timestamp': '2025-10-02 00:20:05.773555', 'step': 5284, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:05.827625', 'step': 5284, 'epoch': 1}
{'type': 'loss', 'content': 0.055188920348882675, 'timestamp': '2025-10-02 00:20:05.829662', 'step': 5285, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:05.883138', 'step': 5285, 'epoch': 1}
{'type': 'loss', 'content': 0.17157793045043945, 'timestamp': '2025-10-02 00:20:05.885353', 'step': 5286, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:05.940365', 'step': 5286, 'epoch': 1}
{'type': 'loss', 'content': 0.11466987431049347, 'timestamp': '2025-10-02 00:20:05.946269', 'step': 5287, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:06.000737', 'step': 5287, 'epoch': 1}
{'type': 'loss', 'content': 0.18165728449821472, 'timestamp': '2025-10-02 00:20:06.006871', 'step': 5288, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:06.061304', 'step': 5288, 'epoch': 1}
{'type': 'loss', 'content': 0.08206615597009659, 'timestamp': '2025-10-02 00:20:06.063564', 'step': 5289, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:06.117770', 'step': 5289, 'epoch': 1}
{'type': 'loss', 'content': 0.22707496583461761, 'timestamp': '2025-10-02 00:20:06.120109', 'step': 5290, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:06.176002', 'step': 5290, 'epoch': 1}
{'type': 'loss', 'content': 0.21305198967456818, 'timestamp': '2025-10-02 00:20:06.185538', 'step': 5291, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:06.243082', 'step': 5291, 'epoch': 1}
{'type': 'loss', 'content': 0.055921752005815506, 'timestamp': '2025-10-02 00:20:06.251174', 'step': 5292, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:06.304982', 'step': 5292, 'epoch': 1}
{'type': 'loss', 'content': 0.08632083237171173, 'timestamp': '2025-10-02 00:20:06.310930', 'step': 5293, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:06.366144', 'step': 5293, 'epoch': 1}
{'type': 'loss', 'content': 0.08552522212266922, 'timestamp': '2025-10-02 00:20:06.373713', 'step': 5294, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:06.432445', 'step': 5294, 'epoch': 1}
{'type': 'loss', 'content': 0.056172873824834824, 'timestamp': '2025-10-02 00:20:06.442638', 'step': 5295, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:06.496603', 'step': 5295, 'epoch': 1}
{'type': 'loss', 'content': 0.14561374485492706, 'timestamp': '2025-10-02 00:20:06.502510', 'step': 5296, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:06.555609', 'step': 5296, 'epoch': 1}
{'type': 'loss', 'content': 0.10039865970611572, 'timestamp': '2025-10-02 00:20:06.557766', 'step': 5297, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:06.612301', 'step': 5297, 'epoch': 1}
{'type': 'loss', 'content': 0.06266655027866364, 'timestamp': '2025-10-02 00:20:06.619748', 'step': 5298, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:06.678881', 'step': 5298, 'epoch': 1}
{'type': 'loss', 'content': 0.015543225221335888, 'timestamp': '2025-10-02 00:20:06.689043', 'step': 5299, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:06.743498', 'step': 5299, 'epoch': 1}
{'type': 'loss', 'content': 0.09435493499040604, 'timestamp': '2025-10-02 00:20:06.749315', 'step': 5300, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:20:06.817257', 'step': 5300, 'epoch': 1}
{'type': 'loss', 'content': 0.04067005589604378, 'timestamp': '2025-10-02 00:20:06.830811', 'step': 5301, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:06.885494', 'step': 5301, 'epoch': 1}
{'type': 'loss', 'content': 0.06282515078783035, 'timestamp': '2025-10-02 00:20:06.888696', 'step': 5302, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:06.943180', 'step': 5302, 'epoch': 1}
{'type': 'loss', 'content': 0.12484244257211685, 'timestamp': '2025-10-02 00:20:06.945924', 'step': 5303, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:06.999870', 'step': 5303, 'epoch': 1}
{'type': 'loss', 'content': 0.06888826936483383, 'timestamp': '2025-10-02 00:20:07.006307', 'step': 5304, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:07.059960', 'step': 5304, 'epoch': 1}
{'type': 'loss', 'content': 0.05593889579176903, 'timestamp': '2025-10-02 00:20:07.062612', 'step': 5305, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:20:07.116153', 'step': 5305, 'epoch': 1}
{'type': 'loss', 'content': 0.11871803551912308, 'timestamp': '2025-10-02 00:20:07.118512', 'step': 5306, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:07.172447', 'step': 5306, 'epoch': 1}
{'type': 'loss', 'content': 0.07447054982185364, 'timestamp': '2025-10-02 00:20:07.175322', 'step': 5307, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:07.230249', 'step': 5307, 'epoch': 1}
{'type': 'loss', 'content': 0.1108241081237793, 'timestamp': '2025-10-02 00:20:07.236557', 'step': 5308, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:20:07.289773', 'step': 5308, 'epoch': 1}
{'type': 'loss', 'content': 0.11482150107622147, 'timestamp': '2025-10-02 00:20:07.294903', 'step': 5309, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:07.349016', 'step': 5309, 'epoch': 1}
{'type': 'loss', 'content': 0.04464062303304672, 'timestamp': '2025-10-02 00:20:07.351467', 'step': 5310, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:07.405390', 'step': 5310, 'epoch': 1}
{'type': 'loss', 'content': 0.14029762148857117, 'timestamp': '2025-10-02 00:20:07.407779', 'step': 5311, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:07.463492', 'step': 5311, 'epoch': 1}
{'type': 'loss', 'content': 0.12788164615631104, 'timestamp': '2025-10-02 00:20:07.473791', 'step': 5312, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:07.531293', 'step': 5312, 'epoch': 1}
{'type': 'loss', 'content': 0.02684684284031391, 'timestamp': '2025-10-02 00:20:07.541546', 'step': 5313, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:07.600136', 'step': 5313, 'epoch': 1}
{'type': 'loss', 'content': 0.03878013417124748, 'timestamp': '2025-10-02 00:20:07.603709', 'step': 5314, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:07.660147', 'step': 5314, 'epoch': 1}
{'type': 'loss', 'content': 0.12225518375635147, 'timestamp': '2025-10-02 00:20:07.663271', 'step': 5315, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:20:07.726310', 'step': 5315, 'epoch': 1}
{'type': 'loss', 'content': 0.03760041296482086, 'timestamp': '2025-10-02 00:20:07.737721', 'step': 5316, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:07.796510', 'step': 5316, 'epoch': 1}
{'type': 'loss', 'content': 0.18556271493434906, 'timestamp': '2025-10-02 00:20:07.798959', 'step': 5317, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:07.854607', 'step': 5317, 'epoch': 1}
{'type': 'loss', 'content': 0.036685843020677567, 'timestamp': '2025-10-02 00:20:07.862068', 'step': 5318, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:07.919631', 'step': 5318, 'epoch': 1}
{'type': 'loss', 'content': 0.040867459028959274, 'timestamp': '2025-10-02 00:20:07.925356', 'step': 5319, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:07.981139', 'step': 5319, 'epoch': 1}
{'type': 'loss', 'content': 0.11990353465080261, 'timestamp': '2025-10-02 00:20:07.988660', 'step': 5320, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:08.047641', 'step': 5320, 'epoch': 1}
{'type': 'loss', 'content': 0.04923734813928604, 'timestamp': '2025-10-02 00:20:08.058634', 'step': 5321, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:08.114272', 'step': 5321, 'epoch': 1}
{'type': 'loss', 'content': 0.11436678469181061, 'timestamp': '2025-10-02 00:20:08.116938', 'step': 5322, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:08.172261', 'step': 5322, 'epoch': 1}
{'type': 'loss', 'content': 0.056383512914180756, 'timestamp': '2025-10-02 00:20:08.174532', 'step': 5323, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:08.231067', 'step': 5323, 'epoch': 1}
{'type': 'loss', 'content': 0.1833087056875229, 'timestamp': '2025-10-02 00:20:08.237498', 'step': 5324, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:08.292036', 'step': 5324, 'epoch': 1}
{'type': 'loss', 'content': 0.10075205564498901, 'timestamp': '2025-10-02 00:20:08.295261', 'step': 5325, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:08.355636', 'step': 5325, 'epoch': 1}
{'type': 'loss', 'content': 0.08337399363517761, 'timestamp': '2025-10-02 00:20:08.365867', 'step': 5326, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:08.423711', 'step': 5326, 'epoch': 1}
{'type': 'loss', 'content': 0.09004814177751541, 'timestamp': '2025-10-02 00:20:08.426375', 'step': 5327, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:08.483283', 'step': 5327, 'epoch': 1}
{'type': 'loss', 'content': 0.09408885985612869, 'timestamp': '2025-10-02 00:20:08.489025', 'step': 5328, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:08.545754', 'step': 5328, 'epoch': 1}
{'type': 'loss', 'content': 0.09584573656320572, 'timestamp': '2025-10-02 00:20:08.549112', 'step': 5329, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:08.605389', 'step': 5329, 'epoch': 1}
{'type': 'loss', 'content': 0.12232354283332825, 'timestamp': '2025-10-02 00:20:08.607935', 'step': 5330, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:08.666030', 'step': 5330, 'epoch': 1}
{'type': 'loss', 'content': 0.06603275239467621, 'timestamp': '2025-10-02 00:20:08.669340', 'step': 5331, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:08.726467', 'step': 5331, 'epoch': 1}
{'type': 'loss', 'content': 0.2962537705898285, 'timestamp': '2025-10-02 00:20:08.733623', 'step': 5332, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:08.788692', 'step': 5332, 'epoch': 1}
{'type': 'loss', 'content': 0.08237426728010178, 'timestamp': '2025-10-02 00:20:08.798972', 'step': 5333, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:08.854666', 'step': 5333, 'epoch': 1}
{'type': 'loss', 'content': 0.19421891868114471, 'timestamp': '2025-10-02 00:20:08.857281', 'step': 5334, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:08.912585', 'step': 5334, 'epoch': 1}
{'type': 'loss', 'content': 0.0691516175866127, 'timestamp': '2025-10-02 00:20:08.915365', 'step': 5335, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:08.971351', 'step': 5335, 'epoch': 1}
{'type': 'loss', 'content': 0.09897060692310333, 'timestamp': '2025-10-02 00:20:08.978199', 'step': 5336, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:09.033690', 'step': 5336, 'epoch': 1}
{'type': 'loss', 'content': 0.33140692114830017, 'timestamp': '2025-10-02 00:20:09.036258', 'step': 5337, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:09.091417', 'step': 5337, 'epoch': 1}
{'type': 'loss', 'content': 0.14515094459056854, 'timestamp': '2025-10-02 00:20:09.094551', 'step': 5338, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:09.155078', 'step': 5338, 'epoch': 1}
{'type': 'loss', 'content': 0.09588778764009476, 'timestamp': '2025-10-02 00:20:09.165258', 'step': 5339, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:09.221625', 'step': 5339, 'epoch': 1}
{'type': 'loss', 'content': 0.23982198536396027, 'timestamp': '2025-10-02 00:20:09.227515', 'step': 5340, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:09.281375', 'step': 5340, 'epoch': 1}
{'type': 'loss', 'content': 0.05864788591861725, 'timestamp': '2025-10-02 00:20:09.287368', 'step': 5341, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:09.342878', 'step': 5341, 'epoch': 1}
{'type': 'loss', 'content': 0.06131937727332115, 'timestamp': '2025-10-02 00:20:09.345242', 'step': 5342, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:09.402335', 'step': 5342, 'epoch': 1}
{'type': 'loss', 'content': 0.05146234109997749, 'timestamp': '2025-10-02 00:20:09.411916', 'step': 5343, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:09.471878', 'step': 5343, 'epoch': 1}
{'type': 'loss', 'content': 0.07033819705247879, 'timestamp': '2025-10-02 00:20:09.482884', 'step': 5344, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:09.539609', 'step': 5344, 'epoch': 1}
{'type': 'loss', 'content': 0.0830293819308281, 'timestamp': '2025-10-02 00:20:09.543149', 'step': 5345, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:09.604436', 'step': 5345, 'epoch': 1}
{'type': 'loss', 'content': 0.20107616484165192, 'timestamp': '2025-10-02 00:20:09.607566', 'step': 5346, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:09.664431', 'step': 5346, 'epoch': 1}
{'type': 'loss', 'content': 0.1034800335764885, 'timestamp': '2025-10-02 00:20:09.667866', 'step': 5347, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:20:09.739482', 'step': 5347, 'epoch': 1}
{'type': 'loss', 'content': 0.01519541721791029, 'timestamp': '2025-10-02 00:20:09.752742', 'step': 5348, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:09.809902', 'step': 5348, 'epoch': 1}
{'type': 'loss', 'content': 0.09563811868429184, 'timestamp': '2025-10-02 00:20:09.813643', 'step': 5349, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:20:09.871415', 'step': 5349, 'epoch': 1}
{'type': 'loss', 'content': 0.06456403434276581, 'timestamp': '2025-10-02 00:20:09.879554', 'step': 5350, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:09.939294', 'step': 5350, 'epoch': 1}
{'type': 'loss', 'content': 0.04422841966152191, 'timestamp': '2025-10-02 00:20:09.949476', 'step': 5351, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:10.004910', 'step': 5351, 'epoch': 1}
{'type': 'loss', 'content': 0.14560449123382568, 'timestamp': '2025-10-02 00:20:10.011552', 'step': 5352, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:10.066620', 'step': 5352, 'epoch': 1}
{'type': 'loss', 'content': 0.2124667465686798, 'timestamp': '2025-10-02 00:20:10.068806', 'step': 5353, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:10.123380', 'step': 5353, 'epoch': 1}
{'type': 'loss', 'content': 0.14249616861343384, 'timestamp': '2025-10-02 00:20:10.130910', 'step': 5354, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:20:10.193788', 'step': 5354, 'epoch': 1}
{'type': 'loss', 'content': 0.026168953627347946, 'timestamp': '2025-10-02 00:20:10.204461', 'step': 5355, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:10.259633', 'step': 5355, 'epoch': 1}
{'type': 'loss', 'content': 0.14918377995491028, 'timestamp': '2025-10-02 00:20:10.265358', 'step': 5356, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:10.319640', 'step': 5356, 'epoch': 1}
{'type': 'loss', 'content': 0.0383918359875679, 'timestamp': '2025-10-02 00:20:10.326936', 'step': 5357, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:10.381175', 'step': 5357, 'epoch': 1}
{'type': 'loss', 'content': 0.15896861255168915, 'timestamp': '2025-10-02 00:20:10.383633', 'step': 5358, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:10.438227', 'step': 5358, 'epoch': 1}
{'type': 'loss', 'content': 0.06460539996623993, 'timestamp': '2025-10-02 00:20:10.445523', 'step': 5359, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:20:10.499701', 'step': 5359, 'epoch': 1}
{'type': 'loss', 'content': 0.152619406580925, 'timestamp': '2025-10-02 00:20:10.505542', 'step': 5360, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:20:10.558719', 'step': 5360, 'epoch': 1}
{'type': 'loss', 'content': 0.15809275209903717, 'timestamp': '2025-10-02 00:20:10.563467', 'step': 5361, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:10.623591', 'step': 5361, 'epoch': 1}
{'type': 'loss', 'content': 0.10177649557590485, 'timestamp': '2025-10-02 00:20:10.629263', 'step': 5362, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:10.685037', 'step': 5362, 'epoch': 1}
{'type': 'loss', 'content': 0.06361168622970581, 'timestamp': '2025-10-02 00:20:10.692507', 'step': 5363, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:20:10.755887', 'step': 5363, 'epoch': 1}
{'type': 'loss', 'content': 0.015742020681500435, 'timestamp': '2025-10-02 00:20:10.767529', 'step': 5364, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:10.822371', 'step': 5364, 'epoch': 1}
{'type': 'loss', 'content': 0.02800738997757435, 'timestamp': '2025-10-02 00:20:10.828244', 'step': 5365, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:10.882723', 'step': 5365, 'epoch': 1}
{'type': 'loss', 'content': 0.07555641233921051, 'timestamp': '2025-10-02 00:20:10.885273', 'step': 5366, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:10.939270', 'step': 5366, 'epoch': 1}
{'type': 'loss', 'content': 0.06235016882419586, 'timestamp': '2025-10-02 00:20:10.942095', 'step': 5367, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:10.996824', 'step': 5367, 'epoch': 1}
{'type': 'loss', 'content': 0.036221809685230255, 'timestamp': '2025-10-02 00:20:11.006944', 'step': 5368, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:11.061418', 'step': 5368, 'epoch': 1}
{'type': 'loss', 'content': 0.059274472296237946, 'timestamp': '2025-10-02 00:20:11.071242', 'step': 5369, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:11.125259', 'step': 5369, 'epoch': 1}
{'type': 'loss', 'content': 0.07053246349096298, 'timestamp': '2025-10-02 00:20:11.127623', 'step': 5370, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:11.181658', 'step': 5370, 'epoch': 1}
{'type': 'loss', 'content': 0.059253815561532974, 'timestamp': '2025-10-02 00:20:11.183951', 'step': 5371, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:20:11.245606', 'step': 5371, 'epoch': 1}
{'type': 'loss', 'content': 0.01838875748217106, 'timestamp': '2025-10-02 00:20:11.256833', 'step': 5372, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:11.310863', 'step': 5372, 'epoch': 1}
{'type': 'loss', 'content': 0.081004798412323, 'timestamp': '2025-10-02 00:20:11.313567', 'step': 5373, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:20:11.367590', 'step': 5373, 'epoch': 1}
{'type': 'loss', 'content': 0.04656679555773735, 'timestamp': '2025-10-02 00:20:11.370049', 'step': 5374, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:11.424162', 'step': 5374, 'epoch': 1}
{'type': 'loss', 'content': 0.036713019013404846, 'timestamp': '2025-10-02 00:20:11.431589', 'step': 5375, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:11.488107', 'step': 5375, 'epoch': 1}
{'type': 'loss', 'content': 0.04479915276169777, 'timestamp': '2025-10-02 00:20:11.498435', 'step': 5376, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:11.552036', 'step': 5376, 'epoch': 1}
{'type': 'loss', 'content': 0.05515819787979126, 'timestamp': '2025-10-02 00:20:11.557956', 'step': 5377, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:11.612583', 'step': 5377, 'epoch': 1}
{'type': 'loss', 'content': 0.05344029888510704, 'timestamp': '2025-10-02 00:20:11.615224', 'step': 5378, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:11.669500', 'step': 5378, 'epoch': 1}
{'type': 'loss', 'content': 0.0570201501250267, 'timestamp': '2025-10-02 00:20:11.671699', 'step': 5379, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:11.727518', 'step': 5379, 'epoch': 1}
{'type': 'loss', 'content': 0.023226909339427948, 'timestamp': '2025-10-02 00:20:11.737893', 'step': 5380, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:11.792483', 'step': 5380, 'epoch': 1}
{'type': 'loss', 'content': 0.046473920345306396, 'timestamp': '2025-10-02 00:20:11.798404', 'step': 5381, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:20:11.868428', 'step': 5381, 'epoch': 1}
{'type': 'loss', 'content': 0.02881586365401745, 'timestamp': '2025-10-02 00:20:11.880901', 'step': 5382, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:11.937156', 'step': 5382, 'epoch': 1}
{'type': 'loss', 'content': 0.21195362508296967, 'timestamp': '2025-10-02 00:20:11.939426', 'step': 5383, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:11.994027', 'step': 5383, 'epoch': 1}
{'type': 'loss', 'content': 0.05143829435110092, 'timestamp': '2025-10-02 00:20:12.000610', 'step': 5384, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:12.054454', 'step': 5384, 'epoch': 1}
{'type': 'loss', 'content': 0.02256941795349121, 'timestamp': '2025-10-02 00:20:12.063906', 'step': 5385, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:12.122490', 'step': 5385, 'epoch': 1}
{'type': 'loss', 'content': 0.06942348927259445, 'timestamp': '2025-10-02 00:20:12.125245', 'step': 5386, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:12.179479', 'step': 5386, 'epoch': 1}
{'type': 'loss', 'content': 0.04448314011096954, 'timestamp': '2025-10-02 00:20:12.186950', 'step': 5387, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:12.242843', 'step': 5387, 'epoch': 1}
{'type': 'loss', 'content': 0.12382075190544128, 'timestamp': '2025-10-02 00:20:12.248831', 'step': 5388, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:12.302106', 'step': 5388, 'epoch': 1}
{'type': 'loss', 'content': 0.026753103360533714, 'timestamp': '2025-10-02 00:20:12.304553', 'step': 5389, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:12.358828', 'step': 5389, 'epoch': 1}
{'type': 'loss', 'content': 0.15109017491340637, 'timestamp': '2025-10-02 00:20:12.361291', 'step': 5390, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:12.415505', 'step': 5390, 'epoch': 1}
{'type': 'loss', 'content': 0.09687995910644531, 'timestamp': '2025-10-02 00:20:12.421268', 'step': 5391, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:12.477006', 'step': 5391, 'epoch': 1}
{'type': 'loss', 'content': 0.11812347173690796, 'timestamp': '2025-10-02 00:20:12.482644', 'step': 5392, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:12.535879', 'step': 5392, 'epoch': 1}
{'type': 'loss', 'content': 0.11787231266498566, 'timestamp': '2025-10-02 00:20:12.538228', 'step': 5393, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:12.592866', 'step': 5393, 'epoch': 1}
{'type': 'loss', 'content': 0.03475680947303772, 'timestamp': '2025-10-02 00:20:12.602237', 'step': 5394, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:12.658759', 'step': 5394, 'epoch': 1}
{'type': 'loss', 'content': 0.04623483121395111, 'timestamp': '2025-10-02 00:20:12.668314', 'step': 5395, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:12.722774', 'step': 5395, 'epoch': 1}
{'type': 'loss', 'content': 0.14334870874881744, 'timestamp': '2025-10-02 00:20:12.729206', 'step': 5396, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:12.782664', 'step': 5396, 'epoch': 1}
{'type': 'loss', 'content': 0.12247791886329651, 'timestamp': '2025-10-02 00:20:12.784990', 'step': 5397, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:12.839165', 'step': 5397, 'epoch': 1}
{'type': 'loss', 'content': 0.0771375223994255, 'timestamp': '2025-10-02 00:20:12.841558', 'step': 5398, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:20:12.895116', 'step': 5398, 'epoch': 1}
{'type': 'loss', 'content': 0.11340445280075073, 'timestamp': '2025-10-02 00:20:12.897451', 'step': 5399, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:12.951231', 'step': 5399, 'epoch': 1}
{'type': 'loss', 'content': 0.13697212934494019, 'timestamp': '2025-10-02 00:20:12.957243', 'step': 5400, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:13.010655', 'step': 5400, 'epoch': 1}
{'type': 'loss', 'content': 0.1145971491932869, 'timestamp': '2025-10-02 00:20:13.020183', 'step': 5401, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:13.074851', 'step': 5401, 'epoch': 1}
{'type': 'loss', 'content': 0.0356142595410347, 'timestamp': '2025-10-02 00:20:13.080974', 'step': 5402, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:13.135602', 'step': 5402, 'epoch': 1}
{'type': 'loss', 'content': 0.05674492567777634, 'timestamp': '2025-10-02 00:20:13.141357', 'step': 5403, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:13.200977', 'step': 5403, 'epoch': 1}
{'type': 'loss', 'content': 0.018935492262244225, 'timestamp': '2025-10-02 00:20:13.211929', 'step': 5404, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:13.265775', 'step': 5404, 'epoch': 1}
{'type': 'loss', 'content': 0.05620960518717766, 'timestamp': '2025-10-02 00:20:13.268371', 'step': 5405, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:13.322783', 'step': 5405, 'epoch': 1}
{'type': 'loss', 'content': 0.06057539954781532, 'timestamp': '2025-10-02 00:20:13.325236', 'step': 5406, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:13.379500', 'step': 5406, 'epoch': 1}
{'type': 'loss', 'content': 0.07656638324260712, 'timestamp': '2025-10-02 00:20:13.382111', 'step': 5407, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:20:13.437011', 'step': 5407, 'epoch': 1}
{'type': 'loss', 'content': 0.07800115644931793, 'timestamp': '2025-10-02 00:20:13.443469', 'step': 5408, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:13.496902', 'step': 5408, 'epoch': 1}
{'type': 'loss', 'content': 0.10805325955152512, 'timestamp': '2025-10-02 00:20:13.499340', 'step': 5409, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:13.558779', 'step': 5409, 'epoch': 1}
{'type': 'loss', 'content': 0.18061582744121552, 'timestamp': '2025-10-02 00:20:13.561082', 'step': 5410, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:13.615153', 'step': 5410, 'epoch': 1}
{'type': 'loss', 'content': 0.07849083840847015, 'timestamp': '2025-10-02 00:20:13.621066', 'step': 5411, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:20:13.683183', 'step': 5411, 'epoch': 1}
{'type': 'loss', 'content': 0.0330500565469265, 'timestamp': '2025-10-02 00:20:13.694654', 'step': 5412, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:20:13.756132', 'step': 5412, 'epoch': 1}
{'type': 'loss', 'content': 0.017429174855351448, 'timestamp': '2025-10-02 00:20:13.767918', 'step': 5413, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:13.822229', 'step': 5413, 'epoch': 1}
{'type': 'loss', 'content': 0.10915020853281021, 'timestamp': '2025-10-02 00:20:13.824419', 'step': 5414, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:13.878917', 'step': 5414, 'epoch': 1}
{'type': 'loss', 'content': 0.13710851967334747, 'timestamp': '2025-10-02 00:20:13.881624', 'step': 5415, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:13.937723', 'step': 5415, 'epoch': 1}
{'type': 'loss', 'content': 0.10599220544099808, 'timestamp': '2025-10-02 00:20:13.945369', 'step': 5416, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:14.001742', 'step': 5416, 'epoch': 1}
{'type': 'loss', 'content': 0.04482286423444748, 'timestamp': '2025-10-02 00:20:14.011207', 'step': 5417, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:20:14.064904', 'step': 5417, 'epoch': 1}
{'type': 'loss', 'content': 0.13233084976673126, 'timestamp': '2025-10-02 00:20:14.067293', 'step': 5418, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:20:14.122213', 'step': 5418, 'epoch': 1}
{'type': 'loss', 'content': 0.07426531612873077, 'timestamp': '2025-10-02 00:20:14.124874', 'step': 5419, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:14.178706', 'step': 5419, 'epoch': 1}
{'type': 'loss', 'content': 0.1870948076248169, 'timestamp': '2025-10-02 00:20:14.184448', 'step': 5420, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:14.237807', 'step': 5420, 'epoch': 1}
{'type': 'loss', 'content': 0.0955125018954277, 'timestamp': '2025-10-02 00:20:14.240050', 'step': 5421, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:14.294597', 'step': 5421, 'epoch': 1}
{'type': 'loss', 'content': 0.12149963527917862, 'timestamp': '2025-10-02 00:20:14.296862', 'step': 5422, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:14.350902', 'step': 5422, 'epoch': 1}
{'type': 'loss', 'content': 0.05520803481340408, 'timestamp': '2025-10-02 00:20:14.356795', 'step': 5423, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:20:14.418187', 'step': 5423, 'epoch': 1}
{'type': 'loss', 'content': 0.05011666938662529, 'timestamp': '2025-10-02 00:20:14.429454', 'step': 5424, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:14.483691', 'step': 5424, 'epoch': 1}
{'type': 'loss', 'content': 0.011567850597202778, 'timestamp': '2025-10-02 00:20:14.491200', 'step': 5425, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:20:14.553068', 'step': 5425, 'epoch': 1}
{'type': 'loss', 'content': 0.022825004532933235, 'timestamp': '2025-10-02 00:20:14.563561', 'step': 5426, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:14.618614', 'step': 5426, 'epoch': 1}
{'type': 'loss', 'content': 0.045887745916843414, 'timestamp': '2025-10-02 00:20:14.626004', 'step': 5427, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:14.680298', 'step': 5427, 'epoch': 1}
{'type': 'loss', 'content': 0.08133811503648758, 'timestamp': '2025-10-02 00:20:14.688478', 'step': 5428, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:14.742348', 'step': 5428, 'epoch': 1}
{'type': 'loss', 'content': 0.08202183246612549, 'timestamp': '2025-10-02 00:20:14.745048', 'step': 5429, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:20:14.819602', 'step': 5429, 'epoch': 1}
{'type': 'loss', 'content': 0.051050107926130295, 'timestamp': '2025-10-02 00:20:14.833048', 'step': 5430, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:20:14.894136', 'step': 5430, 'epoch': 1}
{'type': 'loss', 'content': 0.05377454310655594, 'timestamp': '2025-10-02 00:20:14.904593', 'step': 5431, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:14.958939', 'step': 5431, 'epoch': 1}
{'type': 'loss', 'content': 0.058541227132081985, 'timestamp': '2025-10-02 00:20:14.964959', 'step': 5432, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:15.019170', 'step': 5432, 'epoch': 1}
{'type': 'loss', 'content': 0.04257693141698837, 'timestamp': '2025-10-02 00:20:15.026715', 'step': 5433, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:15.085989', 'step': 5433, 'epoch': 1}
{'type': 'loss', 'content': 0.053172916173934937, 'timestamp': '2025-10-02 00:20:15.096156', 'step': 5434, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:15.153198', 'step': 5434, 'epoch': 1}
{'type': 'loss', 'content': 0.039153750985860825, 'timestamp': '2025-10-02 00:20:15.155874', 'step': 5435, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:15.209795', 'step': 5435, 'epoch': 1}
{'type': 'loss', 'content': 0.0738627165555954, 'timestamp': '2025-10-02 00:20:15.215364', 'step': 5436, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:15.269108', 'step': 5436, 'epoch': 1}
{'type': 'loss', 'content': 0.14390672743320465, 'timestamp': '2025-10-02 00:20:15.271437', 'step': 5437, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:15.327344', 'step': 5437, 'epoch': 1}
{'type': 'loss', 'content': 0.027366530150175095, 'timestamp': '2025-10-02 00:20:15.329689', 'step': 5438, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:15.384872', 'step': 5438, 'epoch': 1}
{'type': 'loss', 'content': 0.09938972443342209, 'timestamp': '2025-10-02 00:20:15.394438', 'step': 5439, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:15.449106', 'step': 5439, 'epoch': 1}
{'type': 'loss', 'content': 0.016533326357603073, 'timestamp': '2025-10-02 00:20:15.459229', 'step': 5440, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:15.513380', 'step': 5440, 'epoch': 1}
{'type': 'loss', 'content': 0.05196419358253479, 'timestamp': '2025-10-02 00:20:15.515658', 'step': 5441, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:20:15.584132', 'step': 5441, 'epoch': 1}
{'type': 'loss', 'content': 0.04329109191894531, 'timestamp': '2025-10-02 00:20:15.596431', 'step': 5442, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:15.652162', 'step': 5442, 'epoch': 1}
{'type': 'loss', 'content': 0.038869984447956085, 'timestamp': '2025-10-02 00:20:15.661706', 'step': 5443, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:15.717240', 'step': 5443, 'epoch': 1}
{'type': 'loss', 'content': 0.0337081216275692, 'timestamp': '2025-10-02 00:20:15.723725', 'step': 5444, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:15.776849', 'step': 5444, 'epoch': 1}
{'type': 'loss', 'content': 0.2801934480667114, 'timestamp': '2025-10-02 00:20:15.779260', 'step': 5445, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:20:15.833958', 'step': 5445, 'epoch': 1}
{'type': 'loss', 'content': 0.10868645459413528, 'timestamp': '2025-10-02 00:20:15.836450', 'step': 5446, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:15.890775', 'step': 5446, 'epoch': 1}
{'type': 'loss', 'content': 0.06891896575689316, 'timestamp': '2025-10-02 00:20:15.900173', 'step': 5447, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:20:15.954230', 'step': 5447, 'epoch': 1}
{'type': 'loss', 'content': 0.31661343574523926, 'timestamp': '2025-10-02 00:20:15.960847', 'step': 5448, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:16.014842', 'step': 5448, 'epoch': 1}
{'type': 'loss', 'content': 0.07981622219085693, 'timestamp': '2025-10-02 00:20:16.022322', 'step': 5449, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:16.076412', 'step': 5449, 'epoch': 1}
{'type': 'loss', 'content': 0.0823521614074707, 'timestamp': '2025-10-02 00:20:16.082213', 'step': 5450, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:16.138051', 'step': 5450, 'epoch': 1}
{'type': 'loss', 'content': 0.08375383168458939, 'timestamp': '2025-10-02 00:20:16.143937', 'step': 5451, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:16.197681', 'step': 5451, 'epoch': 1}
{'type': 'loss', 'content': 0.1447412669658661, 'timestamp': '2025-10-02 00:20:16.203674', 'step': 5452, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:16.256947', 'step': 5452, 'epoch': 1}
{'type': 'loss', 'content': 0.11361054331064224, 'timestamp': '2025-10-02 00:20:16.259335', 'step': 5453, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:16.313923', 'step': 5453, 'epoch': 1}
{'type': 'loss', 'content': 0.038336269557476044, 'timestamp': '2025-10-02 00:20:16.321547', 'step': 5454, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:16.376047', 'step': 5454, 'epoch': 1}
{'type': 'loss', 'content': 0.10318238288164139, 'timestamp': '2025-10-02 00:20:16.378709', 'step': 5455, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:16.433250', 'step': 5455, 'epoch': 1}
{'type': 'loss', 'content': 0.06919337809085846, 'timestamp': '2025-10-02 00:20:16.441475', 'step': 5456, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:20:16.494907', 'step': 5456, 'epoch': 1}
{'type': 'loss', 'content': 0.05733659490942955, 'timestamp': '2025-10-02 00:20:16.497277', 'step': 5457, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:16.550916', 'step': 5457, 'epoch': 1}
{'type': 'loss', 'content': 0.21439534425735474, 'timestamp': '2025-10-02 00:20:16.553541', 'step': 5458, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:16.608119', 'step': 5458, 'epoch': 1}
{'type': 'loss', 'content': 0.09097623080015182, 'timestamp': '2025-10-02 00:20:16.610337', 'step': 5459, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:16.665394', 'step': 5459, 'epoch': 1}
{'type': 'loss', 'content': 0.014154181815683842, 'timestamp': '2025-10-02 00:20:16.675750', 'step': 5460, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:16.730395', 'step': 5460, 'epoch': 1}
{'type': 'loss', 'content': 0.12461873143911362, 'timestamp': '2025-10-02 00:20:16.732873', 'step': 5461, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:16.791007', 'step': 5461, 'epoch': 1}
{'type': 'loss', 'content': 0.03983865678310394, 'timestamp': '2025-10-02 00:20:16.801196', 'step': 5462, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:16.858415', 'step': 5462, 'epoch': 1}
{'type': 'loss', 'content': 0.029207853600382805, 'timestamp': '2025-10-02 00:20:16.867972', 'step': 5463, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:16.921965', 'step': 5463, 'epoch': 1}
{'type': 'loss', 'content': 0.08292075991630554, 'timestamp': '2025-10-02 00:20:16.928649', 'step': 5464, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:20:16.981335', 'step': 5464, 'epoch': 1}
{'type': 'loss', 'content': 0.09639494866132736, 'timestamp': '2025-10-02 00:20:16.983709', 'step': 5465, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:17.037491', 'step': 5465, 'epoch': 1}
{'type': 'loss', 'content': 0.18685562908649445, 'timestamp': '2025-10-02 00:20:17.039748', 'step': 5466, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:17.093961', 'step': 5466, 'epoch': 1}
{'type': 'loss', 'content': 0.07192260771989822, 'timestamp': '2025-10-02 00:20:17.096328', 'step': 5467, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:17.151590', 'step': 5467, 'epoch': 1}
{'type': 'loss', 'content': 0.21227215230464935, 'timestamp': '2025-10-02 00:20:17.158556', 'step': 5468, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:17.213056', 'step': 5468, 'epoch': 1}
{'type': 'loss', 'content': 0.24501775205135345, 'timestamp': '2025-10-02 00:20:17.215855', 'step': 5469, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:17.272042', 'step': 5469, 'epoch': 1}
{'type': 'loss', 'content': 0.11200466006994247, 'timestamp': '2025-10-02 00:20:17.275149', 'step': 5470, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:17.331083', 'step': 5470, 'epoch': 1}
{'type': 'loss', 'content': 0.048340510576963425, 'timestamp': '2025-10-02 00:20:17.338516', 'step': 5471, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:20:17.394698', 'step': 5471, 'epoch': 1}
{'type': 'loss', 'content': 0.05920954793691635, 'timestamp': '2025-10-02 00:20:17.401312', 'step': 5472, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:17.461098', 'step': 5472, 'epoch': 1}
{'type': 'loss', 'content': 0.05085466429591179, 'timestamp': '2025-10-02 00:20:17.472083', 'step': 5473, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:17.527481', 'step': 5473, 'epoch': 1}
{'type': 'loss', 'content': 0.07032963633537292, 'timestamp': '2025-10-02 00:20:17.530439', 'step': 5474, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:17.586718', 'step': 5474, 'epoch': 1}
{'type': 'loss', 'content': 0.11850930005311966, 'timestamp': '2025-10-02 00:20:17.590112', 'step': 5475, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:17.645186', 'step': 5475, 'epoch': 1}
{'type': 'loss', 'content': 0.13062001764774323, 'timestamp': '2025-10-02 00:20:17.652251', 'step': 5476, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:17.709487', 'step': 5476, 'epoch': 1}
{'type': 'loss', 'content': 0.046510834246873856, 'timestamp': '2025-10-02 00:20:17.715254', 'step': 5477, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:17.773022', 'step': 5477, 'epoch': 1}
{'type': 'loss', 'content': 0.07400073111057281, 'timestamp': '2025-10-02 00:20:17.782382', 'step': 5478, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:17.837726', 'step': 5478, 'epoch': 1}
{'type': 'loss', 'content': 0.06831732392311096, 'timestamp': '2025-10-02 00:20:17.840851', 'step': 5479, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:17.897297', 'step': 5479, 'epoch': 1}
{'type': 'loss', 'content': 0.10553506761789322, 'timestamp': '2025-10-02 00:20:17.903826', 'step': 5480, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:17.958740', 'step': 5480, 'epoch': 1}
{'type': 'loss', 'content': 0.12957298755645752, 'timestamp': '2025-10-02 00:20:17.962155', 'step': 5481, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:20:18.016710', 'step': 5481, 'epoch': 1}
{'type': 'loss', 'content': 0.2516780197620392, 'timestamp': '2025-10-02 00:20:18.019937', 'step': 5482, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:18.075288', 'step': 5482, 'epoch': 1}
{'type': 'loss', 'content': 0.06189713627099991, 'timestamp': '2025-10-02 00:20:18.081073', 'step': 5483, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:18.136737', 'step': 5483, 'epoch': 1}
{'type': 'loss', 'content': 0.10265862196683884, 'timestamp': '2025-10-02 00:20:18.142512', 'step': 5484, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:18.195852', 'step': 5484, 'epoch': 1}
{'type': 'loss', 'content': 0.12426672875881195, 'timestamp': '2025-10-02 00:20:18.198517', 'step': 5485, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:20:18.262736', 'step': 5485, 'epoch': 1}
{'type': 'loss', 'content': 0.04541666805744171, 'timestamp': '2025-10-02 00:20:18.273404', 'step': 5486, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:18.329600', 'step': 5486, 'epoch': 1}
{'type': 'loss', 'content': 0.13580991327762604, 'timestamp': '2025-10-02 00:20:18.332625', 'step': 5487, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:18.390438', 'step': 5487, 'epoch': 1}
{'type': 'loss', 'content': 0.047070495784282684, 'timestamp': '2025-10-02 00:20:18.400761', 'step': 5488, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:18.456485', 'step': 5488, 'epoch': 1}
{'type': 'loss', 'content': 0.09860773384571075, 'timestamp': '2025-10-02 00:20:18.460125', 'step': 5489, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:18.518119', 'step': 5489, 'epoch': 1}
{'type': 'loss', 'content': 0.08043216168880463, 'timestamp': '2025-10-02 00:20:18.527650', 'step': 5490, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:18.583130', 'step': 5490, 'epoch': 1}
{'type': 'loss', 'content': 0.19073940813541412, 'timestamp': '2025-10-02 00:20:18.585660', 'step': 5491, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:18.640643', 'step': 5491, 'epoch': 1}
{'type': 'loss', 'content': 0.16537846624851227, 'timestamp': '2025-10-02 00:20:18.650770', 'step': 5492, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:18.704190', 'step': 5492, 'epoch': 1}
{'type': 'loss', 'content': 0.11697334051132202, 'timestamp': '2025-10-02 00:20:18.714395', 'step': 5493, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:18.769886', 'step': 5493, 'epoch': 1}
{'type': 'loss', 'content': 0.12237347662448883, 'timestamp': '2025-10-02 00:20:18.772010', 'step': 5494, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:18.825916', 'step': 5494, 'epoch': 1}
{'type': 'loss', 'content': 0.1333872675895691, 'timestamp': '2025-10-02 00:20:18.828129', 'step': 5495, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:18.882369', 'step': 5495, 'epoch': 1}
{'type': 'loss', 'content': 0.03051685355603695, 'timestamp': '2025-10-02 00:20:18.888309', 'step': 5496, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:18.942943', 'step': 5496, 'epoch': 1}
{'type': 'loss', 'content': 0.1476057469844818, 'timestamp': '2025-10-02 00:20:18.945270', 'step': 5497, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:18.999507', 'step': 5497, 'epoch': 1}
{'type': 'loss', 'content': 0.0778622254729271, 'timestamp': '2025-10-02 00:20:19.007124', 'step': 5498, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:19.061923', 'step': 5498, 'epoch': 1}
{'type': 'loss', 'content': 0.07743018120527267, 'timestamp': '2025-10-02 00:20:19.069418', 'step': 5499, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:19.123191', 'step': 5499, 'epoch': 1}
{'type': 'loss', 'content': 0.2677451968193054, 'timestamp': '2025-10-02 00:20:19.128908', 'step': 5500, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 5500', 'timestamp': '2025-10-02 00:20:19.543863', 'step': 5500, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:19.598102', 'step': 5500, 'epoch': 1}
{'type': 'loss', 'content': 0.11168314516544342, 'timestamp': '2025-10-02 00:20:19.600478', 'step': 5501, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:19.655002', 'step': 5501, 'epoch': 1}
{'type': 'loss', 'content': 0.17753170430660248, 'timestamp': '2025-10-02 00:20:19.657617', 'step': 5502, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:19.713493', 'step': 5502, 'epoch': 1}
{'type': 'loss', 'content': 0.15565955638885498, 'timestamp': '2025-10-02 00:20:19.718684', 'step': 5503, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:19.772708', 'step': 5503, 'epoch': 1}
{'type': 'loss', 'content': 0.2045028805732727, 'timestamp': '2025-10-02 00:20:19.779389', 'step': 5504, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:19.832836', 'step': 5504, 'epoch': 1}
{'type': 'loss', 'content': 0.1275629848241806, 'timestamp': '2025-10-02 00:20:19.835455', 'step': 5505, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:19.889886', 'step': 5505, 'epoch': 1}
{'type': 'loss', 'content': 0.034003812819719315, 'timestamp': '2025-10-02 00:20:19.895755', 'step': 5506, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:19.949770', 'step': 5506, 'epoch': 1}
{'type': 'loss', 'content': 0.06369654089212418, 'timestamp': '2025-10-02 00:20:19.952294', 'step': 5507, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:20.005794', 'step': 5507, 'epoch': 1}
{'type': 'loss', 'content': 0.09603475779294968, 'timestamp': '2025-10-02 00:20:20.011565', 'step': 5508, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:20.065135', 'step': 5508, 'epoch': 1}
{'type': 'loss', 'content': 0.05624018982052803, 'timestamp': '2025-10-02 00:20:20.067257', 'step': 5509, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:20:20.121898', 'step': 5509, 'epoch': 1}
{'type': 'loss', 'content': 0.043690215796232224, 'timestamp': '2025-10-02 00:20:20.124169', 'step': 5510, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:20.178109', 'step': 5510, 'epoch': 1}
{'type': 'loss', 'content': 0.13946639001369476, 'timestamp': '2025-10-02 00:20:20.180459', 'step': 5511, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:20.235173', 'step': 5511, 'epoch': 1}
{'type': 'loss', 'content': 0.0751887634396553, 'timestamp': '2025-10-02 00:20:20.241058', 'step': 5512, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:20.295821', 'step': 5512, 'epoch': 1}
{'type': 'loss', 'content': 0.12784135341644287, 'timestamp': '2025-10-02 00:20:20.298061', 'step': 5513, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:20.353608', 'step': 5513, 'epoch': 1}
{'type': 'loss', 'content': 0.04899813234806061, 'timestamp': '2025-10-02 00:20:20.363163', 'step': 5514, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:20.418106', 'step': 5514, 'epoch': 1}
{'type': 'loss', 'content': 0.1957918256521225, 'timestamp': '2025-10-02 00:20:20.420847', 'step': 5515, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:20.475872', 'step': 5515, 'epoch': 1}
{'type': 'loss', 'content': 0.06406808644533157, 'timestamp': '2025-10-02 00:20:20.481773', 'step': 5516, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:20.536726', 'step': 5516, 'epoch': 1}
{'type': 'loss', 'content': 0.020370518788695335, 'timestamp': '2025-10-02 00:20:20.539278', 'step': 5517, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:20.593620', 'step': 5517, 'epoch': 1}
{'type': 'loss', 'content': 0.07465053349733353, 'timestamp': '2025-10-02 00:20:20.599781', 'step': 5518, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:20:20.670261', 'step': 5518, 'epoch': 1}
{'type': 'loss', 'content': 0.06629368662834167, 'timestamp': '2025-10-02 00:20:20.682874', 'step': 5519, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:20.737433', 'step': 5519, 'epoch': 1}
{'type': 'loss', 'content': 0.08288133889436722, 'timestamp': '2025-10-02 00:20:20.743191', 'step': 5520, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:20.797478', 'step': 5520, 'epoch': 1}
{'type': 'loss', 'content': 0.04372498393058777, 'timestamp': '2025-10-02 00:20:20.799888', 'step': 5521, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:20.854732', 'step': 5521, 'epoch': 1}
{'type': 'loss', 'content': 0.07305768132209778, 'timestamp': '2025-10-02 00:20:20.864309', 'step': 5522, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:20:20.926482', 'step': 5522, 'epoch': 1}
{'type': 'loss', 'content': 0.04257480800151825, 'timestamp': '2025-10-02 00:20:20.936983', 'step': 5523, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:20.991578', 'step': 5523, 'epoch': 1}
{'type': 'loss', 'content': 0.06670333445072174, 'timestamp': '2025-10-02 00:20:20.997354', 'step': 5524, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:20:21.050988', 'step': 5524, 'epoch': 1}
{'type': 'loss', 'content': 0.09660293161869049, 'timestamp': '2025-10-02 00:20:21.053511', 'step': 5525, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:21.108077', 'step': 5525, 'epoch': 1}
{'type': 'loss', 'content': 0.07525947690010071, 'timestamp': '2025-10-02 00:20:21.110459', 'step': 5526, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:21.169589', 'step': 5526, 'epoch': 1}
{'type': 'loss', 'content': 0.029363859444856644, 'timestamp': '2025-10-02 00:20:21.179735', 'step': 5527, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:21.235620', 'step': 5527, 'epoch': 1}
{'type': 'loss', 'content': 0.04741855710744858, 'timestamp': '2025-10-02 00:20:21.242280', 'step': 5528, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:20:21.295345', 'step': 5528, 'epoch': 1}
{'type': 'loss', 'content': 0.12923265993595123, 'timestamp': '2025-10-02 00:20:21.297452', 'step': 5529, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:21.351940', 'step': 5529, 'epoch': 1}
{'type': 'loss', 'content': 0.07331112027168274, 'timestamp': '2025-10-02 00:20:21.354410', 'step': 5530, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:21.409521', 'step': 5530, 'epoch': 1}
{'type': 'loss', 'content': 0.08333161473274231, 'timestamp': '2025-10-02 00:20:21.412131', 'step': 5531, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:20:21.465796', 'step': 5531, 'epoch': 1}
{'type': 'loss', 'content': 0.1956842690706253, 'timestamp': '2025-10-02 00:20:21.471605', 'step': 5532, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:21.525250', 'step': 5532, 'epoch': 1}
{'type': 'loss', 'content': 0.05279352143406868, 'timestamp': '2025-10-02 00:20:21.531243', 'step': 5533, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:21.586488', 'step': 5533, 'epoch': 1}
{'type': 'loss', 'content': 0.10463323444128036, 'timestamp': '2025-10-02 00:20:21.593982', 'step': 5534, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:21.649043', 'step': 5534, 'epoch': 1}
{'type': 'loss', 'content': 0.07533245533704758, 'timestamp': '2025-10-02 00:20:21.657787', 'step': 5535, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:21.711489', 'step': 5535, 'epoch': 1}
{'type': 'loss', 'content': 0.1013946533203125, 'timestamp': '2025-10-02 00:20:21.717314', 'step': 5536, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:21.771814', 'step': 5536, 'epoch': 1}
{'type': 'loss', 'content': 0.14337539672851562, 'timestamp': '2025-10-02 00:20:21.774383', 'step': 5537, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:21.828249', 'step': 5537, 'epoch': 1}
{'type': 'loss', 'content': 0.0870419591665268, 'timestamp': '2025-10-02 00:20:21.830645', 'step': 5538, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:21.884790', 'step': 5538, 'epoch': 1}
{'type': 'loss', 'content': 0.11325796693563461, 'timestamp': '2025-10-02 00:20:21.887267', 'step': 5539, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:21.940759', 'step': 5539, 'epoch': 1}
{'type': 'loss', 'content': 0.10194907337427139, 'timestamp': '2025-10-02 00:20:21.946543', 'step': 5540, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:22.001080', 'step': 5540, 'epoch': 1}
{'type': 'loss', 'content': 0.09787552058696747, 'timestamp': '2025-10-02 00:20:22.003522', 'step': 5541, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:22.057903', 'step': 5541, 'epoch': 1}
{'type': 'loss', 'content': 0.085850290954113, 'timestamp': '2025-10-02 00:20:22.060050', 'step': 5542, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:22.113815', 'step': 5542, 'epoch': 1}
{'type': 'loss', 'content': 0.12314081937074661, 'timestamp': '2025-10-02 00:20:22.121164', 'step': 5543, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:22.175799', 'step': 5543, 'epoch': 1}
{'type': 'loss', 'content': 0.176848903298378, 'timestamp': '2025-10-02 00:20:22.181786', 'step': 5544, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:22.235478', 'step': 5544, 'epoch': 1}
{'type': 'loss', 'content': 0.08589514344930649, 'timestamp': '2025-10-02 00:20:22.237826', 'step': 5545, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:22.292447', 'step': 5545, 'epoch': 1}
{'type': 'loss', 'content': 0.06533034145832062, 'timestamp': '2025-10-02 00:20:22.294793', 'step': 5546, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:20:22.349016', 'step': 5546, 'epoch': 1}
{'type': 'loss', 'content': 0.1715680956840515, 'timestamp': '2025-10-02 00:20:22.351355', 'step': 5547, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:22.405178', 'step': 5547, 'epoch': 1}
{'type': 'loss', 'content': 0.08823969960212708, 'timestamp': '2025-10-02 00:20:22.411248', 'step': 5548, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:22.466322', 'step': 5548, 'epoch': 1}
{'type': 'loss', 'content': 0.06383784115314484, 'timestamp': '2025-10-02 00:20:22.475808', 'step': 5549, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:22.530385', 'step': 5549, 'epoch': 1}
{'type': 'loss', 'content': 0.13641786575317383, 'timestamp': '2025-10-02 00:20:22.532797', 'step': 5550, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:22.587313', 'step': 5550, 'epoch': 1}
{'type': 'loss', 'content': 0.042314935475587845, 'timestamp': '2025-10-02 00:20:22.589716', 'step': 5551, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:22.645776', 'step': 5551, 'epoch': 1}
{'type': 'loss', 'content': 0.16353127360343933, 'timestamp': '2025-10-02 00:20:22.656110', 'step': 5552, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:22.709773', 'step': 5552, 'epoch': 1}
{'type': 'loss', 'content': 0.16779498755931854, 'timestamp': '2025-10-02 00:20:22.712006', 'step': 5553, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:22.768335', 'step': 5553, 'epoch': 1}
{'type': 'loss', 'content': 0.017781700938940048, 'timestamp': '2025-10-02 00:20:22.777747', 'step': 5554, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:20:22.839924', 'step': 5554, 'epoch': 1}
{'type': 'loss', 'content': 0.017712214961647987, 'timestamp': '2025-10-02 00:20:22.850622', 'step': 5555, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:22.905511', 'step': 5555, 'epoch': 1}
{'type': 'loss', 'content': 0.08615852892398834, 'timestamp': '2025-10-02 00:20:22.915880', 'step': 5556, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:22.971835', 'step': 5556, 'epoch': 1}
{'type': 'loss', 'content': 0.15809433162212372, 'timestamp': '2025-10-02 00:20:22.974347', 'step': 5557, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:23.028585', 'step': 5557, 'epoch': 1}
{'type': 'loss', 'content': 0.08680081367492676, 'timestamp': '2025-10-02 00:20:23.030571', 'step': 5558, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:23.084605', 'step': 5558, 'epoch': 1}
{'type': 'loss', 'content': 0.14619135856628418, 'timestamp': '2025-10-02 00:20:23.087000', 'step': 5559, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:23.140346', 'step': 5559, 'epoch': 1}
{'type': 'loss', 'content': 0.16547195613384247, 'timestamp': '2025-10-02 00:20:23.146334', 'step': 5560, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:23.199757', 'step': 5560, 'epoch': 1}
{'type': 'loss', 'content': 0.24342337250709534, 'timestamp': '2025-10-02 00:20:23.202443', 'step': 5561, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:23.290211', 'step': 5561, 'epoch': 1}
{'type': 'loss', 'content': 0.025893978774547577, 'timestamp': '2025-10-02 00:20:23.293850', 'step': 5562, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:20:23.363434', 'step': 5562, 'epoch': 1}
{'type': 'loss', 'content': 0.07887674868106842, 'timestamp': '2025-10-02 00:20:23.367486', 'step': 5563, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:23.439291', 'step': 5563, 'epoch': 1}
{'type': 'loss', 'content': 0.14073723554611206, 'timestamp': '2025-10-02 00:20:23.445498', 'step': 5564, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:23.506229', 'step': 5564, 'epoch': 1}
{'type': 'loss', 'content': 0.03949720785021782, 'timestamp': '2025-10-02 00:20:23.513683', 'step': 5565, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:23.578230', 'step': 5565, 'epoch': 1}
{'type': 'loss', 'content': 0.09062199294567108, 'timestamp': '2025-10-02 00:20:23.585264', 'step': 5566, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:20:23.652621', 'step': 5566, 'epoch': 1}
{'type': 'loss', 'content': 0.08624546229839325, 'timestamp': '2025-10-02 00:20:23.656244', 'step': 5567, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:23.718128', 'step': 5567, 'epoch': 1}
{'type': 'loss', 'content': 0.06647767126560211, 'timestamp': '2025-10-02 00:20:23.727423', 'step': 5568, 'epoch': 1}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:20:51.830609', 'step': 5568, 'epoch': 1}
{'type': 'pplx', 'content': 90.12872441246054, 'timestamp': '2025-10-02 00:20:51.834827', 'step': 5568, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:51.891224', 'step': 5568, 'epoch': 1}
{'type': 'loss', 'content': 0.1121758297085762, 'timestamp': '2025-10-02 00:20:51.894663', 'step': 5569, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:51.958947', 'step': 5569, 'epoch': 1}
{'type': 'loss', 'content': 0.15853852033615112, 'timestamp': '2025-10-02 00:20:51.964150', 'step': 5570, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:20:52.027662', 'step': 5570, 'epoch': 1}
{'type': 'loss', 'content': 0.07288334518671036, 'timestamp': '2025-10-02 00:20:52.031594', 'step': 5571, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:52.102591', 'step': 5571, 'epoch': 1}
{'type': 'loss', 'content': 0.059244897216558456, 'timestamp': '2025-10-02 00:20:52.112752', 'step': 5572, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:52.184022', 'step': 5572, 'epoch': 1}
{'type': 'loss', 'content': 0.08766081929206848, 'timestamp': '2025-10-02 00:20:52.195030', 'step': 5573, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:20:52.258481', 'step': 5573, 'epoch': 1}
{'type': 'loss', 'content': 0.2027892768383026, 'timestamp': '2025-10-02 00:20:52.267451', 'step': 5574, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:52.330887', 'step': 5574, 'epoch': 1}
{'type': 'loss', 'content': 0.0930141881108284, 'timestamp': '2025-10-02 00:20:52.334890', 'step': 5575, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:52.393067', 'step': 5575, 'epoch': 1}
{'type': 'loss', 'content': 0.03805262967944145, 'timestamp': '2025-10-02 00:20:52.400143', 'step': 5576, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:20:52.469243', 'step': 5576, 'epoch': 1}
{'type': 'loss', 'content': 0.05974416807293892, 'timestamp': '2025-10-02 00:20:52.481008', 'step': 5577, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:52.544862', 'step': 5577, 'epoch': 1}
{'type': 'loss', 'content': 0.06927299499511719, 'timestamp': '2025-10-02 00:20:52.554444', 'step': 5578, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:52.618710', 'step': 5578, 'epoch': 1}
{'type': 'loss', 'content': 0.13680961728096008, 'timestamp': '2025-10-02 00:20:52.622436', 'step': 5579, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:52.691173', 'step': 5579, 'epoch': 1}
{'type': 'loss', 'content': 0.12783432006835938, 'timestamp': '2025-10-02 00:20:52.699669', 'step': 5580, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:52.756573', 'step': 5580, 'epoch': 1}
{'type': 'loss', 'content': 0.11707920581102371, 'timestamp': '2025-10-02 00:20:52.764047', 'step': 5581, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:20:52.823654', 'step': 5581, 'epoch': 1}
{'type': 'loss', 'content': 0.10364534705877304, 'timestamp': '2025-10-02 00:20:52.826318', 'step': 5582, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:52.880621', 'step': 5582, 'epoch': 1}
{'type': 'loss', 'content': 0.15570662915706635, 'timestamp': '2025-10-02 00:20:52.884456', 'step': 5583, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:52.941339', 'step': 5583, 'epoch': 1}
{'type': 'loss', 'content': 0.12237917631864548, 'timestamp': '2025-10-02 00:20:52.947341', 'step': 5584, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:53.004533', 'step': 5584, 'epoch': 1}
{'type': 'loss', 'content': 0.10136555880308151, 'timestamp': '2025-10-02 00:20:53.007597', 'step': 5585, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:20:53.062807', 'step': 5585, 'epoch': 1}
{'type': 'loss', 'content': 0.06421788036823273, 'timestamp': '2025-10-02 00:20:53.068229', 'step': 5586, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:53.123447', 'step': 5586, 'epoch': 1}
{'type': 'loss', 'content': 0.12516306340694427, 'timestamp': '2025-10-02 00:20:53.127336', 'step': 5587, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:53.183488', 'step': 5587, 'epoch': 1}
{'type': 'loss', 'content': 0.13298511505126953, 'timestamp': '2025-10-02 00:20:53.191676', 'step': 5588, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:20:53.251639', 'step': 5588, 'epoch': 1}
{'type': 'loss', 'content': 0.20245882868766785, 'timestamp': '2025-10-02 00:20:53.254759', 'step': 5589, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:53.311225', 'step': 5589, 'epoch': 1}
{'type': 'loss', 'content': 0.07766222208738327, 'timestamp': '2025-10-02 00:20:53.317240', 'step': 5590, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:53.373742', 'step': 5590, 'epoch': 1}
{'type': 'loss', 'content': 0.10802536457777023, 'timestamp': '2025-10-02 00:20:53.378131', 'step': 5591, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:53.443887', 'step': 5591, 'epoch': 1}
{'type': 'loss', 'content': 0.047240450978279114, 'timestamp': '2025-10-02 00:20:53.454144', 'step': 5592, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:53.519264', 'step': 5592, 'epoch': 1}
{'type': 'loss', 'content': 0.16187502443790436, 'timestamp': '2025-10-02 00:20:53.521934', 'step': 5593, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:20:53.576570', 'step': 5593, 'epoch': 1}
{'type': 'loss', 'content': 0.1935158520936966, 'timestamp': '2025-10-02 00:20:53.578639', 'step': 5594, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:53.632722', 'step': 5594, 'epoch': 1}
{'type': 'loss', 'content': 0.21563617885112762, 'timestamp': '2025-10-02 00:20:53.634749', 'step': 5595, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:20:53.696551', 'step': 5595, 'epoch': 1}
{'type': 'loss', 'content': 0.04342057555913925, 'timestamp': '2025-10-02 00:20:53.708111', 'step': 5596, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:53.761675', 'step': 5596, 'epoch': 1}
{'type': 'loss', 'content': 0.08578076213598251, 'timestamp': '2025-10-02 00:20:53.770903', 'step': 5597, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:53.825757', 'step': 5597, 'epoch': 1}
{'type': 'loss', 'content': 0.15404444932937622, 'timestamp': '2025-10-02 00:20:53.828055', 'step': 5598, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:53.882278', 'step': 5598, 'epoch': 1}
{'type': 'loss', 'content': 0.029503123834729195, 'timestamp': '2025-10-02 00:20:53.884584', 'step': 5599, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:53.943319', 'step': 5599, 'epoch': 1}
{'type': 'loss', 'content': 0.01845318078994751, 'timestamp': '2025-10-02 00:20:53.954283', 'step': 5600, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:54.007823', 'step': 5600, 'epoch': 1}
{'type': 'loss', 'content': 0.10652732849121094, 'timestamp': '2025-10-02 00:20:54.010202', 'step': 5601, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:54.064314', 'step': 5601, 'epoch': 1}
{'type': 'loss', 'content': 0.040157634764909744, 'timestamp': '2025-10-02 00:20:54.066672', 'step': 5602, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:54.123376', 'step': 5602, 'epoch': 1}
{'type': 'loss', 'content': 0.08140476047992706, 'timestamp': '2025-10-02 00:20:54.130742', 'step': 5603, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:54.185632', 'step': 5603, 'epoch': 1}
{'type': 'loss', 'content': 0.019976314157247543, 'timestamp': '2025-10-02 00:20:54.191443', 'step': 5604, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:54.245149', 'step': 5604, 'epoch': 1}
{'type': 'loss', 'content': 0.16076406836509705, 'timestamp': '2025-10-02 00:20:54.247814', 'step': 5605, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:54.302646', 'step': 5605, 'epoch': 1}
{'type': 'loss', 'content': 0.10045398026704788, 'timestamp': '2025-10-02 00:20:54.308536', 'step': 5606, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:54.370654', 'step': 5606, 'epoch': 1}
{'type': 'loss', 'content': 0.12925474345684052, 'timestamp': '2025-10-02 00:20:54.372915', 'step': 5607, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:54.426914', 'step': 5607, 'epoch': 1}
{'type': 'loss', 'content': 0.1193319708108902, 'timestamp': '2025-10-02 00:20:54.432986', 'step': 5608, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:54.487588', 'step': 5608, 'epoch': 1}
{'type': 'loss', 'content': 0.11271566152572632, 'timestamp': '2025-10-02 00:20:54.490029', 'step': 5609, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:54.549827', 'step': 5609, 'epoch': 1}
{'type': 'loss', 'content': 0.05275708809494972, 'timestamp': '2025-10-02 00:20:54.560050', 'step': 5610, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:54.613683', 'step': 5610, 'epoch': 1}
{'type': 'loss', 'content': 0.16053301095962524, 'timestamp': '2025-10-02 00:20:54.616388', 'step': 5611, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:54.672322', 'step': 5611, 'epoch': 1}
{'type': 'loss', 'content': 0.04811323434114456, 'timestamp': '2025-10-02 00:20:54.680646', 'step': 5612, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:54.736054', 'step': 5612, 'epoch': 1}
{'type': 'loss', 'content': 0.04507816582918167, 'timestamp': '2025-10-02 00:20:54.746289', 'step': 5613, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:54.800482', 'step': 5613, 'epoch': 1}
{'type': 'loss', 'content': 0.2169194519519806, 'timestamp': '2025-10-02 00:20:54.802738', 'step': 5614, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:54.856892', 'step': 5614, 'epoch': 1}
{'type': 'loss', 'content': 0.15325923264026642, 'timestamp': '2025-10-02 00:20:54.859252', 'step': 5615, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:54.917670', 'step': 5615, 'epoch': 1}
{'type': 'loss', 'content': 0.059230826795101166, 'timestamp': '2025-10-02 00:20:54.928788', 'step': 5616, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:54.982288', 'step': 5616, 'epoch': 1}
{'type': 'loss', 'content': 0.1235666275024414, 'timestamp': '2025-10-02 00:20:54.984406', 'step': 5617, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:55.039054', 'step': 5617, 'epoch': 1}
{'type': 'loss', 'content': 0.17932645976543427, 'timestamp': '2025-10-02 00:20:55.041393', 'step': 5618, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:55.098340', 'step': 5618, 'epoch': 1}
{'type': 'loss', 'content': 0.04119568690657616, 'timestamp': '2025-10-02 00:20:55.107819', 'step': 5619, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:55.163818', 'step': 5619, 'epoch': 1}
{'type': 'loss', 'content': 0.08268065005540848, 'timestamp': '2025-10-02 00:20:55.169903', 'step': 5620, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:55.223833', 'step': 5620, 'epoch': 1}
{'type': 'loss', 'content': 0.02754971943795681, 'timestamp': '2025-10-02 00:20:55.226723', 'step': 5621, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:20:55.282989', 'step': 5621, 'epoch': 1}
{'type': 'loss', 'content': 0.09706934541463852, 'timestamp': '2025-10-02 00:20:55.285500', 'step': 5622, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:55.341491', 'step': 5622, 'epoch': 1}
{'type': 'loss', 'content': 0.1537531316280365, 'timestamp': '2025-10-02 00:20:55.344287', 'step': 5623, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:55.398336', 'step': 5623, 'epoch': 1}
{'type': 'loss', 'content': 0.10866708308458328, 'timestamp': '2025-10-02 00:20:55.404593', 'step': 5624, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:55.460304', 'step': 5624, 'epoch': 1}
{'type': 'loss', 'content': 0.1857934147119522, 'timestamp': '2025-10-02 00:20:55.463165', 'step': 5625, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:55.517328', 'step': 5625, 'epoch': 1}
{'type': 'loss', 'content': 0.12206023186445236, 'timestamp': '2025-10-02 00:20:55.520377', 'step': 5626, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:55.575979', 'step': 5626, 'epoch': 1}
{'type': 'loss', 'content': 0.02836369164288044, 'timestamp': '2025-10-02 00:20:55.578461', 'step': 5627, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:20:55.632897', 'step': 5627, 'epoch': 1}
{'type': 'loss', 'content': 0.2528594732284546, 'timestamp': '2025-10-02 00:20:55.638736', 'step': 5628, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:55.693374', 'step': 5628, 'epoch': 1}
{'type': 'loss', 'content': 0.03626227006316185, 'timestamp': '2025-10-02 00:20:55.702823', 'step': 5629, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:55.758011', 'step': 5629, 'epoch': 1}
{'type': 'loss', 'content': 0.10300926119089127, 'timestamp': '2025-10-02 00:20:55.760538', 'step': 5630, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:20:55.815314', 'step': 5630, 'epoch': 1}
{'type': 'loss', 'content': 0.11846819519996643, 'timestamp': '2025-10-02 00:20:55.818148', 'step': 5631, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:55.872893', 'step': 5631, 'epoch': 1}
{'type': 'loss', 'content': 0.10697370022535324, 'timestamp': '2025-10-02 00:20:55.879163', 'step': 5632, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:55.933379', 'step': 5632, 'epoch': 1}
{'type': 'loss', 'content': 0.05321017652750015, 'timestamp': '2025-10-02 00:20:55.939414', 'step': 5633, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:55.993288', 'step': 5633, 'epoch': 1}
{'type': 'loss', 'content': 0.05063467472791672, 'timestamp': '2025-10-02 00:20:55.995845', 'step': 5634, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:56.051150', 'step': 5634, 'epoch': 1}
{'type': 'loss', 'content': 0.04062823951244354, 'timestamp': '2025-10-02 00:20:56.054287', 'step': 5635, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:56.109357', 'step': 5635, 'epoch': 1}
{'type': 'loss', 'content': 0.08479811996221542, 'timestamp': '2025-10-02 00:20:56.115397', 'step': 5636, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:56.168461', 'step': 5636, 'epoch': 1}
{'type': 'loss', 'content': 0.1733826845884323, 'timestamp': '2025-10-02 00:20:56.170906', 'step': 5637, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:20:56.234961', 'step': 5637, 'epoch': 1}
{'type': 'loss', 'content': 0.01628759130835533, 'timestamp': '2025-10-02 00:20:56.245466', 'step': 5638, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:56.302033', 'step': 5638, 'epoch': 1}
{'type': 'loss', 'content': 0.030855974182486534, 'timestamp': '2025-10-02 00:20:56.309448', 'step': 5639, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:56.363999', 'step': 5639, 'epoch': 1}
{'type': 'loss', 'content': 0.07940211892127991, 'timestamp': '2025-10-02 00:20:56.370859', 'step': 5640, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:20:56.430985', 'step': 5640, 'epoch': 1}
{'type': 'loss', 'content': 0.021701494231820107, 'timestamp': '2025-10-02 00:20:56.442350', 'step': 5641, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:56.498084', 'step': 5641, 'epoch': 1}
{'type': 'loss', 'content': 0.08178283274173737, 'timestamp': '2025-10-02 00:20:56.500821', 'step': 5642, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:56.554966', 'step': 5642, 'epoch': 1}
{'type': 'loss', 'content': 0.1500633805990219, 'timestamp': '2025-10-02 00:20:56.558291', 'step': 5643, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:56.614037', 'step': 5643, 'epoch': 1}
{'type': 'loss', 'content': 0.11128215491771698, 'timestamp': '2025-10-02 00:20:56.619847', 'step': 5644, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:56.673217', 'step': 5644, 'epoch': 1}
{'type': 'loss', 'content': 0.08837302774190903, 'timestamp': '2025-10-02 00:20:56.679175', 'step': 5645, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:56.733218', 'step': 5645, 'epoch': 1}
{'type': 'loss', 'content': 0.051391251385211945, 'timestamp': '2025-10-02 00:20:56.735827', 'step': 5646, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:56.789953', 'step': 5646, 'epoch': 1}
{'type': 'loss', 'content': 0.18754903972148895, 'timestamp': '2025-10-02 00:20:56.792337', 'step': 5647, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:56.846452', 'step': 5647, 'epoch': 1}
{'type': 'loss', 'content': 0.23428748548030853, 'timestamp': '2025-10-02 00:20:56.852361', 'step': 5648, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:56.906926', 'step': 5648, 'epoch': 1}
{'type': 'loss', 'content': 0.03316435217857361, 'timestamp': '2025-10-02 00:20:56.916663', 'step': 5649, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:56.970478', 'step': 5649, 'epoch': 1}
{'type': 'loss', 'content': 0.18233220279216766, 'timestamp': '2025-10-02 00:20:56.973003', 'step': 5650, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:57.027791', 'step': 5650, 'epoch': 1}
{'type': 'loss', 'content': 0.0955730751156807, 'timestamp': '2025-10-02 00:20:57.030374', 'step': 5651, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:20:57.084139', 'step': 5651, 'epoch': 1}
{'type': 'loss', 'content': 0.1288549304008484, 'timestamp': '2025-10-02 00:20:57.089992', 'step': 5652, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:20:57.145160', 'step': 5652, 'epoch': 1}
{'type': 'loss', 'content': 0.14950156211853027, 'timestamp': '2025-10-02 00:20:57.147450', 'step': 5653, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:20:57.210145', 'step': 5653, 'epoch': 1}
{'type': 'loss', 'content': 0.07586012035608292, 'timestamp': '2025-10-02 00:20:57.220635', 'step': 5654, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:57.276027', 'step': 5654, 'epoch': 1}
{'type': 'loss', 'content': 0.027460774406790733, 'timestamp': '2025-10-02 00:20:57.278339', 'step': 5655, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:20:57.332719', 'step': 5655, 'epoch': 1}
{'type': 'loss', 'content': 0.17425653338432312, 'timestamp': '2025-10-02 00:20:57.338658', 'step': 5656, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:57.392101', 'step': 5656, 'epoch': 1}
{'type': 'loss', 'content': 0.06500625610351562, 'timestamp': '2025-10-02 00:20:57.394278', 'step': 5657, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:57.447636', 'step': 5657, 'epoch': 1}
{'type': 'loss', 'content': 0.21340857446193695, 'timestamp': '2025-10-02 00:20:57.450629', 'step': 5658, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:57.505698', 'step': 5658, 'epoch': 1}
{'type': 'loss', 'content': 0.07436204701662064, 'timestamp': '2025-10-02 00:20:57.514894', 'step': 5659, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:20:57.569823', 'step': 5659, 'epoch': 1}
{'type': 'loss', 'content': 0.07170027494430542, 'timestamp': '2025-10-02 00:20:57.576387', 'step': 5660, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:57.631400', 'step': 5660, 'epoch': 1}
{'type': 'loss', 'content': 0.11749575287103653, 'timestamp': '2025-10-02 00:20:57.633887', 'step': 5661, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:57.690112', 'step': 5661, 'epoch': 1}
{'type': 'loss', 'content': 0.021047769114375114, 'timestamp': '2025-10-02 00:20:57.697678', 'step': 5662, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:57.752168', 'step': 5662, 'epoch': 1}
{'type': 'loss', 'content': 0.09387005865573883, 'timestamp': '2025-10-02 00:20:57.761383', 'step': 5663, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:57.815562', 'step': 5663, 'epoch': 1}
{'type': 'loss', 'content': 0.0681404247879982, 'timestamp': '2025-10-02 00:20:57.823781', 'step': 5664, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:57.878291', 'step': 5664, 'epoch': 1}
{'type': 'loss', 'content': 0.03784555196762085, 'timestamp': '2025-10-02 00:20:57.880880', 'step': 5665, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:57.935201', 'step': 5665, 'epoch': 1}
{'type': 'loss', 'content': 0.17550508677959442, 'timestamp': '2025-10-02 00:20:57.937381', 'step': 5666, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:20:57.991404', 'step': 5666, 'epoch': 1}
{'type': 'loss', 'content': 0.055618733167648315, 'timestamp': '2025-10-02 00:20:58.000776', 'step': 5667, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:58.055147', 'step': 5667, 'epoch': 1}
{'type': 'loss', 'content': 0.1950494945049286, 'timestamp': '2025-10-02 00:20:58.061157', 'step': 5668, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:58.114895', 'step': 5668, 'epoch': 1}
{'type': 'loss', 'content': 0.06572213023900986, 'timestamp': '2025-10-02 00:20:58.117122', 'step': 5669, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:58.173397', 'step': 5669, 'epoch': 1}
{'type': 'loss', 'content': 0.03387736156582832, 'timestamp': '2025-10-02 00:20:58.183086', 'step': 5670, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:58.237588', 'step': 5670, 'epoch': 1}
{'type': 'loss', 'content': 0.07838636636734009, 'timestamp': '2025-10-02 00:20:58.245142', 'step': 5671, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:20:58.299197', 'step': 5671, 'epoch': 1}
{'type': 'loss', 'content': 0.1907166689634323, 'timestamp': '2025-10-02 00:20:58.304861', 'step': 5672, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:20:58.365416', 'step': 5672, 'epoch': 1}
{'type': 'loss', 'content': 0.03435596078634262, 'timestamp': '2025-10-02 00:20:58.376798', 'step': 5673, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:20:58.430452', 'step': 5673, 'epoch': 1}
{'type': 'loss', 'content': 0.16809900104999542, 'timestamp': '2025-10-02 00:20:58.433514', 'step': 5674, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:58.487797', 'step': 5674, 'epoch': 1}
{'type': 'loss', 'content': 0.10329818725585938, 'timestamp': '2025-10-02 00:20:58.490224', 'step': 5675, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:20:58.545098', 'step': 5675, 'epoch': 1}
{'type': 'loss', 'content': 0.18959088623523712, 'timestamp': '2025-10-02 00:20:58.550932', 'step': 5676, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:20:58.609305', 'step': 5676, 'epoch': 1}
{'type': 'loss', 'content': 0.0622982457280159, 'timestamp': '2025-10-02 00:20:58.620269', 'step': 5677, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:20:58.673911', 'step': 5677, 'epoch': 1}
{'type': 'loss', 'content': 0.1638728231191635, 'timestamp': '2025-10-02 00:20:58.676505', 'step': 5678, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:20:58.738322', 'step': 5678, 'epoch': 1}
{'type': 'loss', 'content': 0.040688011795282364, 'timestamp': '2025-10-02 00:20:58.748975', 'step': 5679, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:58.803782', 'step': 5679, 'epoch': 1}
{'type': 'loss', 'content': 0.09156792610883713, 'timestamp': '2025-10-02 00:20:58.809767', 'step': 5680, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:58.863215', 'step': 5680, 'epoch': 1}
{'type': 'loss', 'content': 0.09677408635616302, 'timestamp': '2025-10-02 00:20:58.865944', 'step': 5681, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:58.921213', 'step': 5681, 'epoch': 1}
{'type': 'loss', 'content': 0.19478417932987213, 'timestamp': '2025-10-02 00:20:58.923979', 'step': 5682, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:58.979138', 'step': 5682, 'epoch': 1}
{'type': 'loss', 'content': 0.13627488911151886, 'timestamp': '2025-10-02 00:20:58.981627', 'step': 5683, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:59.035655', 'step': 5683, 'epoch': 1}
{'type': 'loss', 'content': 0.07776641100645065, 'timestamp': '2025-10-02 00:20:59.041678', 'step': 5684, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:20:59.102005', 'step': 5684, 'epoch': 1}
{'type': 'loss', 'content': 0.014171351678669453, 'timestamp': '2025-10-02 00:20:59.113528', 'step': 5685, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:20:59.175297', 'step': 5685, 'epoch': 1}
{'type': 'loss', 'content': 0.035207007080316544, 'timestamp': '2025-10-02 00:20:59.185782', 'step': 5686, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:20:59.255454', 'step': 5686, 'epoch': 1}
{'type': 'loss', 'content': 0.07913678884506226, 'timestamp': '2025-10-02 00:20:59.266163', 'step': 5687, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:20:59.320492', 'step': 5687, 'epoch': 1}
{'type': 'loss', 'content': 0.1077946275472641, 'timestamp': '2025-10-02 00:20:59.327307', 'step': 5688, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:59.380707', 'step': 5688, 'epoch': 1}
{'type': 'loss', 'content': 0.21732506155967712, 'timestamp': '2025-10-02 00:20:59.383230', 'step': 5689, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:59.437366', 'step': 5689, 'epoch': 1}
{'type': 'loss', 'content': 0.02509247697889805, 'timestamp': '2025-10-02 00:20:59.439893', 'step': 5690, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:20:59.494626', 'step': 5690, 'epoch': 1}
{'type': 'loss', 'content': 0.1936933398246765, 'timestamp': '2025-10-02 00:20:59.497393', 'step': 5691, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:20:59.552102', 'step': 5691, 'epoch': 1}
{'type': 'loss', 'content': 0.10065796226263046, 'timestamp': '2025-10-02 00:20:59.558431', 'step': 5692, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:59.612557', 'step': 5692, 'epoch': 1}
{'type': 'loss', 'content': 0.013138129375874996, 'timestamp': '2025-10-02 00:20:59.620264', 'step': 5693, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:59.675664', 'step': 5693, 'epoch': 1}
{'type': 'loss', 'content': 0.04452456161379814, 'timestamp': '2025-10-02 00:20:59.683166', 'step': 5694, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:20:59.737761', 'step': 5694, 'epoch': 1}
{'type': 'loss', 'content': 0.10924045741558075, 'timestamp': '2025-10-02 00:20:59.740525', 'step': 5695, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:20:59.797768', 'step': 5695, 'epoch': 1}
{'type': 'loss', 'content': 0.10215634107589722, 'timestamp': '2025-10-02 00:20:59.805003', 'step': 5696, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:20:59.860441', 'step': 5696, 'epoch': 1}
{'type': 'loss', 'content': 0.019201336428523064, 'timestamp': '2025-10-02 00:20:59.868091', 'step': 5697, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:20:59.926045', 'step': 5697, 'epoch': 1}
{'type': 'loss', 'content': 0.04993896931409836, 'timestamp': '2025-10-02 00:20:59.935601', 'step': 5698, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:20:59.992614', 'step': 5698, 'epoch': 1}
{'type': 'loss', 'content': 0.09766676276922226, 'timestamp': '2025-10-02 00:20:59.995656', 'step': 5699, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:00.052121', 'step': 5699, 'epoch': 1}
{'type': 'loss', 'content': 0.0793425664305687, 'timestamp': '2025-10-02 00:21:00.061552', 'step': 5700, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:21:00.127328', 'step': 5700, 'epoch': 1}
{'type': 'loss', 'content': 0.0821755900979042, 'timestamp': '2025-10-02 00:21:00.139106', 'step': 5701, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:00.195273', 'step': 5701, 'epoch': 1}
{'type': 'loss', 'content': 0.1110374853014946, 'timestamp': '2025-10-02 00:21:00.198429', 'step': 5702, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:00.254826', 'step': 5702, 'epoch': 1}
{'type': 'loss', 'content': 0.05497261881828308, 'timestamp': '2025-10-02 00:21:00.260662', 'step': 5703, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:00.317263', 'step': 5703, 'epoch': 1}
{'type': 'loss', 'content': 0.151390939950943, 'timestamp': '2025-10-02 00:21:00.324620', 'step': 5704, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:21:00.395470', 'step': 5704, 'epoch': 1}
{'type': 'loss', 'content': 0.04667048156261444, 'timestamp': '2025-10-02 00:21:00.408803', 'step': 5705, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:00.472639', 'step': 5705, 'epoch': 1}
{'type': 'loss', 'content': 0.022717250511050224, 'timestamp': '2025-10-02 00:21:00.483137', 'step': 5706, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:00.539831', 'step': 5706, 'epoch': 1}
{'type': 'loss', 'content': 0.18071617186069489, 'timestamp': '2025-10-02 00:21:00.543942', 'step': 5707, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:00.600251', 'step': 5707, 'epoch': 1}
{'type': 'loss', 'content': 0.07714688032865524, 'timestamp': '2025-10-02 00:21:00.607440', 'step': 5708, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:00.662962', 'step': 5708, 'epoch': 1}
{'type': 'loss', 'content': 0.08030082285404205, 'timestamp': '2025-10-02 00:21:00.669059', 'step': 5709, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:00.726139', 'step': 5709, 'epoch': 1}
{'type': 'loss', 'content': 0.08096127212047577, 'timestamp': '2025-10-02 00:21:00.735686', 'step': 5710, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:00.791854', 'step': 5710, 'epoch': 1}
{'type': 'loss', 'content': 0.19386936724185944, 'timestamp': '2025-10-02 00:21:00.795001', 'step': 5711, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:00.852261', 'step': 5711, 'epoch': 1}
{'type': 'loss', 'content': 0.1624917834997177, 'timestamp': '2025-10-02 00:21:00.858952', 'step': 5712, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:00.917187', 'step': 5712, 'epoch': 1}
{'type': 'loss', 'content': 0.11465802788734436, 'timestamp': '2025-10-02 00:21:00.924771', 'step': 5713, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:21:00.982588', 'step': 5713, 'epoch': 1}
{'type': 'loss', 'content': 0.1251155138015747, 'timestamp': '2025-10-02 00:21:00.991855', 'step': 5714, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:21:01.048464', 'step': 5714, 'epoch': 1}
{'type': 'loss', 'content': 0.03318609669804573, 'timestamp': '2025-10-02 00:21:01.057825', 'step': 5715, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:01.114504', 'step': 5715, 'epoch': 1}
{'type': 'loss', 'content': 0.038070399314165115, 'timestamp': '2025-10-02 00:21:01.124861', 'step': 5716, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:21:01.180258', 'step': 5716, 'epoch': 1}
{'type': 'loss', 'content': 0.2388242781162262, 'timestamp': '2025-10-02 00:21:01.183041', 'step': 5717, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:01.241620', 'step': 5717, 'epoch': 1}
{'type': 'loss', 'content': 0.24534568190574646, 'timestamp': '2025-10-02 00:21:01.245090', 'step': 5718, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:01.301478', 'step': 5718, 'epoch': 1}
{'type': 'loss', 'content': 0.12109831720590591, 'timestamp': '2025-10-02 00:21:01.304486', 'step': 5719, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:01.360600', 'step': 5719, 'epoch': 1}
{'type': 'loss', 'content': 0.05721704289317131, 'timestamp': '2025-10-02 00:21:01.368694', 'step': 5720, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:01.423192', 'step': 5720, 'epoch': 1}
{'type': 'loss', 'content': 0.11828560382127762, 'timestamp': '2025-10-02 00:21:01.425751', 'step': 5721, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:01.480283', 'step': 5721, 'epoch': 1}
{'type': 'loss', 'content': 0.06398338824510574, 'timestamp': '2025-10-02 00:21:01.482339', 'step': 5722, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:01.536341', 'step': 5722, 'epoch': 1}
{'type': 'loss', 'content': 0.09405524283647537, 'timestamp': '2025-10-02 00:21:01.538893', 'step': 5723, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:01.592804', 'step': 5723, 'epoch': 1}
{'type': 'loss', 'content': 0.04746810719370842, 'timestamp': '2025-10-02 00:21:01.598806', 'step': 5724, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:01.651277', 'step': 5724, 'epoch': 1}
{'type': 'loss', 'content': 0.17320463061332703, 'timestamp': '2025-10-02 00:21:01.654102', 'step': 5725, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:21:01.724333', 'step': 5725, 'epoch': 1}
{'type': 'loss', 'content': 0.025410886853933334, 'timestamp': '2025-10-02 00:21:01.736837', 'step': 5726, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:01.791852', 'step': 5726, 'epoch': 1}
{'type': 'loss', 'content': 0.08308401703834534, 'timestamp': '2025-10-02 00:21:01.794319', 'step': 5727, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:21:01.856731', 'step': 5727, 'epoch': 1}
{'type': 'loss', 'content': 0.09320830553770065, 'timestamp': '2025-10-02 00:21:01.868362', 'step': 5728, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:01.922262', 'step': 5728, 'epoch': 1}
{'type': 'loss', 'content': 0.024908816441893578, 'timestamp': '2025-10-02 00:21:01.928237', 'step': 5729, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:01.982179', 'step': 5729, 'epoch': 1}
{'type': 'loss', 'content': 0.15682217478752136, 'timestamp': '2025-10-02 00:21:01.984661', 'step': 5730, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:02.038803', 'step': 5730, 'epoch': 1}
{'type': 'loss', 'content': 0.14124277234077454, 'timestamp': '2025-10-02 00:21:02.041482', 'step': 5731, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:21:02.096104', 'step': 5731, 'epoch': 1}
{'type': 'loss', 'content': 0.05968707799911499, 'timestamp': '2025-10-02 00:21:02.102360', 'step': 5732, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:02.157021', 'step': 5732, 'epoch': 1}
{'type': 'loss', 'content': 0.2788873016834259, 'timestamp': '2025-10-02 00:21:02.159741', 'step': 5733, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:02.215192', 'step': 5733, 'epoch': 1}
{'type': 'loss', 'content': 0.0346124991774559, 'timestamp': '2025-10-02 00:21:02.224748', 'step': 5734, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:02.279489', 'step': 5734, 'epoch': 1}
{'type': 'loss', 'content': 0.1254960596561432, 'timestamp': '2025-10-02 00:21:02.282965', 'step': 5735, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:02.337768', 'step': 5735, 'epoch': 1}
{'type': 'loss', 'content': 0.07430849969387054, 'timestamp': '2025-10-02 00:21:02.343779', 'step': 5736, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:02.398275', 'step': 5736, 'epoch': 1}
{'type': 'loss', 'content': 0.02785484492778778, 'timestamp': '2025-10-02 00:21:02.405862', 'step': 5737, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:02.467859', 'step': 5737, 'epoch': 1}
{'type': 'loss', 'content': 0.029724745079874992, 'timestamp': '2025-10-02 00:21:02.478379', 'step': 5738, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:02.533133', 'step': 5738, 'epoch': 1}
{'type': 'loss', 'content': 0.24097904562950134, 'timestamp': '2025-10-02 00:21:02.535511', 'step': 5739, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:02.590776', 'step': 5739, 'epoch': 1}
{'type': 'loss', 'content': 0.05305902659893036, 'timestamp': '2025-10-02 00:21:02.597184', 'step': 5740, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:02.650826', 'step': 5740, 'epoch': 1}
{'type': 'loss', 'content': 0.22308260202407837, 'timestamp': '2025-10-02 00:21:02.653424', 'step': 5741, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:02.708925', 'step': 5741, 'epoch': 1}
{'type': 'loss', 'content': 0.04003577306866646, 'timestamp': '2025-10-02 00:21:02.711643', 'step': 5742, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:02.766247', 'step': 5742, 'epoch': 1}
{'type': 'loss', 'content': 0.17747358977794647, 'timestamp': '2025-10-02 00:21:02.768745', 'step': 5743, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:21:02.827648', 'step': 5743, 'epoch': 1}
{'type': 'loss', 'content': 0.038470130413770676, 'timestamp': '2025-10-02 00:21:02.838634', 'step': 5744, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:02.897406', 'step': 5744, 'epoch': 1}
{'type': 'loss', 'content': 0.07238195836544037, 'timestamp': '2025-10-02 00:21:02.900319', 'step': 5745, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:02.954723', 'step': 5745, 'epoch': 1}
{'type': 'loss', 'content': 0.1325022131204605, 'timestamp': '2025-10-02 00:21:02.957025', 'step': 5746, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:03.011553', 'step': 5746, 'epoch': 1}
{'type': 'loss', 'content': 0.15334729850292206, 'timestamp': '2025-10-02 00:21:03.013614', 'step': 5747, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:03.070182', 'step': 5747, 'epoch': 1}
{'type': 'loss', 'content': 0.10442820936441422, 'timestamp': '2025-10-02 00:21:03.076167', 'step': 5748, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:21:03.133180', 'step': 5748, 'epoch': 1}
{'type': 'loss', 'content': 0.12016702443361282, 'timestamp': '2025-10-02 00:21:03.135693', 'step': 5749, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:03.190353', 'step': 5749, 'epoch': 1}
{'type': 'loss', 'content': 0.09315864741802216, 'timestamp': '2025-10-02 00:21:03.192642', 'step': 5750, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:03.251991', 'step': 5750, 'epoch': 1}
{'type': 'loss', 'content': 0.0253205094486475, 'timestamp': '2025-10-02 00:21:03.254931', 'step': 5751, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:03.312617', 'step': 5751, 'epoch': 1}
{'type': 'loss', 'content': 0.16699092090129852, 'timestamp': '2025-10-02 00:21:03.318806', 'step': 5752, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:03.373848', 'step': 5752, 'epoch': 1}
{'type': 'loss', 'content': 0.10733991861343384, 'timestamp': '2025-10-02 00:21:03.376376', 'step': 5753, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:21:03.430160', 'step': 5753, 'epoch': 1}
{'type': 'loss', 'content': 0.16704808175563812, 'timestamp': '2025-10-02 00:21:03.435055', 'step': 5754, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:03.490395', 'step': 5754, 'epoch': 1}
{'type': 'loss', 'content': 0.030072521418333054, 'timestamp': '2025-10-02 00:21:03.492859', 'step': 5755, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:03.548189', 'step': 5755, 'epoch': 1}
{'type': 'loss', 'content': 0.05604550614953041, 'timestamp': '2025-10-02 00:21:03.554679', 'step': 5756, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:21:03.608513', 'step': 5756, 'epoch': 1}
{'type': 'loss', 'content': 0.11562240123748779, 'timestamp': '2025-10-02 00:21:03.610877', 'step': 5757, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:03.665349', 'step': 5757, 'epoch': 1}
{'type': 'loss', 'content': 0.11646044254302979, 'timestamp': '2025-10-02 00:21:03.667693', 'step': 5758, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:03.729088', 'step': 5758, 'epoch': 1}
{'type': 'loss', 'content': 0.029708221554756165, 'timestamp': '2025-10-02 00:21:03.731420', 'step': 5759, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:21:03.785449', 'step': 5759, 'epoch': 1}
{'type': 'loss', 'content': 0.11181917786598206, 'timestamp': '2025-10-02 00:21:03.790883', 'step': 5760, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:03.846158', 'step': 5760, 'epoch': 1}
{'type': 'loss', 'content': 0.19060736894607544, 'timestamp': '2025-10-02 00:21:03.856241', 'step': 5761, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:21:03.916353', 'step': 5761, 'epoch': 1}
{'type': 'loss', 'content': 0.0571153350174427, 'timestamp': '2025-10-02 00:21:03.926577', 'step': 5762, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:03.982675', 'step': 5762, 'epoch': 1}
{'type': 'loss', 'content': 0.0036498713307082653, 'timestamp': '2025-10-02 00:21:03.989971', 'step': 5763, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:21:04.065027', 'step': 5763, 'epoch': 1}
{'type': 'loss', 'content': 0.030317625030875206, 'timestamp': '2025-10-02 00:21:04.076632', 'step': 5764, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:04.131258', 'step': 5764, 'epoch': 1}
{'type': 'loss', 'content': 0.06374510377645493, 'timestamp': '2025-10-02 00:21:04.141488', 'step': 5765, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:04.197166', 'step': 5765, 'epoch': 1}
{'type': 'loss', 'content': 0.05886392667889595, 'timestamp': '2025-10-02 00:21:04.200480', 'step': 5766, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:04.258364', 'step': 5766, 'epoch': 1}
{'type': 'loss', 'content': 0.07318108528852463, 'timestamp': '2025-10-02 00:21:04.265773', 'step': 5767, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:04.320848', 'step': 5767, 'epoch': 1}
{'type': 'loss', 'content': 0.08226951211690903, 'timestamp': '2025-10-02 00:21:04.327607', 'step': 5768, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:21:04.385165', 'step': 5768, 'epoch': 1}
{'type': 'loss', 'content': 0.08109728991985321, 'timestamp': '2025-10-02 00:21:04.396149', 'step': 5769, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:21:04.461647', 'step': 5769, 'epoch': 1}
{'type': 'loss', 'content': 0.02537783421576023, 'timestamp': '2025-10-02 00:21:04.472462', 'step': 5770, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:04.527903', 'step': 5770, 'epoch': 1}
{'type': 'loss', 'content': 0.06288150697946548, 'timestamp': '2025-10-02 00:21:04.530451', 'step': 5771, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:04.585321', 'step': 5771, 'epoch': 1}
{'type': 'loss', 'content': 0.04998166859149933, 'timestamp': '2025-10-02 00:21:04.591701', 'step': 5772, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:04.645434', 'step': 5772, 'epoch': 1}
{'type': 'loss', 'content': 0.29964131116867065, 'timestamp': '2025-10-02 00:21:04.648243', 'step': 5773, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:04.703380', 'step': 5773, 'epoch': 1}
{'type': 'loss', 'content': 0.04210895672440529, 'timestamp': '2025-10-02 00:21:04.705900', 'step': 5774, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:04.761740', 'step': 5774, 'epoch': 1}
{'type': 'loss', 'content': 0.10681784152984619, 'timestamp': '2025-10-02 00:21:04.764466', 'step': 5775, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:04.818691', 'step': 5775, 'epoch': 1}
{'type': 'loss', 'content': 0.041475310921669006, 'timestamp': '2025-10-02 00:21:04.829174', 'step': 5776, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:04.895569', 'step': 5776, 'epoch': 1}
{'type': 'loss', 'content': 0.04911110922694206, 'timestamp': '2025-10-02 00:21:04.901462', 'step': 5777, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:04.961475', 'step': 5777, 'epoch': 1}
{'type': 'loss', 'content': 0.04685744270682335, 'timestamp': '2025-10-02 00:21:04.968830', 'step': 5778, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:05.024877', 'step': 5778, 'epoch': 1}
{'type': 'loss', 'content': 0.06577442586421967, 'timestamp': '2025-10-02 00:21:05.030729', 'step': 5779, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:05.086445', 'step': 5779, 'epoch': 1}
{'type': 'loss', 'content': 0.03804055601358414, 'timestamp': '2025-10-02 00:21:05.102788', 'step': 5780, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:05.158437', 'step': 5780, 'epoch': 1}
{'type': 'loss', 'content': 0.04984355717897415, 'timestamp': '2025-10-02 00:21:05.160831', 'step': 5781, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:05.215913', 'step': 5781, 'epoch': 1}
{'type': 'loss', 'content': 0.21614757180213928, 'timestamp': '2025-10-02 00:21:05.218606', 'step': 5782, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:05.274344', 'step': 5782, 'epoch': 1}
{'type': 'loss', 'content': 0.16364777088165283, 'timestamp': '2025-10-02 00:21:05.277118', 'step': 5783, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:05.339108', 'step': 5783, 'epoch': 1}
{'type': 'loss', 'content': 0.2044728547334671, 'timestamp': '2025-10-02 00:21:05.345405', 'step': 5784, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:05.399701', 'step': 5784, 'epoch': 1}
{'type': 'loss', 'content': 0.13427598774433136, 'timestamp': '2025-10-02 00:21:05.402040', 'step': 5785, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:05.456055', 'step': 5785, 'epoch': 1}
{'type': 'loss', 'content': 0.16944704949855804, 'timestamp': '2025-10-02 00:21:05.458433', 'step': 5786, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:21:05.539481', 'step': 5786, 'epoch': 1}
{'type': 'loss', 'content': 0.0309356190264225, 'timestamp': '2025-10-02 00:21:05.553282', 'step': 5787, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:05.608396', 'step': 5787, 'epoch': 1}
{'type': 'loss', 'content': 0.03157258778810501, 'timestamp': '2025-10-02 00:21:05.614405', 'step': 5788, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:05.668464', 'step': 5788, 'epoch': 1}
{'type': 'loss', 'content': 0.050565317273139954, 'timestamp': '2025-10-02 00:21:05.676805', 'step': 5789, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:05.736388', 'step': 5789, 'epoch': 1}
{'type': 'loss', 'content': 0.05783546715974808, 'timestamp': '2025-10-02 00:21:05.739099', 'step': 5790, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:05.795048', 'step': 5790, 'epoch': 1}
{'type': 'loss', 'content': 0.05166599899530411, 'timestamp': '2025-10-02 00:21:05.802457', 'step': 5791, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:05.856988', 'step': 5791, 'epoch': 1}
{'type': 'loss', 'content': 0.04539646580815315, 'timestamp': '2025-10-02 00:21:05.863104', 'step': 5792, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:05.916862', 'step': 5792, 'epoch': 1}
{'type': 'loss', 'content': 0.07091120630502701, 'timestamp': '2025-10-02 00:21:05.919758', 'step': 5793, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:21:05.975405', 'step': 5793, 'epoch': 1}
{'type': 'loss', 'content': 0.16338713467121124, 'timestamp': '2025-10-02 00:21:05.977711', 'step': 5794, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:06.033716', 'step': 5794, 'epoch': 1}
{'type': 'loss', 'content': 0.20964094996452332, 'timestamp': '2025-10-02 00:21:06.036239', 'step': 5795, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:06.090383', 'step': 5795, 'epoch': 1}
{'type': 'loss', 'content': 0.10045241564512253, 'timestamp': '2025-10-02 00:21:06.096240', 'step': 5796, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:21:06.154150', 'step': 5796, 'epoch': 1}
{'type': 'loss', 'content': 0.06357313692569733, 'timestamp': '2025-10-02 00:21:06.165156', 'step': 5797, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:06.220176', 'step': 5797, 'epoch': 1}
{'type': 'loss', 'content': 0.12416062504053116, 'timestamp': '2025-10-02 00:21:06.222650', 'step': 5798, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:21:06.296656', 'step': 5798, 'epoch': 1}
{'type': 'loss', 'content': 0.023945380002260208, 'timestamp': '2025-10-02 00:21:06.309879', 'step': 5799, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:06.366101', 'step': 5799, 'epoch': 1}
{'type': 'loss', 'content': 0.051765333861112595, 'timestamp': '2025-10-02 00:21:06.376408', 'step': 5800, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:21:06.430409', 'step': 5800, 'epoch': 1}
{'type': 'loss', 'content': 0.1604011356830597, 'timestamp': '2025-10-02 00:21:06.432979', 'step': 5801, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:06.488150', 'step': 5801, 'epoch': 1}
{'type': 'loss', 'content': 0.0664549469947815, 'timestamp': '2025-10-02 00:21:06.490735', 'step': 5802, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:21:06.549712', 'step': 5802, 'epoch': 1}
{'type': 'loss', 'content': 0.07525292783975601, 'timestamp': '2025-10-02 00:21:06.559821', 'step': 5803, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:06.621926', 'step': 5803, 'epoch': 1}
{'type': 'loss', 'content': 0.024676475673913956, 'timestamp': '2025-10-02 00:21:06.633210', 'step': 5804, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:06.687909', 'step': 5804, 'epoch': 1}
{'type': 'loss', 'content': 0.01770508661866188, 'timestamp': '2025-10-02 00:21:06.690603', 'step': 5805, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:06.745912', 'step': 5805, 'epoch': 1}
{'type': 'loss', 'content': 0.048936907202005386, 'timestamp': '2025-10-02 00:21:06.755484', 'step': 5806, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:21:06.810308', 'step': 5806, 'epoch': 1}
{'type': 'loss', 'content': 0.08979518711566925, 'timestamp': '2025-10-02 00:21:06.812670', 'step': 5807, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:06.867911', 'step': 5807, 'epoch': 1}
{'type': 'loss', 'content': 0.059705812484025955, 'timestamp': '2025-10-02 00:21:06.878274', 'step': 5808, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:06.932912', 'step': 5808, 'epoch': 1}
{'type': 'loss', 'content': 0.049655791372060776, 'timestamp': '2025-10-02 00:21:06.935545', 'step': 5809, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:06.991178', 'step': 5809, 'epoch': 1}
{'type': 'loss', 'content': 0.07399490475654602, 'timestamp': '2025-10-02 00:21:06.998830', 'step': 5810, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:07.054975', 'step': 5810, 'epoch': 1}
{'type': 'loss', 'content': 0.058460891246795654, 'timestamp': '2025-10-02 00:21:07.062483', 'step': 5811, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:07.117479', 'step': 5811, 'epoch': 1}
{'type': 'loss', 'content': 0.1629982888698578, 'timestamp': '2025-10-02 00:21:07.123663', 'step': 5812, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:07.179778', 'step': 5812, 'epoch': 1}
{'type': 'loss', 'content': 0.040316492319107056, 'timestamp': '2025-10-02 00:21:07.190047', 'step': 5813, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:07.246071', 'step': 5813, 'epoch': 1}
{'type': 'loss', 'content': 0.10957059264183044, 'timestamp': '2025-10-02 00:21:07.255661', 'step': 5814, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:07.311375', 'step': 5814, 'epoch': 1}
{'type': 'loss', 'content': 0.13367488980293274, 'timestamp': '2025-10-02 00:21:07.318796', 'step': 5815, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:07.373636', 'step': 5815, 'epoch': 1}
{'type': 'loss', 'content': 0.05505262315273285, 'timestamp': '2025-10-02 00:21:07.380077', 'step': 5816, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:21:07.434445', 'step': 5816, 'epoch': 1}
{'type': 'loss', 'content': 0.04988733306527138, 'timestamp': '2025-10-02 00:21:07.444208', 'step': 5817, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:21:07.508287', 'step': 5817, 'epoch': 1}
{'type': 'loss', 'content': 0.06255077570676804, 'timestamp': '2025-10-02 00:21:07.518937', 'step': 5818, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:07.574982', 'step': 5818, 'epoch': 1}
{'type': 'loss', 'content': 0.12278615683317184, 'timestamp': '2025-10-02 00:21:07.584518', 'step': 5819, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:07.639586', 'step': 5819, 'epoch': 1}
{'type': 'loss', 'content': 0.16919423639774323, 'timestamp': '2025-10-02 00:21:07.646008', 'step': 5820, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:07.700370', 'step': 5820, 'epoch': 1}
{'type': 'loss', 'content': 0.08616137504577637, 'timestamp': '2025-10-02 00:21:07.702774', 'step': 5821, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:21:07.757864', 'step': 5821, 'epoch': 1}
{'type': 'loss', 'content': 0.03402772173285484, 'timestamp': '2025-10-02 00:21:07.767255', 'step': 5822, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:21:07.822584', 'step': 5822, 'epoch': 1}
{'type': 'loss', 'content': 0.03214479237794876, 'timestamp': '2025-10-02 00:21:07.831801', 'step': 5823, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:21:07.886726', 'step': 5823, 'epoch': 1}
{'type': 'loss', 'content': 0.04415181279182434, 'timestamp': '2025-10-02 00:21:07.892549', 'step': 5824, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:21:07.947698', 'step': 5824, 'epoch': 1}
{'type': 'loss', 'content': 0.12572096288204193, 'timestamp': '2025-10-02 00:21:07.950335', 'step': 5825, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:08.004794', 'step': 5825, 'epoch': 1}
{'type': 'loss', 'content': 0.1712229996919632, 'timestamp': '2025-10-02 00:21:08.007632', 'step': 5826, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:08.063087', 'step': 5826, 'epoch': 1}
{'type': 'loss', 'content': 0.237651526927948, 'timestamp': '2025-10-02 00:21:08.065688', 'step': 5827, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:08.120512', 'step': 5827, 'epoch': 1}
{'type': 'loss', 'content': 0.151723712682724, 'timestamp': '2025-10-02 00:21:08.126311', 'step': 5828, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:08.180570', 'step': 5828, 'epoch': 1}
{'type': 'loss', 'content': 0.10561138391494751, 'timestamp': '2025-10-02 00:21:08.183015', 'step': 5829, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:08.237496', 'step': 5829, 'epoch': 1}
{'type': 'loss', 'content': 0.08232518285512924, 'timestamp': '2025-10-02 00:21:08.244797', 'step': 5830, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:08.299890', 'step': 5830, 'epoch': 1}
{'type': 'loss', 'content': 0.023364849388599396, 'timestamp': '2025-10-02 00:21:08.302715', 'step': 5831, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:21:08.364916', 'step': 5831, 'epoch': 1}
{'type': 'loss', 'content': 0.0404924713075161, 'timestamp': '2025-10-02 00:21:08.376381', 'step': 5832, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:08.431135', 'step': 5832, 'epoch': 1}
{'type': 'loss', 'content': 0.06053649261593819, 'timestamp': '2025-10-02 00:21:08.437064', 'step': 5833, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:08.491838', 'step': 5833, 'epoch': 1}
{'type': 'loss', 'content': 0.2489858865737915, 'timestamp': '2025-10-02 00:21:08.494836', 'step': 5834, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:08.551178', 'step': 5834, 'epoch': 1}
{'type': 'loss', 'content': 0.07015389204025269, 'timestamp': '2025-10-02 00:21:08.560627', 'step': 5835, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:08.617341', 'step': 5835, 'epoch': 1}
{'type': 'loss', 'content': 0.1399671584367752, 'timestamp': '2025-10-02 00:21:08.625045', 'step': 5836, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:08.685610', 'step': 5836, 'epoch': 1}
{'type': 'loss', 'content': 0.08114819973707199, 'timestamp': '2025-10-02 00:21:08.687992', 'step': 5837, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:08.743519', 'step': 5837, 'epoch': 1}
{'type': 'loss', 'content': 0.17001038789749146, 'timestamp': '2025-10-02 00:21:08.746046', 'step': 5838, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:08.801713', 'step': 5838, 'epoch': 1}
{'type': 'loss', 'content': 0.046653665602207184, 'timestamp': '2025-10-02 00:21:08.809046', 'step': 5839, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:08.863514', 'step': 5839, 'epoch': 1}
{'type': 'loss', 'content': 0.1292034387588501, 'timestamp': '2025-10-02 00:21:08.871705', 'step': 5840, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:08.927920', 'step': 5840, 'epoch': 1}
{'type': 'loss', 'content': 0.13835260272026062, 'timestamp': '2025-10-02 00:21:08.931107', 'step': 5841, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:08.987929', 'step': 5841, 'epoch': 1}
{'type': 'loss', 'content': 0.09451986104249954, 'timestamp': '2025-10-02 00:21:08.993715', 'step': 5842, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:09.049619', 'step': 5842, 'epoch': 1}
{'type': 'loss', 'content': 0.08709287643432617, 'timestamp': '2025-10-02 00:21:09.052654', 'step': 5843, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:09.110711', 'step': 5843, 'epoch': 1}
{'type': 'loss', 'content': 0.04830928146839142, 'timestamp': '2025-10-02 00:21:09.118824', 'step': 5844, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:09.176431', 'step': 5844, 'epoch': 1}
{'type': 'loss', 'content': 0.19612693786621094, 'timestamp': '2025-10-02 00:21:09.179945', 'step': 5845, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:09.237222', 'step': 5845, 'epoch': 1}
{'type': 'loss', 'content': 0.0411560982465744, 'timestamp': '2025-10-02 00:21:09.244704', 'step': 5846, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:09.305656', 'step': 5846, 'epoch': 1}
{'type': 'loss', 'content': 0.04273182153701782, 'timestamp': '2025-10-02 00:21:09.309420', 'step': 5847, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:21:09.373656', 'step': 5847, 'epoch': 1}
{'type': 'loss', 'content': 0.18418170511722565, 'timestamp': '2025-10-02 00:21:09.385061', 'step': 5848, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:09.441569', 'step': 5848, 'epoch': 1}
{'type': 'loss', 'content': 0.20451095700263977, 'timestamp': '2025-10-02 00:21:09.446359', 'step': 5849, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:09.504747', 'step': 5849, 'epoch': 1}
{'type': 'loss', 'content': 0.08100146800279617, 'timestamp': '2025-10-02 00:21:09.507334', 'step': 5850, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:09.566133', 'step': 5850, 'epoch': 1}
{'type': 'loss', 'content': 0.04362405091524124, 'timestamp': '2025-10-02 00:21:09.575650', 'step': 5851, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:09.636194', 'step': 5851, 'epoch': 1}
{'type': 'loss', 'content': 0.01942705363035202, 'timestamp': '2025-10-02 00:21:09.644121', 'step': 5852, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:09.702166', 'step': 5852, 'epoch': 1}
{'type': 'loss', 'content': 0.2986948490142822, 'timestamp': '2025-10-02 00:21:09.705645', 'step': 5853, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:09.762559', 'step': 5853, 'epoch': 1}
{'type': 'loss', 'content': 0.18416567146778107, 'timestamp': '2025-10-02 00:21:09.770093', 'step': 5854, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:21:09.837060', 'step': 5854, 'epoch': 1}
{'type': 'loss', 'content': 0.04610572010278702, 'timestamp': '2025-10-02 00:21:09.847865', 'step': 5855, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:09.904766', 'step': 5855, 'epoch': 1}
{'type': 'loss', 'content': 0.07709435373544693, 'timestamp': '2025-10-02 00:21:09.911135', 'step': 5856, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:09.968817', 'step': 5856, 'epoch': 1}
{'type': 'loss', 'content': 0.15115663409233093, 'timestamp': '2025-10-02 00:21:09.971842', 'step': 5857, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:10.028415', 'step': 5857, 'epoch': 1}
{'type': 'loss', 'content': 0.07017876952886581, 'timestamp': '2025-10-02 00:21:10.032427', 'step': 5858, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:10.088138', 'step': 5858, 'epoch': 1}
{'type': 'loss', 'content': 0.0213939119130373, 'timestamp': '2025-10-02 00:21:10.092604', 'step': 5859, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:21:10.147300', 'step': 5859, 'epoch': 1}
{'type': 'loss', 'content': 0.11857640743255615, 'timestamp': '2025-10-02 00:21:10.154700', 'step': 5860, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:10.210546', 'step': 5860, 'epoch': 1}
{'type': 'loss', 'content': 0.10864462703466415, 'timestamp': '2025-10-02 00:21:10.216098', 'step': 5861, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:10.275521', 'step': 5861, 'epoch': 1}
{'type': 'loss', 'content': 0.0916544646024704, 'timestamp': '2025-10-02 00:21:10.279394', 'step': 5862, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:21:10.335704', 'step': 5862, 'epoch': 1}
{'type': 'loss', 'content': 0.09513925015926361, 'timestamp': '2025-10-02 00:21:10.338902', 'step': 5863, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:21:10.410393', 'step': 5863, 'epoch': 1}
{'type': 'loss', 'content': 0.021981265395879745, 'timestamp': '2025-10-02 00:21:10.423541', 'step': 5864, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:10.479046', 'step': 5864, 'epoch': 1}
{'type': 'loss', 'content': 0.11068009585142136, 'timestamp': '2025-10-02 00:21:10.482432', 'step': 5865, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:10.540745', 'step': 5865, 'epoch': 1}
{'type': 'loss', 'content': 0.0763147622346878, 'timestamp': '2025-10-02 00:21:10.543770', 'step': 5866, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:10.602630', 'step': 5866, 'epoch': 1}
{'type': 'loss', 'content': 0.09401166439056396, 'timestamp': '2025-10-02 00:21:10.606154', 'step': 5867, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:21:10.666812', 'step': 5867, 'epoch': 1}
{'type': 'loss', 'content': 0.09668892621994019, 'timestamp': '2025-10-02 00:21:10.677772', 'step': 5868, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:21:10.732808', 'step': 5868, 'epoch': 1}
{'type': 'loss', 'content': 0.07910566031932831, 'timestamp': '2025-10-02 00:21:10.742107', 'step': 5869, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:10.797496', 'step': 5869, 'epoch': 1}
{'type': 'loss', 'content': 0.0995040237903595, 'timestamp': '2025-10-02 00:21:10.799673', 'step': 5870, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:10.854808', 'step': 5870, 'epoch': 1}
{'type': 'loss', 'content': 0.060357023030519485, 'timestamp': '2025-10-02 00:21:10.862147', 'step': 5871, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:10.916808', 'step': 5871, 'epoch': 1}
{'type': 'loss', 'content': 0.09480393677949905, 'timestamp': '2025-10-02 00:21:10.922764', 'step': 5872, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:21:10.976916', 'step': 5872, 'epoch': 1}
{'type': 'loss', 'content': 0.17501530051231384, 'timestamp': '2025-10-02 00:21:10.979350', 'step': 5873, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:11.035154', 'step': 5873, 'epoch': 1}
{'type': 'loss', 'content': 0.017784113064408302, 'timestamp': '2025-10-02 00:21:11.040918', 'step': 5874, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:11.095369', 'step': 5874, 'epoch': 1}
{'type': 'loss', 'content': 0.07522323727607727, 'timestamp': '2025-10-02 00:21:11.102826', 'step': 5875, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:21:11.161702', 'step': 5875, 'epoch': 1}
{'type': 'loss', 'content': 0.11198585480451584, 'timestamp': '2025-10-02 00:21:11.172686', 'step': 5876, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:11.227604', 'step': 5876, 'epoch': 1}
{'type': 'loss', 'content': 0.04817720130085945, 'timestamp': '2025-10-02 00:21:11.237848', 'step': 5877, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:11.293594', 'step': 5877, 'epoch': 1}
{'type': 'loss', 'content': 0.04581337422132492, 'timestamp': '2025-10-02 00:21:11.295648', 'step': 5878, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:11.350742', 'step': 5878, 'epoch': 1}
{'type': 'loss', 'content': 0.1116006001830101, 'timestamp': '2025-10-02 00:21:11.353183', 'step': 5879, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:11.407731', 'step': 5879, 'epoch': 1}
{'type': 'loss', 'content': 0.046583641320466995, 'timestamp': '2025-10-02 00:21:11.414613', 'step': 5880, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:11.468842', 'step': 5880, 'epoch': 1}
{'type': 'loss', 'content': 0.020684655755758286, 'timestamp': '2025-10-02 00:21:11.476424', 'step': 5881, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:11.531117', 'step': 5881, 'epoch': 1}
{'type': 'loss', 'content': 0.20676465332508087, 'timestamp': '2025-10-02 00:21:11.533526', 'step': 5882, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:21:11.587768', 'step': 5882, 'epoch': 1}
{'type': 'loss', 'content': 0.1853851079940796, 'timestamp': '2025-10-02 00:21:11.590087', 'step': 5883, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:11.644422', 'step': 5883, 'epoch': 1}
{'type': 'loss', 'content': 0.058893200010061264, 'timestamp': '2025-10-02 00:21:11.650887', 'step': 5884, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:21:11.708388', 'step': 5884, 'epoch': 1}
{'type': 'loss', 'content': 0.045653849840164185, 'timestamp': '2025-10-02 00:21:11.719413', 'step': 5885, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:11.773771', 'step': 5885, 'epoch': 1}
{'type': 'loss', 'content': 0.16706284880638123, 'timestamp': '2025-10-02 00:21:11.776206', 'step': 5886, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:11.831270', 'step': 5886, 'epoch': 1}
{'type': 'loss', 'content': 0.10395707190036774, 'timestamp': '2025-10-02 00:21:11.833473', 'step': 5887, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:11.887805', 'step': 5887, 'epoch': 1}
{'type': 'loss', 'content': 0.1731368601322174, 'timestamp': '2025-10-02 00:21:11.893645', 'step': 5888, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:11.947464', 'step': 5888, 'epoch': 1}
{'type': 'loss', 'content': 0.0387193039059639, 'timestamp': '2025-10-02 00:21:11.949961', 'step': 5889, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:12.004338', 'step': 5889, 'epoch': 1}
{'type': 'loss', 'content': 0.13573326170444489, 'timestamp': '2025-10-02 00:21:12.010374', 'step': 5890, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:12.073260', 'step': 5890, 'epoch': 1}
{'type': 'loss', 'content': 0.0778718814253807, 'timestamp': '2025-10-02 00:21:12.075665', 'step': 5891, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:12.130867', 'step': 5891, 'epoch': 1}
{'type': 'loss', 'content': 0.056182824075222015, 'timestamp': '2025-10-02 00:21:12.137269', 'step': 5892, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:21:12.194833', 'step': 5892, 'epoch': 1}
{'type': 'loss', 'content': 0.11820736527442932, 'timestamp': '2025-10-02 00:21:12.205836', 'step': 5893, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:12.262917', 'step': 5893, 'epoch': 1}
{'type': 'loss', 'content': 0.012251246720552444, 'timestamp': '2025-10-02 00:21:12.268796', 'step': 5894, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:21:12.325380', 'step': 5894, 'epoch': 1}
{'type': 'loss', 'content': 0.06139074265956879, 'timestamp': '2025-10-02 00:21:12.327777', 'step': 5895, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:12.382715', 'step': 5895, 'epoch': 1}
{'type': 'loss', 'content': 0.11516966670751572, 'timestamp': '2025-10-02 00:21:12.388908', 'step': 5896, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:21:12.442982', 'step': 5896, 'epoch': 1}
{'type': 'loss', 'content': 0.058939818292856216, 'timestamp': '2025-10-02 00:21:12.452509', 'step': 5897, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:12.506837', 'step': 5897, 'epoch': 1}
{'type': 'loss', 'content': 0.0866108387708664, 'timestamp': '2025-10-02 00:21:12.509211', 'step': 5898, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:12.563588', 'step': 5898, 'epoch': 1}
{'type': 'loss', 'content': 0.08232878148555756, 'timestamp': '2025-10-02 00:21:12.565823', 'step': 5899, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:12.620448', 'step': 5899, 'epoch': 1}
{'type': 'loss', 'content': 0.04981737583875656, 'timestamp': '2025-10-02 00:21:12.626556', 'step': 5900, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:12.680665', 'step': 5900, 'epoch': 1}
{'type': 'loss', 'content': 0.10369682312011719, 'timestamp': '2025-10-02 00:21:12.683443', 'step': 5901, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:12.738093', 'step': 5901, 'epoch': 1}
{'type': 'loss', 'content': 0.12533025443553925, 'timestamp': '2025-10-02 00:21:12.740564', 'step': 5902, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:12.795021', 'step': 5902, 'epoch': 1}
{'type': 'loss', 'content': 0.08725836873054504, 'timestamp': '2025-10-02 00:21:12.802524', 'step': 5903, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:12.857379', 'step': 5903, 'epoch': 1}
{'type': 'loss', 'content': 0.05025351047515869, 'timestamp': '2025-10-02 00:21:12.865574', 'step': 5904, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:12.919059', 'step': 5904, 'epoch': 1}
{'type': 'loss', 'content': 0.06466377526521683, 'timestamp': '2025-10-02 00:21:12.922116', 'step': 5905, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:12.977348', 'step': 5905, 'epoch': 1}
{'type': 'loss', 'content': 0.03435096517205238, 'timestamp': '2025-10-02 00:21:12.979731', 'step': 5906, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:13.035247', 'step': 5906, 'epoch': 1}
{'type': 'loss', 'content': 0.009800273925065994, 'timestamp': '2025-10-02 00:21:13.040532', 'step': 5907, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:13.094488', 'step': 5907, 'epoch': 1}
{'type': 'loss', 'content': 0.06672938168048859, 'timestamp': '2025-10-02 00:21:13.100244', 'step': 5908, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:13.155121', 'step': 5908, 'epoch': 1}
{'type': 'loss', 'content': 0.0638120248913765, 'timestamp': '2025-10-02 00:21:13.161071', 'step': 5909, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:13.215223', 'step': 5909, 'epoch': 1}
{'type': 'loss', 'content': 0.08359023183584213, 'timestamp': '2025-10-02 00:21:13.217604', 'step': 5910, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:13.273046', 'step': 5910, 'epoch': 1}
{'type': 'loss', 'content': 0.0564054511487484, 'timestamp': '2025-10-02 00:21:13.280474', 'step': 5911, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:13.334712', 'step': 5911, 'epoch': 1}
{'type': 'loss', 'content': 0.06219842657446861, 'timestamp': '2025-10-02 00:21:13.340525', 'step': 5912, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:21:13.394154', 'step': 5912, 'epoch': 1}
{'type': 'loss', 'content': 0.13897772133350372, 'timestamp': '2025-10-02 00:21:13.396712', 'step': 5913, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:21:13.451978', 'step': 5913, 'epoch': 1}
{'type': 'loss', 'content': 0.05827321112155914, 'timestamp': '2025-10-02 00:21:13.461349', 'step': 5914, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:13.516587', 'step': 5914, 'epoch': 1}
{'type': 'loss', 'content': 0.046376097947359085, 'timestamp': '2025-10-02 00:21:13.522293', 'step': 5915, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:13.579382', 'step': 5915, 'epoch': 1}
{'type': 'loss', 'content': 0.0791715681552887, 'timestamp': '2025-10-02 00:21:13.585345', 'step': 5916, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:13.639815', 'step': 5916, 'epoch': 1}
{'type': 'loss', 'content': 0.07264567911624908, 'timestamp': '2025-10-02 00:21:13.644379', 'step': 5917, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:13.702375', 'step': 5917, 'epoch': 1}
{'type': 'loss', 'content': 0.2171364575624466, 'timestamp': '2025-10-02 00:21:13.704654', 'step': 5918, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:21:13.765752', 'step': 5918, 'epoch': 1}
{'type': 'loss', 'content': 0.022736983373761177, 'timestamp': '2025-10-02 00:21:13.775951', 'step': 5919, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:13.833207', 'step': 5919, 'epoch': 1}
{'type': 'loss', 'content': 0.09561166912317276, 'timestamp': '2025-10-02 00:21:13.839197', 'step': 5920, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:21:13.907337', 'step': 5920, 'epoch': 1}
{'type': 'loss', 'content': 0.042899612337350845, 'timestamp': '2025-10-02 00:21:13.920904', 'step': 5921, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:21:13.975593', 'step': 5921, 'epoch': 1}
{'type': 'loss', 'content': 0.042224008589982986, 'timestamp': '2025-10-02 00:21:13.977838', 'step': 5922, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:14.032269', 'step': 5922, 'epoch': 1}
{'type': 'loss', 'content': 0.20801329612731934, 'timestamp': '2025-10-02 00:21:14.035397', 'step': 5923, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:14.089673', 'step': 5923, 'epoch': 1}
{'type': 'loss', 'content': 0.15906591713428497, 'timestamp': '2025-10-02 00:21:14.095344', 'step': 5924, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:14.148935', 'step': 5924, 'epoch': 1}
{'type': 'loss', 'content': 0.1749963015317917, 'timestamp': '2025-10-02 00:21:14.151386', 'step': 5925, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:14.205352', 'step': 5925, 'epoch': 1}
{'type': 'loss', 'content': 0.1163184642791748, 'timestamp': '2025-10-02 00:21:14.207723', 'step': 5926, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:14.262502', 'step': 5926, 'epoch': 1}
{'type': 'loss', 'content': 0.10167741775512695, 'timestamp': '2025-10-02 00:21:14.265471', 'step': 5927, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:14.327019', 'step': 5927, 'epoch': 1}
{'type': 'loss', 'content': 0.031190995126962662, 'timestamp': '2025-10-02 00:21:14.338318', 'step': 5928, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:21:14.392686', 'step': 5928, 'epoch': 1}
{'type': 'loss', 'content': 0.04083577170968056, 'timestamp': '2025-10-02 00:21:14.402173', 'step': 5929, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:14.456030', 'step': 5929, 'epoch': 1}
{'type': 'loss', 'content': 0.1975344717502594, 'timestamp': '2025-10-02 00:21:14.458213', 'step': 5930, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:21:14.512573', 'step': 5930, 'epoch': 1}
{'type': 'loss', 'content': 0.1341867297887802, 'timestamp': '2025-10-02 00:21:14.514919', 'step': 5931, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:14.569069', 'step': 5931, 'epoch': 1}
{'type': 'loss', 'content': 0.154896080493927, 'timestamp': '2025-10-02 00:21:14.574988', 'step': 5932, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:21:14.629568', 'step': 5932, 'epoch': 1}
{'type': 'loss', 'content': 0.0742000937461853, 'timestamp': '2025-10-02 00:21:14.639309', 'step': 5933, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:14.695501', 'step': 5933, 'epoch': 1}
{'type': 'loss', 'content': 0.32572004199028015, 'timestamp': '2025-10-02 00:21:14.698041', 'step': 5934, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:21:14.756825', 'step': 5934, 'epoch': 1}
{'type': 'loss', 'content': 0.02930573746562004, 'timestamp': '2025-10-02 00:21:14.767022', 'step': 5935, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:14.821566', 'step': 5935, 'epoch': 1}
{'type': 'loss', 'content': 0.14198262989521027, 'timestamp': '2025-10-02 00:21:14.827428', 'step': 5936, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:14.881158', 'step': 5936, 'epoch': 1}
{'type': 'loss', 'content': 0.12729354202747345, 'timestamp': '2025-10-02 00:21:14.883670', 'step': 5937, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:14.937667', 'step': 5937, 'epoch': 1}
{'type': 'loss', 'content': 0.04943452030420303, 'timestamp': '2025-10-02 00:21:14.939718', 'step': 5938, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:21:14.994922', 'step': 5938, 'epoch': 1}
{'type': 'loss', 'content': 0.07730937749147415, 'timestamp': '2025-10-02 00:21:14.997277', 'step': 5939, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:15.051065', 'step': 5939, 'epoch': 1}
{'type': 'loss', 'content': 0.23442909121513367, 'timestamp': '2025-10-02 00:21:15.056943', 'step': 5940, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:15.109957', 'step': 5940, 'epoch': 1}
{'type': 'loss', 'content': 0.1546424925327301, 'timestamp': '2025-10-02 00:21:15.112279', 'step': 5941, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:15.166052', 'step': 5941, 'epoch': 1}
{'type': 'loss', 'content': 0.12840652465820312, 'timestamp': '2025-10-02 00:21:15.168340', 'step': 5942, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:15.222783', 'step': 5942, 'epoch': 1}
{'type': 'loss', 'content': 0.07020683586597443, 'timestamp': '2025-10-02 00:21:15.224998', 'step': 5943, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:15.279225', 'step': 5943, 'epoch': 1}
{'type': 'loss', 'content': 0.044080041348934174, 'timestamp': '2025-10-02 00:21:15.287324', 'step': 5944, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:15.350587', 'step': 5944, 'epoch': 1}
{'type': 'loss', 'content': 0.19497883319854736, 'timestamp': '2025-10-02 00:21:15.353105', 'step': 5945, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:15.407110', 'step': 5945, 'epoch': 1}
{'type': 'loss', 'content': 0.03273376449942589, 'timestamp': '2025-10-02 00:21:15.409486', 'step': 5946, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:15.464137', 'step': 5946, 'epoch': 1}
{'type': 'loss', 'content': 0.0889832079410553, 'timestamp': '2025-10-02 00:21:15.466403', 'step': 5947, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:15.522511', 'step': 5947, 'epoch': 1}
{'type': 'loss', 'content': 0.10967635363340378, 'timestamp': '2025-10-02 00:21:15.528336', 'step': 5948, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:15.582182', 'step': 5948, 'epoch': 1}
{'type': 'loss', 'content': 0.06599370390176773, 'timestamp': '2025-10-02 00:21:15.584546', 'step': 5949, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:15.639248', 'step': 5949, 'epoch': 1}
{'type': 'loss', 'content': 0.034672658890485764, 'timestamp': '2025-10-02 00:21:15.641993', 'step': 5950, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:15.696669', 'step': 5950, 'epoch': 1}
{'type': 'loss', 'content': 0.09222772717475891, 'timestamp': '2025-10-02 00:21:15.699389', 'step': 5951, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:15.760965', 'step': 5951, 'epoch': 1}
{'type': 'loss', 'content': 0.043544188141822815, 'timestamp': '2025-10-02 00:21:15.772252', 'step': 5952, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:21:15.827266', 'step': 5952, 'epoch': 1}
{'type': 'loss', 'content': 0.1430186629295349, 'timestamp': '2025-10-02 00:21:15.829603', 'step': 5953, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:21:15.883641', 'step': 5953, 'epoch': 1}
{'type': 'loss', 'content': 0.22247207164764404, 'timestamp': '2025-10-02 00:21:15.886049', 'step': 5954, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:15.941211', 'step': 5954, 'epoch': 1}
{'type': 'loss', 'content': 0.041912708431482315, 'timestamp': '2025-10-02 00:21:15.948608', 'step': 5955, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:21:16.002501', 'step': 5955, 'epoch': 1}
{'type': 'loss', 'content': 0.14908750355243683, 'timestamp': '2025-10-02 00:21:16.009059', 'step': 5956, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:16.075890', 'step': 5956, 'epoch': 1}
{'type': 'loss', 'content': 0.0673699826002121, 'timestamp': '2025-10-02 00:21:16.081766', 'step': 5957, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:16.135925', 'step': 5957, 'epoch': 1}
{'type': 'loss', 'content': 0.1316678375005722, 'timestamp': '2025-10-02 00:21:16.143442', 'step': 5958, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:16.199503', 'step': 5958, 'epoch': 1}
{'type': 'loss', 'content': 0.19841235876083374, 'timestamp': '2025-10-02 00:21:16.201940', 'step': 5959, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:21:16.256167', 'step': 5959, 'epoch': 1}
{'type': 'loss', 'content': 0.05328531190752983, 'timestamp': '2025-10-02 00:21:16.266268', 'step': 5960, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:16.319754', 'step': 5960, 'epoch': 1}
{'type': 'loss', 'content': 0.17482049763202667, 'timestamp': '2025-10-02 00:21:16.322346', 'step': 5961, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:16.376902', 'step': 5961, 'epoch': 1}
{'type': 'loss', 'content': 0.0646912232041359, 'timestamp': '2025-10-02 00:21:16.379674', 'step': 5962, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:16.435306', 'step': 5962, 'epoch': 1}
{'type': 'loss', 'content': 0.10089726746082306, 'timestamp': '2025-10-02 00:21:16.437634', 'step': 5963, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:16.492254', 'step': 5963, 'epoch': 1}
{'type': 'loss', 'content': 0.05695958808064461, 'timestamp': '2025-10-02 00:21:16.498835', 'step': 5964, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:21:16.553066', 'step': 5964, 'epoch': 1}
{'type': 'loss', 'content': 0.10819878429174423, 'timestamp': '2025-10-02 00:21:16.555623', 'step': 5965, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:16.609733', 'step': 5965, 'epoch': 1}
{'type': 'loss', 'content': 0.05673173442482948, 'timestamp': '2025-10-02 00:21:16.612162', 'step': 5966, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:16.667305', 'step': 5966, 'epoch': 1}
{'type': 'loss', 'content': 0.08534347265958786, 'timestamp': '2025-10-02 00:21:16.669839', 'step': 5967, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:21:16.723962', 'step': 5967, 'epoch': 1}
{'type': 'loss', 'content': 0.11129153519868851, 'timestamp': '2025-10-02 00:21:16.729697', 'step': 5968, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:16.789708', 'step': 5968, 'epoch': 1}
{'type': 'loss', 'content': 0.04048456251621246, 'timestamp': '2025-10-02 00:21:16.801055', 'step': 5969, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:16.857644', 'step': 5969, 'epoch': 1}
{'type': 'loss', 'content': 0.12374625355005264, 'timestamp': '2025-10-02 00:21:16.865115', 'step': 5970, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:16.920785', 'step': 5970, 'epoch': 1}
{'type': 'loss', 'content': 0.07818435877561569, 'timestamp': '2025-10-02 00:21:16.930317', 'step': 5971, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:21:16.985875', 'step': 5971, 'epoch': 1}
{'type': 'loss', 'content': 0.08042307198047638, 'timestamp': '2025-10-02 00:21:16.996046', 'step': 5972, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:17.049652', 'step': 5972, 'epoch': 1}
{'type': 'loss', 'content': 0.13407421112060547, 'timestamp': '2025-10-02 00:21:17.051799', 'step': 5973, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:17.105692', 'step': 5973, 'epoch': 1}
{'type': 'loss', 'content': 0.05840134620666504, 'timestamp': '2025-10-02 00:21:17.108061', 'step': 5974, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:17.164230', 'step': 5974, 'epoch': 1}
{'type': 'loss', 'content': 0.055210210382938385, 'timestamp': '2025-10-02 00:21:17.171668', 'step': 5975, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:17.227607', 'step': 5975, 'epoch': 1}
{'type': 'loss', 'content': 0.040716491639614105, 'timestamp': '2025-10-02 00:21:17.233159', 'step': 5976, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:17.288321', 'step': 5976, 'epoch': 1}
{'type': 'loss', 'content': 0.04027649760246277, 'timestamp': '2025-10-02 00:21:17.295750', 'step': 5977, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:17.350131', 'step': 5977, 'epoch': 1}
{'type': 'loss', 'content': 0.05810804292559624, 'timestamp': '2025-10-02 00:21:17.352567', 'step': 5978, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:17.407631', 'step': 5978, 'epoch': 1}
{'type': 'loss', 'content': 0.05833226069808006, 'timestamp': '2025-10-02 00:21:17.409792', 'step': 5979, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:21:17.468873', 'step': 5979, 'epoch': 1}
{'type': 'loss', 'content': 0.06831306964159012, 'timestamp': '2025-10-02 00:21:17.479783', 'step': 5980, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:17.534161', 'step': 5980, 'epoch': 1}
{'type': 'loss', 'content': 0.09349189698696136, 'timestamp': '2025-10-02 00:21:17.539609', 'step': 5981, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:17.594608', 'step': 5981, 'epoch': 1}
{'type': 'loss', 'content': 0.014199350029230118, 'timestamp': '2025-10-02 00:21:17.598963', 'step': 5982, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:21:17.653486', 'step': 5982, 'epoch': 1}
{'type': 'loss', 'content': 0.09933460503816605, 'timestamp': '2025-10-02 00:21:17.656219', 'step': 5983, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:21:17.710209', 'step': 5983, 'epoch': 1}
{'type': 'loss', 'content': 0.09809892624616623, 'timestamp': '2025-10-02 00:21:17.715913', 'step': 5984, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:17.769530', 'step': 5984, 'epoch': 1}
{'type': 'loss', 'content': 0.2366420328617096, 'timestamp': '2025-10-02 00:21:17.772059', 'step': 5985, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:17.826288', 'step': 5985, 'epoch': 1}
{'type': 'loss', 'content': 0.10709112882614136, 'timestamp': '2025-10-02 00:21:17.832305', 'step': 5986, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:17.886822', 'step': 5986, 'epoch': 1}
{'type': 'loss', 'content': 0.11942096799612045, 'timestamp': '2025-10-02 00:21:17.889101', 'step': 5987, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:17.944170', 'step': 5987, 'epoch': 1}
{'type': 'loss', 'content': 0.03194057568907738, 'timestamp': '2025-10-02 00:21:17.952355', 'step': 5988, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:21:18.008289', 'step': 5988, 'epoch': 1}
{'type': 'loss', 'content': 0.08589522540569305, 'timestamp': '2025-10-02 00:21:18.017626', 'step': 5989, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:18.077127', 'step': 5989, 'epoch': 1}
{'type': 'loss', 'content': 0.024360287934541702, 'timestamp': '2025-10-02 00:21:18.079649', 'step': 5990, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:21:18.157000', 'step': 5990, 'epoch': 1}
{'type': 'loss', 'content': 0.018280528485774994, 'timestamp': '2025-10-02 00:21:18.170217', 'step': 5991, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:18.226761', 'step': 5991, 'epoch': 1}
{'type': 'loss', 'content': 0.07061336189508438, 'timestamp': '2025-10-02 00:21:18.233686', 'step': 5992, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:21:18.288212', 'step': 5992, 'epoch': 1}
{'type': 'loss', 'content': 0.09387186914682388, 'timestamp': '2025-10-02 00:21:18.291502', 'step': 5993, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:18.347168', 'step': 5993, 'epoch': 1}
{'type': 'loss', 'content': 0.18066398799419403, 'timestamp': '2025-10-02 00:21:18.350192', 'step': 5994, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:18.407213', 'step': 5994, 'epoch': 1}
{'type': 'loss', 'content': 0.11014629155397415, 'timestamp': '2025-10-02 00:21:18.410474', 'step': 5995, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:18.467187', 'step': 5995, 'epoch': 1}
{'type': 'loss', 'content': 0.09118861705064774, 'timestamp': '2025-10-02 00:21:18.473880', 'step': 5996, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:18.533341', 'step': 5996, 'epoch': 1}
{'type': 'loss', 'content': 0.05562564358115196, 'timestamp': '2025-10-02 00:21:18.536435', 'step': 5997, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:18.593495', 'step': 5997, 'epoch': 1}
{'type': 'loss', 'content': 0.0240440983325243, 'timestamp': '2025-10-02 00:21:18.596333', 'step': 5998, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:18.652160', 'step': 5998, 'epoch': 1}
{'type': 'loss', 'content': 0.0353178046643734, 'timestamp': '2025-10-02 00:21:18.654576', 'step': 5999, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:18.712824', 'step': 5999, 'epoch': 1}
{'type': 'loss', 'content': 0.045641250908374786, 'timestamp': '2025-10-02 00:21:18.719399', 'step': 6000, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 6000', 'timestamp': '2025-10-02 00:21:19.139022', 'step': 6000, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:21:19.194628', 'step': 6000, 'epoch': 1}
{'type': 'loss', 'content': 0.06940104812383652, 'timestamp': '2025-10-02 00:21:19.197478', 'step': 6001, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:19.262922', 'step': 6001, 'epoch': 1}
{'type': 'loss', 'content': 0.005334476474672556, 'timestamp': '2025-10-02 00:21:19.273388', 'step': 6002, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:21:19.338494', 'step': 6002, 'epoch': 1}
{'type': 'loss', 'content': 0.02745293639600277, 'timestamp': '2025-10-02 00:21:19.349082', 'step': 6003, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:19.406172', 'step': 6003, 'epoch': 1}
{'type': 'loss', 'content': 0.062434494495391846, 'timestamp': '2025-10-02 00:21:19.412216', 'step': 6004, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:19.467004', 'step': 6004, 'epoch': 1}
{'type': 'loss', 'content': 0.11287631839513779, 'timestamp': '2025-10-02 00:21:19.469752', 'step': 6005, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:19.526801', 'step': 6005, 'epoch': 1}
{'type': 'loss', 'content': 0.05004443600773811, 'timestamp': '2025-10-02 00:21:19.529932', 'step': 6006, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:19.587390', 'step': 6006, 'epoch': 1}
{'type': 'loss', 'content': 0.04648629203438759, 'timestamp': '2025-10-02 00:21:19.594640', 'step': 6007, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:19.652162', 'step': 6007, 'epoch': 1}
{'type': 'loss', 'content': 0.19044063985347748, 'timestamp': '2025-10-02 00:21:19.659131', 'step': 6008, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:19.715913', 'step': 6008, 'epoch': 1}
{'type': 'loss', 'content': 0.08893558382987976, 'timestamp': '2025-10-02 00:21:19.718048', 'step': 6009, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:21:19.781900', 'step': 6009, 'epoch': 1}
{'type': 'loss', 'content': 0.050440263003110886, 'timestamp': '2025-10-02 00:21:19.792573', 'step': 6010, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:19.848340', 'step': 6010, 'epoch': 1}
{'type': 'loss', 'content': 0.046469248831272125, 'timestamp': '2025-10-02 00:21:19.857875', 'step': 6011, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:21:19.912983', 'step': 6011, 'epoch': 1}
{'type': 'loss', 'content': 0.06993333250284195, 'timestamp': '2025-10-02 00:21:19.918796', 'step': 6012, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:19.974045', 'step': 6012, 'epoch': 1}
{'type': 'loss', 'content': 0.026810260489583015, 'timestamp': '2025-10-02 00:21:19.983970', 'step': 6013, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:20.038524', 'step': 6013, 'epoch': 1}
{'type': 'loss', 'content': 0.07829081267118454, 'timestamp': '2025-10-02 00:21:20.040512', 'step': 6014, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:20.094655', 'step': 6014, 'epoch': 1}
{'type': 'loss', 'content': 0.26958999037742615, 'timestamp': '2025-10-02 00:21:20.098015', 'step': 6015, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:20.152108', 'step': 6015, 'epoch': 1}
{'type': 'loss', 'content': 0.04107174649834633, 'timestamp': '2025-10-02 00:21:20.158709', 'step': 6016, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:20.212655', 'step': 6016, 'epoch': 1}
{'type': 'loss', 'content': 0.12298119813203812, 'timestamp': '2025-10-02 00:21:20.218524', 'step': 6017, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:20.273339', 'step': 6017, 'epoch': 1}
{'type': 'loss', 'content': 0.023877030238509178, 'timestamp': '2025-10-02 00:21:20.282899', 'step': 6018, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:20.337656', 'step': 6018, 'epoch': 1}
{'type': 'loss', 'content': 0.028073124587535858, 'timestamp': '2025-10-02 00:21:20.343441', 'step': 6019, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:21:20.398793', 'step': 6019, 'epoch': 1}
{'type': 'loss', 'content': 0.1370619684457779, 'timestamp': '2025-10-02 00:21:20.404900', 'step': 6020, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:20.460085', 'step': 6020, 'epoch': 1}
{'type': 'loss', 'content': 0.06422624737024307, 'timestamp': '2025-10-02 00:21:20.462533', 'step': 6021, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:20.523404', 'step': 6021, 'epoch': 1}
{'type': 'loss', 'content': 0.06986825913190842, 'timestamp': '2025-10-02 00:21:20.533880', 'step': 6022, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:20.588956', 'step': 6022, 'epoch': 1}
{'type': 'loss', 'content': 0.17087730765342712, 'timestamp': '2025-10-02 00:21:20.591343', 'step': 6023, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:20.645255', 'step': 6023, 'epoch': 1}
{'type': 'loss', 'content': 0.1194157674908638, 'timestamp': '2025-10-02 00:21:20.651798', 'step': 6024, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:20.705622', 'step': 6024, 'epoch': 1}
{'type': 'loss', 'content': 0.14703938364982605, 'timestamp': '2025-10-02 00:21:20.707789', 'step': 6025, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:20.762020', 'step': 6025, 'epoch': 1}
{'type': 'loss', 'content': 0.07812963426113129, 'timestamp': '2025-10-02 00:21:20.766001', 'step': 6026, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:20.821840', 'step': 6026, 'epoch': 1}
{'type': 'loss', 'content': 0.0542106032371521, 'timestamp': '2025-10-02 00:21:20.831379', 'step': 6027, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:21:20.886010', 'step': 6027, 'epoch': 1}
{'type': 'loss', 'content': 0.04485383257269859, 'timestamp': '2025-10-02 00:21:20.896108', 'step': 6028, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:20.949474', 'step': 6028, 'epoch': 1}
{'type': 'loss', 'content': 0.10625439882278442, 'timestamp': '2025-10-02 00:21:20.951551', 'step': 6029, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:21:21.010739', 'step': 6029, 'epoch': 1}
{'type': 'loss', 'content': 0.027220869436860085, 'timestamp': '2025-10-02 00:21:21.020953', 'step': 6030, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:21.075599', 'step': 6030, 'epoch': 1}
{'type': 'loss', 'content': 0.18458326160907745, 'timestamp': '2025-10-02 00:21:21.078132', 'step': 6031, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:21.139525', 'step': 6031, 'epoch': 1}
{'type': 'loss', 'content': 0.04288244619965553, 'timestamp': '2025-10-02 00:21:21.150814', 'step': 6032, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:21.204832', 'step': 6032, 'epoch': 1}
{'type': 'loss', 'content': 0.047816261649131775, 'timestamp': '2025-10-02 00:21:21.207120', 'step': 6033, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:21:21.269825', 'step': 6033, 'epoch': 1}
{'type': 'loss', 'content': 0.06279601156711578, 'timestamp': '2025-10-02 00:21:21.280507', 'step': 6034, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:21.335937', 'step': 6034, 'epoch': 1}
{'type': 'loss', 'content': 0.07793601602315903, 'timestamp': '2025-10-02 00:21:21.341601', 'step': 6035, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:21.396497', 'step': 6035, 'epoch': 1}
{'type': 'loss', 'content': 0.1964021921157837, 'timestamp': '2025-10-02 00:21:21.402708', 'step': 6036, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:21.455978', 'step': 6036, 'epoch': 1}
{'type': 'loss', 'content': 0.09649962931871414, 'timestamp': '2025-10-02 00:21:21.458237', 'step': 6037, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:21:21.512361', 'step': 6037, 'epoch': 1}
{'type': 'loss', 'content': 0.1367717981338501, 'timestamp': '2025-10-02 00:21:21.514539', 'step': 6038, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:21.576810', 'step': 6038, 'epoch': 1}
{'type': 'loss', 'content': 0.04434487223625183, 'timestamp': '2025-10-02 00:21:21.587323', 'step': 6039, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:21.641332', 'step': 6039, 'epoch': 1}
{'type': 'loss', 'content': 0.20837244391441345, 'timestamp': '2025-10-02 00:21:21.647037', 'step': 6040, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:21.700283', 'step': 6040, 'epoch': 1}
{'type': 'loss', 'content': 0.12165981531143188, 'timestamp': '2025-10-02 00:21:21.702663', 'step': 6041, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:21:21.756340', 'step': 6041, 'epoch': 1}
{'type': 'loss', 'content': 0.15728770196437836, 'timestamp': '2025-10-02 00:21:21.758692', 'step': 6042, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:21.812784', 'step': 6042, 'epoch': 1}
{'type': 'loss', 'content': 0.12546779215335846, 'timestamp': '2025-10-02 00:21:21.814803', 'step': 6043, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:21:21.873290', 'step': 6043, 'epoch': 1}
{'type': 'loss', 'content': 0.03115495666861534, 'timestamp': '2025-10-02 00:21:21.884315', 'step': 6044, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:21.938577', 'step': 6044, 'epoch': 1}
{'type': 'loss', 'content': 0.09622780233621597, 'timestamp': '2025-10-02 00:21:21.945927', 'step': 6045, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:21:22.000250', 'step': 6045, 'epoch': 1}
{'type': 'loss', 'content': 0.18197351694107056, 'timestamp': '2025-10-02 00:21:22.002613', 'step': 6046, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:21:22.056411', 'step': 6046, 'epoch': 1}
{'type': 'loss', 'content': 0.23211443424224854, 'timestamp': '2025-10-02 00:21:22.058483', 'step': 6047, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:22.112613', 'step': 6047, 'epoch': 1}
{'type': 'loss', 'content': 0.11928228288888931, 'timestamp': '2025-10-02 00:21:22.118477', 'step': 6048, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:22.171431', 'step': 6048, 'epoch': 1}
{'type': 'loss', 'content': 0.12543262541294098, 'timestamp': '2025-10-02 00:21:22.175225', 'step': 6049, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:22.230629', 'step': 6049, 'epoch': 1}
{'type': 'loss', 'content': 0.03014395758509636, 'timestamp': '2025-10-02 00:21:22.236788', 'step': 6050, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:22.292349', 'step': 6050, 'epoch': 1}
{'type': 'loss', 'content': 0.043171476572752, 'timestamp': '2025-10-02 00:21:22.294505', 'step': 6051, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:22.348494', 'step': 6051, 'epoch': 1}
{'type': 'loss', 'content': 0.05736216530203819, 'timestamp': '2025-10-02 00:21:22.355330', 'step': 6052, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:21:22.417330', 'step': 6052, 'epoch': 1}
{'type': 'loss', 'content': 0.009951191954314709, 'timestamp': '2025-10-02 00:21:22.429166', 'step': 6053, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:22.484424', 'step': 6053, 'epoch': 1}
{'type': 'loss', 'content': 0.07373224198818207, 'timestamp': '2025-10-02 00:21:22.486653', 'step': 6054, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:22.541513', 'step': 6054, 'epoch': 1}
{'type': 'loss', 'content': 0.12845832109451294, 'timestamp': '2025-10-02 00:21:22.544477', 'step': 6055, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:22.598746', 'step': 6055, 'epoch': 1}
{'type': 'loss', 'content': 0.1567259132862091, 'timestamp': '2025-10-02 00:21:22.604587', 'step': 6056, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:22.658366', 'step': 6056, 'epoch': 1}
{'type': 'loss', 'content': 0.024722186848521233, 'timestamp': '2025-10-02 00:21:22.660671', 'step': 6057, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:22.714593', 'step': 6057, 'epoch': 1}
{'type': 'loss', 'content': 0.14634844660758972, 'timestamp': '2025-10-02 00:21:22.716962', 'step': 6058, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:22.771244', 'step': 6058, 'epoch': 1}
{'type': 'loss', 'content': 0.13787846267223358, 'timestamp': '2025-10-02 00:21:22.773290', 'step': 6059, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:22.826932', 'step': 6059, 'epoch': 1}
{'type': 'loss', 'content': 0.0773082748055458, 'timestamp': '2025-10-02 00:21:22.832873', 'step': 6060, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:21:22.893661', 'step': 6060, 'epoch': 1}
{'type': 'loss', 'content': 0.03789251670241356, 'timestamp': '2025-10-02 00:21:22.905427', 'step': 6061, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:22.961251', 'step': 6061, 'epoch': 1}
{'type': 'loss', 'content': 0.04293356463313103, 'timestamp': '2025-10-02 00:21:22.970800', 'step': 6062, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:23.026144', 'step': 6062, 'epoch': 1}
{'type': 'loss', 'content': 0.23350650072097778, 'timestamp': '2025-10-02 00:21:23.028508', 'step': 6063, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:21:23.083783', 'step': 6063, 'epoch': 1}
{'type': 'loss', 'content': 0.02418138086795807, 'timestamp': '2025-10-02 00:21:23.093946', 'step': 6064, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:21:23.147961', 'step': 6064, 'epoch': 1}
{'type': 'loss', 'content': 0.1499059796333313, 'timestamp': '2025-10-02 00:21:23.150143', 'step': 6065, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:23.205344', 'step': 6065, 'epoch': 1}
{'type': 'loss', 'content': 0.10293857008218765, 'timestamp': '2025-10-02 00:21:23.207755', 'step': 6066, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:23.263093', 'step': 6066, 'epoch': 1}
{'type': 'loss', 'content': 0.08696015179157257, 'timestamp': '2025-10-02 00:21:23.272610', 'step': 6067, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:23.326325', 'step': 6067, 'epoch': 1}
{'type': 'loss', 'content': 0.1857805699110031, 'timestamp': '2025-10-02 00:21:23.332543', 'step': 6068, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:23.385894', 'step': 6068, 'epoch': 1}
{'type': 'loss', 'content': 0.19608621299266815, 'timestamp': '2025-10-02 00:21:23.388159', 'step': 6069, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:23.449224', 'step': 6069, 'epoch': 1}
{'type': 'loss', 'content': 0.053430672734975815, 'timestamp': '2025-10-02 00:21:23.459679', 'step': 6070, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:23.515489', 'step': 6070, 'epoch': 1}
{'type': 'loss', 'content': 0.0496351383626461, 'timestamp': '2025-10-02 00:21:23.522925', 'step': 6071, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:23.577070', 'step': 6071, 'epoch': 1}
{'type': 'loss', 'content': 0.28262457251548767, 'timestamp': '2025-10-02 00:21:23.582857', 'step': 6072, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:21:23.643212', 'step': 6072, 'epoch': 1}
{'type': 'loss', 'content': 0.1160992681980133, 'timestamp': '2025-10-02 00:21:23.654727', 'step': 6073, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:23.710857', 'step': 6073, 'epoch': 1}
{'type': 'loss', 'content': 0.08474206924438477, 'timestamp': '2025-10-02 00:21:23.713301', 'step': 6074, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:21:23.787288', 'step': 6074, 'epoch': 1}
{'type': 'loss', 'content': 0.005634000524878502, 'timestamp': '2025-10-02 00:21:23.800523', 'step': 6075, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:23.855938', 'step': 6075, 'epoch': 1}
{'type': 'loss', 'content': 0.018683550879359245, 'timestamp': '2025-10-02 00:21:23.866284', 'step': 6076, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:23.920047', 'step': 6076, 'epoch': 1}
{'type': 'loss', 'content': 0.1748945415019989, 'timestamp': '2025-10-02 00:21:23.922329', 'step': 6077, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:21:23.985132', 'step': 6077, 'epoch': 1}
{'type': 'loss', 'content': 0.02022460848093033, 'timestamp': '2025-10-02 00:21:23.995791', 'step': 6078, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:24.051671', 'step': 6078, 'epoch': 1}
{'type': 'loss', 'content': 0.10483349859714508, 'timestamp': '2025-10-02 00:21:24.057274', 'step': 6079, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:24.112376', 'step': 6079, 'epoch': 1}
{'type': 'loss', 'content': 0.11838244646787643, 'timestamp': '2025-10-02 00:21:24.119035', 'step': 6080, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:24.172370', 'step': 6080, 'epoch': 1}
{'type': 'loss', 'content': 0.0688982829451561, 'timestamp': '2025-10-02 00:21:24.174721', 'step': 6081, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:24.230012', 'step': 6081, 'epoch': 1}
{'type': 'loss', 'content': 0.02652362361550331, 'timestamp': '2025-10-02 00:21:24.239561', 'step': 6082, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:24.294223', 'step': 6082, 'epoch': 1}
{'type': 'loss', 'content': 0.08418911695480347, 'timestamp': '2025-10-02 00:21:24.299927', 'step': 6083, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:24.355248', 'step': 6083, 'epoch': 1}
{'type': 'loss', 'content': 0.0447482168674469, 'timestamp': '2025-10-02 00:21:24.361087', 'step': 6084, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:24.423361', 'step': 6084, 'epoch': 1}
{'type': 'loss', 'content': 0.027200788259506226, 'timestamp': '2025-10-02 00:21:24.434730', 'step': 6085, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:24.495698', 'step': 6085, 'epoch': 1}
{'type': 'loss', 'content': 0.03727211430668831, 'timestamp': '2025-10-02 00:21:24.506193', 'step': 6086, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:21:24.560340', 'step': 6086, 'epoch': 1}
{'type': 'loss', 'content': 0.1976483166217804, 'timestamp': '2025-10-02 00:21:24.562662', 'step': 6087, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:24.617396', 'step': 6087, 'epoch': 1}
{'type': 'loss', 'content': 0.1110968366265297, 'timestamp': '2025-10-02 00:21:24.622984', 'step': 6088, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:21:24.677582', 'step': 6088, 'epoch': 1}
{'type': 'loss', 'content': 0.15704914927482605, 'timestamp': '2025-10-02 00:21:24.680236', 'step': 6089, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:24.735069', 'step': 6089, 'epoch': 1}
{'type': 'loss', 'content': 0.052080489695072174, 'timestamp': '2025-10-02 00:21:24.737508', 'step': 6090, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:21:24.807606', 'step': 6090, 'epoch': 1}
{'type': 'loss', 'content': 0.03611116483807564, 'timestamp': '2025-10-02 00:21:24.819913', 'step': 6091, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:24.874524', 'step': 6091, 'epoch': 1}
{'type': 'loss', 'content': 0.14159445464611053, 'timestamp': '2025-10-02 00:21:24.880860', 'step': 6092, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:21:24.934706', 'step': 6092, 'epoch': 1}
{'type': 'loss', 'content': 0.12325002998113632, 'timestamp': '2025-10-02 00:21:24.937016', 'step': 6093, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:24.992725', 'step': 6093, 'epoch': 1}
{'type': 'loss', 'content': 0.059175051748752594, 'timestamp': '2025-10-02 00:21:24.994830', 'step': 6094, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:25.055815', 'step': 6094, 'epoch': 1}
{'type': 'loss', 'content': 0.06827324628829956, 'timestamp': '2025-10-02 00:21:25.066331', 'step': 6095, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:25.121058', 'step': 6095, 'epoch': 1}
{'type': 'loss', 'content': 0.04868806526064873, 'timestamp': '2025-10-02 00:21:25.126751', 'step': 6096, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:25.181461', 'step': 6096, 'epoch': 1}
{'type': 'loss', 'content': 0.14071275293827057, 'timestamp': '2025-10-02 00:21:25.183893', 'step': 6097, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:25.239600', 'step': 6097, 'epoch': 1}
{'type': 'loss', 'content': 0.15651512145996094, 'timestamp': '2025-10-02 00:21:25.242437', 'step': 6098, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:25.301724', 'step': 6098, 'epoch': 1}
{'type': 'loss', 'content': 0.13990679383277893, 'timestamp': '2025-10-02 00:21:25.303768', 'step': 6099, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:25.359802', 'step': 6099, 'epoch': 1}
{'type': 'loss', 'content': 0.014186657033860683, 'timestamp': '2025-10-02 00:21:25.366104', 'step': 6100, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:25.420372', 'step': 6100, 'epoch': 1}
{'type': 'loss', 'content': 0.04196299985051155, 'timestamp': '2025-10-02 00:21:25.422519', 'step': 6101, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:25.476071', 'step': 6101, 'epoch': 1}
{'type': 'loss', 'content': 0.19260266423225403, 'timestamp': '2025-10-02 00:21:25.478149', 'step': 6102, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:25.533796', 'step': 6102, 'epoch': 1}
{'type': 'loss', 'content': 0.08071319013834, 'timestamp': '2025-10-02 00:21:25.535792', 'step': 6103, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:21:25.589681', 'step': 6103, 'epoch': 1}
{'type': 'loss', 'content': 0.11332300305366516, 'timestamp': '2025-10-02 00:21:25.596100', 'step': 6104, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:21:25.650058', 'step': 6104, 'epoch': 1}
{'type': 'loss', 'content': 0.12719056010246277, 'timestamp': '2025-10-02 00:21:25.652412', 'step': 6105, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:25.707910', 'step': 6105, 'epoch': 1}
{'type': 'loss', 'content': 0.04639728367328644, 'timestamp': '2025-10-02 00:21:25.717425', 'step': 6106, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:21:25.778640', 'step': 6106, 'epoch': 1}
{'type': 'loss', 'content': 0.17693938314914703, 'timestamp': '2025-10-02 00:21:25.781090', 'step': 6107, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:21:25.836641', 'step': 6107, 'epoch': 1}
{'type': 'loss', 'content': 0.15215671062469482, 'timestamp': '2025-10-02 00:21:25.842787', 'step': 6108, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:21:25.900874', 'step': 6108, 'epoch': 1}
{'type': 'loss', 'content': 0.017450343817472458, 'timestamp': '2025-10-02 00:21:25.911897', 'step': 6109, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:25.966594', 'step': 6109, 'epoch': 1}
{'type': 'loss', 'content': 0.045232854783535004, 'timestamp': '2025-10-02 00:21:25.972333', 'step': 6110, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:21:26.030897', 'step': 6110, 'epoch': 1}
{'type': 'loss', 'content': 0.04932336136698723, 'timestamp': '2025-10-02 00:21:26.041120', 'step': 6111, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:26.096820', 'step': 6111, 'epoch': 1}
{'type': 'loss', 'content': 0.11271704733371735, 'timestamp': '2025-10-02 00:21:26.102779', 'step': 6112, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:26.156740', 'step': 6112, 'epoch': 1}
{'type': 'loss', 'content': 0.06785929203033447, 'timestamp': '2025-10-02 00:21:26.162693', 'step': 6113, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:21:26.221778', 'step': 6113, 'epoch': 1}
{'type': 'loss', 'content': 0.011409939266741276, 'timestamp': '2025-10-02 00:21:26.231990', 'step': 6114, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:26.287036', 'step': 6114, 'epoch': 1}
{'type': 'loss', 'content': 0.07732818275690079, 'timestamp': '2025-10-02 00:21:26.289227', 'step': 6115, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:26.344135', 'step': 6115, 'epoch': 1}
{'type': 'loss', 'content': 0.0687531903386116, 'timestamp': '2025-10-02 00:21:26.352253', 'step': 6116, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:26.406123', 'step': 6116, 'epoch': 1}
{'type': 'loss', 'content': 0.08575527369976044, 'timestamp': '2025-10-02 00:21:26.408536', 'step': 6117, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:21:26.467332', 'step': 6117, 'epoch': 1}
{'type': 'loss', 'content': 0.0564066581428051, 'timestamp': '2025-10-02 00:21:26.477518', 'step': 6118, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:26.532934', 'step': 6118, 'epoch': 1}
{'type': 'loss', 'content': 0.07123124599456787, 'timestamp': '2025-10-02 00:21:26.538774', 'step': 6119, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:26.594299', 'step': 6119, 'epoch': 1}
{'type': 'loss', 'content': 0.12673097848892212, 'timestamp': '2025-10-02 00:21:26.602429', 'step': 6120, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:26.656567', 'step': 6120, 'epoch': 1}
{'type': 'loss', 'content': 0.10967648029327393, 'timestamp': '2025-10-02 00:21:26.658798', 'step': 6121, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:21:26.712956', 'step': 6121, 'epoch': 1}
{'type': 'loss', 'content': 0.17926636338233948, 'timestamp': '2025-10-02 00:21:26.715227', 'step': 6122, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:21:26.774088', 'step': 6122, 'epoch': 1}
{'type': 'loss', 'content': 0.03740960359573364, 'timestamp': '2025-10-02 00:21:26.784260', 'step': 6123, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:26.838757', 'step': 6123, 'epoch': 1}
{'type': 'loss', 'content': 0.06759844720363617, 'timestamp': '2025-10-02 00:21:26.844801', 'step': 6124, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:21:26.898805', 'step': 6124, 'epoch': 1}
{'type': 'loss', 'content': 0.07681140303611755, 'timestamp': '2025-10-02 00:21:26.905452', 'step': 6125, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:21:26.960883', 'step': 6125, 'epoch': 1}
{'type': 'loss', 'content': 0.10796888917684555, 'timestamp': '2025-10-02 00:21:26.964550', 'step': 6126, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:27.019723', 'step': 6126, 'epoch': 1}
{'type': 'loss', 'content': 0.07404082268476486, 'timestamp': '2025-10-02 00:21:27.027009', 'step': 6127, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:27.081819', 'step': 6127, 'epoch': 1}
{'type': 'loss', 'content': 0.020858293399214745, 'timestamp': '2025-10-02 00:21:27.087707', 'step': 6128, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:27.141499', 'step': 6128, 'epoch': 1}
{'type': 'loss', 'content': 0.07944091409444809, 'timestamp': '2025-10-02 00:21:27.143584', 'step': 6129, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:21:27.199119', 'step': 6129, 'epoch': 1}
{'type': 'loss', 'content': 0.01722540706396103, 'timestamp': '2025-10-02 00:21:27.208428', 'step': 6130, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:27.263319', 'step': 6130, 'epoch': 1}
{'type': 'loss', 'content': 0.13937155902385712, 'timestamp': '2025-10-02 00:21:27.265449', 'step': 6131, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:27.320179', 'step': 6131, 'epoch': 1}
{'type': 'loss', 'content': 0.06894992291927338, 'timestamp': '2025-10-02 00:21:27.326019', 'step': 6132, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:27.380641', 'step': 6132, 'epoch': 1}
{'type': 'loss', 'content': 0.05714041739702225, 'timestamp': '2025-10-02 00:21:27.392458', 'step': 6133, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:27.448042', 'step': 6133, 'epoch': 1}
{'type': 'loss', 'content': 0.12719561159610748, 'timestamp': '2025-10-02 00:21:27.450276', 'step': 6134, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:27.504657', 'step': 6134, 'epoch': 1}
{'type': 'loss', 'content': 0.13586057722568512, 'timestamp': '2025-10-02 00:21:27.509131', 'step': 6135, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:21:27.577035', 'step': 6135, 'epoch': 1}
{'type': 'loss', 'content': 0.08342178910970688, 'timestamp': '2025-10-02 00:21:27.587164', 'step': 6136, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:27.641672', 'step': 6136, 'epoch': 1}
{'type': 'loss', 'content': 0.1759405881166458, 'timestamp': '2025-10-02 00:21:27.643865', 'step': 6137, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:27.698304', 'step': 6137, 'epoch': 1}
{'type': 'loss', 'content': 0.10392335802316666, 'timestamp': '2025-10-02 00:21:27.700539', 'step': 6138, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:27.754348', 'step': 6138, 'epoch': 1}
{'type': 'loss', 'content': 0.29389044642448425, 'timestamp': '2025-10-02 00:21:27.756659', 'step': 6139, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:27.811651', 'step': 6139, 'epoch': 1}
{'type': 'loss', 'content': 0.05526996776461601, 'timestamp': '2025-10-02 00:21:27.817736', 'step': 6140, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:27.877173', 'step': 6140, 'epoch': 1}
{'type': 'loss', 'content': 0.09309160709381104, 'timestamp': '2025-10-02 00:21:27.888481', 'step': 6141, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:21:27.952423', 'step': 6141, 'epoch': 1}
{'type': 'loss', 'content': 0.03380683809518814, 'timestamp': '2025-10-02 00:21:27.963105', 'step': 6142, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:21:28.025606', 'step': 6142, 'epoch': 1}
{'type': 'loss', 'content': 0.034930270165205, 'timestamp': '2025-10-02 00:21:28.036219', 'step': 6143, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:28.094882', 'step': 6143, 'epoch': 1}
{'type': 'loss', 'content': 0.07031852006912231, 'timestamp': '2025-10-02 00:21:28.101977', 'step': 6144, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:28.158588', 'step': 6144, 'epoch': 1}
{'type': 'loss', 'content': 0.04436564818024635, 'timestamp': '2025-10-02 00:21:28.161419', 'step': 6145, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:28.217790', 'step': 6145, 'epoch': 1}
{'type': 'loss', 'content': 0.10621348023414612, 'timestamp': '2025-10-02 00:21:28.223722', 'step': 6146, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:28.281234', 'step': 6146, 'epoch': 1}
{'type': 'loss', 'content': 0.07744606584310532, 'timestamp': '2025-10-02 00:21:28.284848', 'step': 6147, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:28.349159', 'step': 6147, 'epoch': 1}
{'type': 'loss', 'content': 0.07838652282953262, 'timestamp': '2025-10-02 00:21:28.360451', 'step': 6148, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:28.415477', 'step': 6148, 'epoch': 1}
{'type': 'loss', 'content': 0.09390907734632492, 'timestamp': '2025-10-02 00:21:28.418603', 'step': 6149, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:28.474780', 'step': 6149, 'epoch': 1}
{'type': 'loss', 'content': 0.059646207839250565, 'timestamp': '2025-10-02 00:21:28.477796', 'step': 6150, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:28.535071', 'step': 6150, 'epoch': 1}
{'type': 'loss', 'content': 0.11635774374008179, 'timestamp': '2025-10-02 00:21:28.538244', 'step': 6151, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:28.595654', 'step': 6151, 'epoch': 1}
{'type': 'loss', 'content': 0.29638874530792236, 'timestamp': '2025-10-02 00:21:28.601796', 'step': 6152, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:28.657979', 'step': 6152, 'epoch': 1}
{'type': 'loss', 'content': 0.03147219121456146, 'timestamp': '2025-10-02 00:21:28.668218', 'step': 6153, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:28.726343', 'step': 6153, 'epoch': 1}
{'type': 'loss', 'content': 0.028423666954040527, 'timestamp': '2025-10-02 00:21:28.729450', 'step': 6154, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:28.788417', 'step': 6154, 'epoch': 1}
{'type': 'loss', 'content': 0.10379506647586823, 'timestamp': '2025-10-02 00:21:28.790705', 'step': 6155, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:28.845119', 'step': 6155, 'epoch': 1}
{'type': 'loss', 'content': 0.24305565655231476, 'timestamp': '2025-10-02 00:21:28.854119', 'step': 6156, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:21:28.910310', 'step': 6156, 'epoch': 1}
{'type': 'loss', 'content': 0.06512647867202759, 'timestamp': '2025-10-02 00:21:28.919900', 'step': 6157, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:28.977413', 'step': 6157, 'epoch': 1}
{'type': 'loss', 'content': 0.11244910955429077, 'timestamp': '2025-10-02 00:21:28.980231', 'step': 6158, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:21:29.038280', 'step': 6158, 'epoch': 1}
{'type': 'loss', 'content': 0.18774494528770447, 'timestamp': '2025-10-02 00:21:29.040930', 'step': 6159, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:29.097833', 'step': 6159, 'epoch': 1}
{'type': 'loss', 'content': 0.06533300876617432, 'timestamp': '2025-10-02 00:21:29.104882', 'step': 6160, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:29.161139', 'step': 6160, 'epoch': 1}
{'type': 'loss', 'content': 0.05960801988840103, 'timestamp': '2025-10-02 00:21:29.166859', 'step': 6161, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:29.224379', 'step': 6161, 'epoch': 1}
{'type': 'loss', 'content': 0.048041511327028275, 'timestamp': '2025-10-02 00:21:29.227906', 'step': 6162, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:21:29.291134', 'step': 6162, 'epoch': 1}
{'type': 'loss', 'content': 0.02022107131779194, 'timestamp': '2025-10-02 00:21:29.301336', 'step': 6163, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:29.365411', 'step': 6163, 'epoch': 1}
{'type': 'loss', 'content': 0.07828128337860107, 'timestamp': '2025-10-02 00:21:29.376700', 'step': 6164, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:29.435009', 'step': 6164, 'epoch': 1}
{'type': 'loss', 'content': 0.1655024290084839, 'timestamp': '2025-10-02 00:21:29.438596', 'step': 6165, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:29.496818', 'step': 6165, 'epoch': 1}
{'type': 'loss', 'content': 0.07686483860015869, 'timestamp': '2025-10-02 00:21:29.499539', 'step': 6166, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:29.556702', 'step': 6166, 'epoch': 1}
{'type': 'loss', 'content': 0.06548267602920532, 'timestamp': '2025-10-02 00:21:29.559734', 'step': 6167, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:29.617902', 'step': 6167, 'epoch': 1}
{'type': 'loss', 'content': 0.09289795160293579, 'timestamp': '2025-10-02 00:21:29.624359', 'step': 6168, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:29.679576', 'step': 6168, 'epoch': 1}
{'type': 'loss', 'content': 0.07626932114362717, 'timestamp': '2025-10-02 00:21:29.682630', 'step': 6169, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:29.745564', 'step': 6169, 'epoch': 1}
{'type': 'loss', 'content': 0.053555406630039215, 'timestamp': '2025-10-02 00:21:29.755999', 'step': 6170, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:21:29.820333', 'step': 6170, 'epoch': 1}
{'type': 'loss', 'content': 0.04275188595056534, 'timestamp': '2025-10-02 00:21:29.830960', 'step': 6171, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:29.888824', 'step': 6171, 'epoch': 1}
{'type': 'loss', 'content': 0.04488370940089226, 'timestamp': '2025-10-02 00:21:29.897043', 'step': 6172, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:29.953950', 'step': 6172, 'epoch': 1}
{'type': 'loss', 'content': 0.026196936145424843, 'timestamp': '2025-10-02 00:21:29.956688', 'step': 6173, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:30.012596', 'step': 6173, 'epoch': 1}
{'type': 'loss', 'content': 0.13738739490509033, 'timestamp': '2025-10-02 00:21:30.015561', 'step': 6174, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:30.074170', 'step': 6174, 'epoch': 1}
{'type': 'loss', 'content': 0.04676266387104988, 'timestamp': '2025-10-02 00:21:30.076630', 'step': 6175, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:21:30.141595', 'step': 6175, 'epoch': 1}
{'type': 'loss', 'content': 0.039572715759277344, 'timestamp': '2025-10-02 00:21:30.153000', 'step': 6176, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:30.208414', 'step': 6176, 'epoch': 1}
{'type': 'loss', 'content': 0.04456597566604614, 'timestamp': '2025-10-02 00:21:30.210596', 'step': 6177, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:30.265979', 'step': 6177, 'epoch': 1}
{'type': 'loss', 'content': 0.05061326548457146, 'timestamp': '2025-10-02 00:21:30.268283', 'step': 6178, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:30.322656', 'step': 6178, 'epoch': 1}
{'type': 'loss', 'content': 0.1483084112405777, 'timestamp': '2025-10-02 00:21:30.327792', 'step': 6179, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:21:30.390447', 'step': 6179, 'epoch': 1}
{'type': 'loss', 'content': 0.06292196363210678, 'timestamp': '2025-10-02 00:21:30.401900', 'step': 6180, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:30.469187', 'step': 6180, 'epoch': 1}
{'type': 'loss', 'content': 0.10916007310152054, 'timestamp': '2025-10-02 00:21:30.471399', 'step': 6181, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:30.525634', 'step': 6181, 'epoch': 1}
{'type': 'loss', 'content': 0.1418980062007904, 'timestamp': '2025-10-02 00:21:30.528448', 'step': 6182, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:30.586961', 'step': 6182, 'epoch': 1}
{'type': 'loss', 'content': 0.15853996574878693, 'timestamp': '2025-10-02 00:21:30.589106', 'step': 6183, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:30.645084', 'step': 6183, 'epoch': 1}
{'type': 'loss', 'content': 0.02938232757151127, 'timestamp': '2025-10-02 00:21:30.664389', 'step': 6184, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:30.733210', 'step': 6184, 'epoch': 1}
{'type': 'loss', 'content': 0.17028340697288513, 'timestamp': '2025-10-02 00:21:30.735229', 'step': 6185, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:30.788948', 'step': 6185, 'epoch': 1}
{'type': 'loss', 'content': 0.1583385318517685, 'timestamp': '2025-10-02 00:21:30.791166', 'step': 6186, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:21:30.849356', 'step': 6186, 'epoch': 1}
{'type': 'loss', 'content': 0.06358247250318527, 'timestamp': '2025-10-02 00:21:30.858559', 'step': 6187, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:30.913135', 'step': 6187, 'epoch': 1}
{'type': 'loss', 'content': 0.07677733153104782, 'timestamp': '2025-10-02 00:21:30.921388', 'step': 6188, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:30.975458', 'step': 6188, 'epoch': 1}
{'type': 'loss', 'content': 0.1287882775068283, 'timestamp': '2025-10-02 00:21:30.983109', 'step': 6189, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:31.042532', 'step': 6189, 'epoch': 1}
{'type': 'loss', 'content': 0.09608355164527893, 'timestamp': '2025-10-02 00:21:31.044924', 'step': 6190, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:31.099594', 'step': 6190, 'epoch': 1}
{'type': 'loss', 'content': 0.20485460758209229, 'timestamp': '2025-10-02 00:21:31.101922', 'step': 6191, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:31.156930', 'step': 6191, 'epoch': 1}
{'type': 'loss', 'content': 0.05940735340118408, 'timestamp': '2025-10-02 00:21:31.165239', 'step': 6192, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:21:31.221259', 'step': 6192, 'epoch': 1}
{'type': 'loss', 'content': 0.1438784897327423, 'timestamp': '2025-10-02 00:21:31.223688', 'step': 6193, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:21:31.286283', 'step': 6193, 'epoch': 1}
{'type': 'loss', 'content': 0.036030013114213943, 'timestamp': '2025-10-02 00:21:31.296915', 'step': 6194, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:31.362455', 'step': 6194, 'epoch': 1}
{'type': 'loss', 'content': 0.053294941782951355, 'timestamp': '2025-10-02 00:21:31.372977', 'step': 6195, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:31.447492', 'step': 6195, 'epoch': 1}
{'type': 'loss', 'content': 0.0744597390294075, 'timestamp': '2025-10-02 00:21:31.454201', 'step': 6196, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:31.507943', 'step': 6196, 'epoch': 1}
{'type': 'loss', 'content': 0.1950417011976242, 'timestamp': '2025-10-02 00:21:31.510252', 'step': 6197, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:31.564346', 'step': 6197, 'epoch': 1}
{'type': 'loss', 'content': 0.11010399460792542, 'timestamp': '2025-10-02 00:21:31.566607', 'step': 6198, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:31.627767', 'step': 6198, 'epoch': 1}
{'type': 'loss', 'content': 0.05058308318257332, 'timestamp': '2025-10-02 00:21:31.638264', 'step': 6199, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:31.692936', 'step': 6199, 'epoch': 1}
{'type': 'loss', 'content': 0.20922306180000305, 'timestamp': '2025-10-02 00:21:31.698806', 'step': 6200, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:31.752534', 'step': 6200, 'epoch': 1}
{'type': 'loss', 'content': 0.053985241800546646, 'timestamp': '2025-10-02 00:21:31.762758', 'step': 6201, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:21:31.830011', 'step': 6201, 'epoch': 1}
{'type': 'loss', 'content': 0.10574664175510406, 'timestamp': '2025-10-02 00:21:31.832568', 'step': 6202, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:31.887674', 'step': 6202, 'epoch': 1}
{'type': 'loss', 'content': 0.12271702289581299, 'timestamp': '2025-10-02 00:21:31.893506', 'step': 6203, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:31.948057', 'step': 6203, 'epoch': 1}
{'type': 'loss', 'content': 0.11155983060598373, 'timestamp': '2025-10-02 00:21:31.953754', 'step': 6204, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:32.007380', 'step': 6204, 'epoch': 1}
{'type': 'loss', 'content': 0.058785151690244675, 'timestamp': '2025-10-02 00:21:32.014766', 'step': 6205, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:32.068909', 'step': 6205, 'epoch': 1}
{'type': 'loss', 'content': 0.1057804524898529, 'timestamp': '2025-10-02 00:21:32.071230', 'step': 6206, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:32.125325', 'step': 6206, 'epoch': 1}
{'type': 'loss', 'content': 0.09682653844356537, 'timestamp': '2025-10-02 00:21:32.127803', 'step': 6207, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:21:32.181462', 'step': 6207, 'epoch': 1}
{'type': 'loss', 'content': 0.10600448399782181, 'timestamp': '2025-10-02 00:21:32.187545', 'step': 6208, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:21:32.241805', 'step': 6208, 'epoch': 1}
{'type': 'loss', 'content': 0.1723821610212326, 'timestamp': '2025-10-02 00:21:32.244704', 'step': 6209, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:32.300036', 'step': 6209, 'epoch': 1}
{'type': 'loss', 'content': 0.10065416246652603, 'timestamp': '2025-10-02 00:21:32.305844', 'step': 6210, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:32.371393', 'step': 6210, 'epoch': 1}
{'type': 'loss', 'content': 0.17863653600215912, 'timestamp': '2025-10-02 00:21:32.373830', 'step': 6211, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:32.427714', 'step': 6211, 'epoch': 1}
{'type': 'loss', 'content': 0.16004721820354462, 'timestamp': '2025-10-02 00:21:32.433656', 'step': 6212, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:32.494475', 'step': 6212, 'epoch': 1}
{'type': 'loss', 'content': 0.04228784143924713, 'timestamp': '2025-10-02 00:21:32.505818', 'step': 6213, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:32.560060', 'step': 6213, 'epoch': 1}
{'type': 'loss', 'content': 0.08324640244245529, 'timestamp': '2025-10-02 00:21:32.562382', 'step': 6214, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:32.616693', 'step': 6214, 'epoch': 1}
{'type': 'loss', 'content': 0.049271468073129654, 'timestamp': '2025-10-02 00:21:32.619014', 'step': 6215, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:32.674157', 'step': 6215, 'epoch': 1}
{'type': 'loss', 'content': 0.03417600318789482, 'timestamp': '2025-10-02 00:21:32.684445', 'step': 6216, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:32.738908', 'step': 6216, 'epoch': 1}
{'type': 'loss', 'content': 0.08628888428211212, 'timestamp': '2025-10-02 00:21:32.744833', 'step': 6217, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:32.799281', 'step': 6217, 'epoch': 1}
{'type': 'loss', 'content': 0.07161616533994675, 'timestamp': '2025-10-02 00:21:32.805080', 'step': 6218, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:32.859941', 'step': 6218, 'epoch': 1}
{'type': 'loss', 'content': 0.12285768985748291, 'timestamp': '2025-10-02 00:21:32.862127', 'step': 6219, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:21:32.916600', 'step': 6219, 'epoch': 1}
{'type': 'loss', 'content': 0.0889044851064682, 'timestamp': '2025-10-02 00:21:32.922172', 'step': 6220, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:32.982264', 'step': 6220, 'epoch': 1}
{'type': 'loss', 'content': 0.0398186594247818, 'timestamp': '2025-10-02 00:21:32.993580', 'step': 6221, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:33.047832', 'step': 6221, 'epoch': 1}
{'type': 'loss', 'content': 0.07784698158502579, 'timestamp': '2025-10-02 00:21:33.055336', 'step': 6222, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:33.110096', 'step': 6222, 'epoch': 1}
{'type': 'loss', 'content': 0.02732912264764309, 'timestamp': '2025-10-02 00:21:33.115691', 'step': 6223, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:33.170093', 'step': 6223, 'epoch': 1}
{'type': 'loss', 'content': 0.09796242415904999, 'timestamp': '2025-10-02 00:21:33.176014', 'step': 6224, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:33.230489', 'step': 6224, 'epoch': 1}
{'type': 'loss', 'content': 0.05174502730369568, 'timestamp': '2025-10-02 00:21:33.237974', 'step': 6225, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:33.296140', 'step': 6225, 'epoch': 1}
{'type': 'loss', 'content': 0.06267814338207245, 'timestamp': '2025-10-02 00:21:33.298524', 'step': 6226, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:33.353200', 'step': 6226, 'epoch': 1}
{'type': 'loss', 'content': 0.10233521461486816, 'timestamp': '2025-10-02 00:21:33.355475', 'step': 6227, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:33.409017', 'step': 6227, 'epoch': 1}
{'type': 'loss', 'content': 0.1879773736000061, 'timestamp': '2025-10-02 00:21:33.414833', 'step': 6228, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:21:33.468870', 'step': 6228, 'epoch': 1}
{'type': 'loss', 'content': 0.1298375278711319, 'timestamp': '2025-10-02 00:21:33.471071', 'step': 6229, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:33.525342', 'step': 6229, 'epoch': 1}
{'type': 'loss', 'content': 0.08319021016359329, 'timestamp': '2025-10-02 00:21:33.531128', 'step': 6230, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:33.587461', 'step': 6230, 'epoch': 1}
{'type': 'loss', 'content': 0.22479937970638275, 'timestamp': '2025-10-02 00:21:33.589769', 'step': 6231, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:33.645933', 'step': 6231, 'epoch': 1}
{'type': 'loss', 'content': 0.059119366109371185, 'timestamp': '2025-10-02 00:21:33.656266', 'step': 6232, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:33.710288', 'step': 6232, 'epoch': 1}
{'type': 'loss', 'content': 0.08110478520393372, 'timestamp': '2025-10-02 00:21:33.712547', 'step': 6233, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:33.773690', 'step': 6233, 'epoch': 1}
{'type': 'loss', 'content': 0.03701265528798103, 'timestamp': '2025-10-02 00:21:33.784195', 'step': 6234, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:33.839001', 'step': 6234, 'epoch': 1}
{'type': 'loss', 'content': 0.08997927606105804, 'timestamp': '2025-10-02 00:21:33.841273', 'step': 6235, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:21:33.908272', 'step': 6235, 'epoch': 1}
{'type': 'loss', 'content': 0.0665610060095787, 'timestamp': '2025-10-02 00:21:33.921061', 'step': 6236, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:21:33.974964', 'step': 6236, 'epoch': 1}
{'type': 'loss', 'content': 0.07593602687120438, 'timestamp': '2025-10-02 00:21:33.977430', 'step': 6237, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:34.032950', 'step': 6237, 'epoch': 1}
{'type': 'loss', 'content': 0.02269543707370758, 'timestamp': '2025-10-02 00:21:34.040524', 'step': 6238, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:34.095534', 'step': 6238, 'epoch': 1}
{'type': 'loss', 'content': 0.055533502250909805, 'timestamp': '2025-10-02 00:21:34.102977', 'step': 6239, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:34.157674', 'step': 6239, 'epoch': 1}
{'type': 'loss', 'content': 0.22732409834861755, 'timestamp': '2025-10-02 00:21:34.164571', 'step': 6240, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:34.218253', 'step': 6240, 'epoch': 1}
{'type': 'loss', 'content': 0.19662418961524963, 'timestamp': '2025-10-02 00:21:34.220410', 'step': 6241, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:34.282181', 'step': 6241, 'epoch': 1}
{'type': 'loss', 'content': 0.06689541786909103, 'timestamp': '2025-10-02 00:21:34.292602', 'step': 6242, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:34.348409', 'step': 6242, 'epoch': 1}
{'type': 'loss', 'content': 0.038284409791231155, 'timestamp': '2025-10-02 00:21:34.355843', 'step': 6243, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:21:34.418577', 'step': 6243, 'epoch': 1}
{'type': 'loss', 'content': 0.032886654138565063, 'timestamp': '2025-10-02 00:21:34.430181', 'step': 6244, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:34.483847', 'step': 6244, 'epoch': 1}
{'type': 'loss', 'content': 0.11500360816717148, 'timestamp': '2025-10-02 00:21:34.489740', 'step': 6245, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:34.544649', 'step': 6245, 'epoch': 1}
{'type': 'loss', 'content': 0.12165956199169159, 'timestamp': '2025-10-02 00:21:34.546995', 'step': 6246, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:34.603224', 'step': 6246, 'epoch': 1}
{'type': 'loss', 'content': 0.06138304993510246, 'timestamp': '2025-10-02 00:21:34.605479', 'step': 6247, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:21:34.659504', 'step': 6247, 'epoch': 1}
{'type': 'loss', 'content': 0.058130569756031036, 'timestamp': '2025-10-02 00:21:34.665388', 'step': 6248, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:34.719130', 'step': 6248, 'epoch': 1}
{'type': 'loss', 'content': 0.0631219670176506, 'timestamp': '2025-10-02 00:21:34.725115', 'step': 6249, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:34.779651', 'step': 6249, 'epoch': 1}
{'type': 'loss', 'content': 0.04114023596048355, 'timestamp': '2025-10-02 00:21:34.785336', 'step': 6250, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:34.840731', 'step': 6250, 'epoch': 1}
{'type': 'loss', 'content': 0.12367556989192963, 'timestamp': '2025-10-02 00:21:34.850217', 'step': 6251, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:21:34.905955', 'step': 6251, 'epoch': 1}
{'type': 'loss', 'content': 0.12074271589517593, 'timestamp': '2025-10-02 00:21:34.916333', 'step': 6252, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:21:34.982605', 'step': 6252, 'epoch': 1}
{'type': 'loss', 'content': 0.053990621119737625, 'timestamp': '2025-10-02 00:21:34.993615', 'step': 6253, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:21:35.049585', 'step': 6253, 'epoch': 1}
{'type': 'loss', 'content': 0.07971435785293579, 'timestamp': '2025-10-02 00:21:35.051768', 'step': 6254, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:21:35.114158', 'step': 6254, 'epoch': 1}
{'type': 'loss', 'content': 0.009152079001069069, 'timestamp': '2025-10-02 00:21:35.124637', 'step': 6255, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:21:35.179481', 'step': 6255, 'epoch': 1}
{'type': 'loss', 'content': 0.07568579167127609, 'timestamp': '2025-10-02 00:21:35.185077', 'step': 6256, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:35.238400', 'step': 6256, 'epoch': 1}
{'type': 'loss', 'content': 0.19594354927539825, 'timestamp': '2025-10-02 00:21:35.240453', 'step': 6257, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:21:35.295167', 'step': 6257, 'epoch': 1}
{'type': 'loss', 'content': 0.11723920702934265, 'timestamp': '2025-10-02 00:21:35.297424', 'step': 6258, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:21:35.359594', 'step': 6258, 'epoch': 1}
{'type': 'loss', 'content': 0.035692304372787476, 'timestamp': '2025-10-02 00:21:35.365279', 'step': 6259, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:21:35.419117', 'step': 6259, 'epoch': 1}
{'type': 'loss', 'content': 0.16393782198429108, 'timestamp': '2025-10-02 00:21:35.424878', 'step': 6260, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:35.479028', 'step': 6260, 'epoch': 1}
{'type': 'loss', 'content': 0.09597789496183395, 'timestamp': '2025-10-02 00:21:35.486374', 'step': 6261, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:21:35.541042', 'step': 6261, 'epoch': 1}
{'type': 'loss', 'content': 0.05679615959525108, 'timestamp': '2025-10-02 00:21:35.548513', 'step': 6262, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:21:35.604422', 'step': 6262, 'epoch': 1}
{'type': 'loss', 'content': 0.05030561983585358, 'timestamp': '2025-10-02 00:21:35.606789', 'step': 6263, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:21:35.663548', 'step': 6263, 'epoch': 1}
{'type': 'loss', 'content': 0.011443454772233963, 'timestamp': '2025-10-02 00:21:35.673893', 'step': 6264, 'epoch': 1}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:22:02.826428', 'step': 6264, 'epoch': 1}
{'type': 'pplx', 'content': 92.74215359640722, 'timestamp': '2025-10-02 00:22:02.830503', 'step': 6264, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:02.885693', 'step': 6264, 'epoch': 1}
{'type': 'loss', 'content': 0.2130887508392334, 'timestamp': '2025-10-02 00:22:02.888046', 'step': 6265, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:02.944021', 'step': 6265, 'epoch': 1}
{'type': 'loss', 'content': 0.08794941753149033, 'timestamp': '2025-10-02 00:22:02.952954', 'step': 6266, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:03.008599', 'step': 6266, 'epoch': 1}
{'type': 'loss', 'content': 0.09241557866334915, 'timestamp': '2025-10-02 00:22:03.010994', 'step': 6267, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:03.067514', 'step': 6267, 'epoch': 1}
{'type': 'loss', 'content': 0.05576435849070549, 'timestamp': '2025-10-02 00:22:03.074480', 'step': 6268, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:03.128737', 'step': 6268, 'epoch': 1}
{'type': 'loss', 'content': 0.05116022378206253, 'timestamp': '2025-10-02 00:22:03.131148', 'step': 6269, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:03.185347', 'step': 6269, 'epoch': 1}
{'type': 'loss', 'content': 0.053822606801986694, 'timestamp': '2025-10-02 00:22:03.187838', 'step': 6270, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:03.242347', 'step': 6270, 'epoch': 1}
{'type': 'loss', 'content': 0.0874205231666565, 'timestamp': '2025-10-02 00:22:03.244903', 'step': 6271, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:03.300575', 'step': 6271, 'epoch': 1}
{'type': 'loss', 'content': 0.05776766315102577, 'timestamp': '2025-10-02 00:22:03.307859', 'step': 6272, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:03.361151', 'step': 6272, 'epoch': 1}
{'type': 'loss', 'content': 0.06856266409158707, 'timestamp': '2025-10-02 00:22:03.363464', 'step': 6273, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:03.416721', 'step': 6273, 'epoch': 1}
{'type': 'loss', 'content': 0.0802551731467247, 'timestamp': '2025-10-02 00:22:03.419701', 'step': 6274, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:03.474707', 'step': 6274, 'epoch': 1}
{'type': 'loss', 'content': 0.0945129469037056, 'timestamp': '2025-10-02 00:22:03.476944', 'step': 6275, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:03.531140', 'step': 6275, 'epoch': 1}
{'type': 'loss', 'content': 0.22433829307556152, 'timestamp': '2025-10-02 00:22:03.537300', 'step': 6276, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:03.590603', 'step': 6276, 'epoch': 1}
{'type': 'loss', 'content': 0.055968303233385086, 'timestamp': '2025-10-02 00:22:03.596814', 'step': 6277, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:03.656583', 'step': 6277, 'epoch': 1}
{'type': 'loss', 'content': 0.06567972898483276, 'timestamp': '2025-10-02 00:22:03.667040', 'step': 6278, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:03.721263', 'step': 6278, 'epoch': 1}
{'type': 'loss', 'content': 0.09055791050195694, 'timestamp': '2025-10-02 00:22:03.723660', 'step': 6279, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:03.777565', 'step': 6279, 'epoch': 1}
{'type': 'loss', 'content': 0.06258545815944672, 'timestamp': '2025-10-02 00:22:03.786170', 'step': 6280, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:03.840191', 'step': 6280, 'epoch': 1}
{'type': 'loss', 'content': 0.05818284675478935, 'timestamp': '2025-10-02 00:22:03.846345', 'step': 6281, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:03.901338', 'step': 6281, 'epoch': 1}
{'type': 'loss', 'content': 0.06001390144228935, 'timestamp': '2025-10-02 00:22:03.903644', 'step': 6282, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:03.958812', 'step': 6282, 'epoch': 1}
{'type': 'loss', 'content': 0.025023430585861206, 'timestamp': '2025-10-02 00:22:03.961420', 'step': 6283, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:04.015617', 'step': 6283, 'epoch': 1}
{'type': 'loss', 'content': 0.034945592284202576, 'timestamp': '2025-10-02 00:22:04.022488', 'step': 6284, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:04.076348', 'step': 6284, 'epoch': 1}
{'type': 'loss', 'content': 0.1388244926929474, 'timestamp': '2025-10-02 00:22:04.078634', 'step': 6285, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:04.133010', 'step': 6285, 'epoch': 1}
{'type': 'loss', 'content': 0.05975763127207756, 'timestamp': '2025-10-02 00:22:04.135267', 'step': 6286, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:04.189963', 'step': 6286, 'epoch': 1}
{'type': 'loss', 'content': 0.19007544219493866, 'timestamp': '2025-10-02 00:22:04.192223', 'step': 6287, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:22:04.246056', 'step': 6287, 'epoch': 1}
{'type': 'loss', 'content': 0.207322895526886, 'timestamp': '2025-10-02 00:22:04.251966', 'step': 6288, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:04.312596', 'step': 6288, 'epoch': 1}
{'type': 'loss', 'content': 0.06905229389667511, 'timestamp': '2025-10-02 00:22:04.314845', 'step': 6289, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:22:04.375565', 'step': 6289, 'epoch': 1}
{'type': 'loss', 'content': 0.11779803037643433, 'timestamp': '2025-10-02 00:22:04.386555', 'step': 6290, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:04.441495', 'step': 6290, 'epoch': 1}
{'type': 'loss', 'content': 0.04018871486186981, 'timestamp': '2025-10-02 00:22:04.443812', 'step': 6291, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:04.497871', 'step': 6291, 'epoch': 1}
{'type': 'loss', 'content': 0.08900671452283859, 'timestamp': '2025-10-02 00:22:04.504001', 'step': 6292, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:04.558816', 'step': 6292, 'epoch': 1}
{'type': 'loss', 'content': 0.04667900502681732, 'timestamp': '2025-10-02 00:22:04.568374', 'step': 6293, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:04.622378', 'step': 6293, 'epoch': 1}
{'type': 'loss', 'content': 0.14431245625019073, 'timestamp': '2025-10-02 00:22:04.625000', 'step': 6294, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:04.679022', 'step': 6294, 'epoch': 1}
{'type': 'loss', 'content': 0.11465362459421158, 'timestamp': '2025-10-02 00:22:04.681869', 'step': 6295, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:04.736763', 'step': 6295, 'epoch': 1}
{'type': 'loss', 'content': 0.0617624931037426, 'timestamp': '2025-10-02 00:22:04.743072', 'step': 6296, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:04.797806', 'step': 6296, 'epoch': 1}
{'type': 'loss', 'content': 0.08431260287761688, 'timestamp': '2025-10-02 00:22:04.800220', 'step': 6297, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:04.854718', 'step': 6297, 'epoch': 1}
{'type': 'loss', 'content': 0.10235201567411423, 'timestamp': '2025-10-02 00:22:04.857110', 'step': 6298, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:04.915331', 'step': 6298, 'epoch': 1}
{'type': 'loss', 'content': 0.07637814432382584, 'timestamp': '2025-10-02 00:22:04.925741', 'step': 6299, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:04.980166', 'step': 6299, 'epoch': 1}
{'type': 'loss', 'content': 0.07475370168685913, 'timestamp': '2025-10-02 00:22:04.988689', 'step': 6300, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:05.043249', 'step': 6300, 'epoch': 1}
{'type': 'loss', 'content': 0.0809192880988121, 'timestamp': '2025-10-02 00:22:05.045847', 'step': 6301, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:05.100117', 'step': 6301, 'epoch': 1}
{'type': 'loss', 'content': 0.10285818576812744, 'timestamp': '2025-10-02 00:22:05.102564', 'step': 6302, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:05.157518', 'step': 6302, 'epoch': 1}
{'type': 'loss', 'content': 0.09666929394006729, 'timestamp': '2025-10-02 00:22:05.159946', 'step': 6303, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:22:05.221800', 'step': 6303, 'epoch': 1}
{'type': 'loss', 'content': 0.0294216126203537, 'timestamp': '2025-10-02 00:22:05.233523', 'step': 6304, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:05.288108', 'step': 6304, 'epoch': 1}
{'type': 'loss', 'content': 0.20565146207809448, 'timestamp': '2025-10-02 00:22:05.290515', 'step': 6305, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:22:05.343748', 'step': 6305, 'epoch': 1}
{'type': 'loss', 'content': 0.08176052570343018, 'timestamp': '2025-10-02 00:22:05.346109', 'step': 6306, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:05.399723', 'step': 6306, 'epoch': 1}
{'type': 'loss', 'content': 0.11092599481344223, 'timestamp': '2025-10-02 00:22:05.402335', 'step': 6307, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:05.458355', 'step': 6307, 'epoch': 1}
{'type': 'loss', 'content': 0.17606809735298157, 'timestamp': '2025-10-02 00:22:05.464827', 'step': 6308, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:22:05.518124', 'step': 6308, 'epoch': 1}
{'type': 'loss', 'content': 0.1623929888010025, 'timestamp': '2025-10-02 00:22:05.520405', 'step': 6309, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:05.574957', 'step': 6309, 'epoch': 1}
{'type': 'loss', 'content': 0.07656744122505188, 'timestamp': '2025-10-02 00:22:05.581174', 'step': 6310, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:05.635001', 'step': 6310, 'epoch': 1}
{'type': 'loss', 'content': 0.140367329120636, 'timestamp': '2025-10-02 00:22:05.637532', 'step': 6311, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:05.690899', 'step': 6311, 'epoch': 1}
{'type': 'loss', 'content': 0.04130600392818451, 'timestamp': '2025-10-02 00:22:05.696763', 'step': 6312, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:05.751431', 'step': 6312, 'epoch': 1}
{'type': 'loss', 'content': 0.08728712797164917, 'timestamp': '2025-10-02 00:22:05.753506', 'step': 6313, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:22:05.822005', 'step': 6313, 'epoch': 1}
{'type': 'loss', 'content': 0.04941296577453613, 'timestamp': '2025-10-02 00:22:05.834696', 'step': 6314, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:05.888357', 'step': 6314, 'epoch': 1}
{'type': 'loss', 'content': 0.1025681346654892, 'timestamp': '2025-10-02 00:22:05.890913', 'step': 6315, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:05.945101', 'step': 6315, 'epoch': 1}
{'type': 'loss', 'content': 0.2024935781955719, 'timestamp': '2025-10-02 00:22:05.951042', 'step': 6316, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:06.004459', 'step': 6316, 'epoch': 1}
{'type': 'loss', 'content': 0.11434091627597809, 'timestamp': '2025-10-02 00:22:06.007845', 'step': 6317, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:06.063127', 'step': 6317, 'epoch': 1}
{'type': 'loss', 'content': 0.0872170552611351, 'timestamp': '2025-10-02 00:22:06.070949', 'step': 6318, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:06.127376', 'step': 6318, 'epoch': 1}
{'type': 'loss', 'content': 0.08556494116783142, 'timestamp': '2025-10-02 00:22:06.129935', 'step': 6319, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:06.185294', 'step': 6319, 'epoch': 1}
{'type': 'loss', 'content': 0.18547840416431427, 'timestamp': '2025-10-02 00:22:06.191468', 'step': 6320, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:06.246819', 'step': 6320, 'epoch': 1}
{'type': 'loss', 'content': 0.12124450504779816, 'timestamp': '2025-10-02 00:22:06.249135', 'step': 6321, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:06.304467', 'step': 6321, 'epoch': 1}
{'type': 'loss', 'content': 0.1787305772304535, 'timestamp': '2025-10-02 00:22:06.307230', 'step': 6322, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:06.366557', 'step': 6322, 'epoch': 1}
{'type': 'loss', 'content': 0.1633964478969574, 'timestamp': '2025-10-02 00:22:06.369396', 'step': 6323, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:22:06.424532', 'step': 6323, 'epoch': 1}
{'type': 'loss', 'content': 0.08397550135850906, 'timestamp': '2025-10-02 00:22:06.430743', 'step': 6324, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:06.491751', 'step': 6324, 'epoch': 1}
{'type': 'loss', 'content': 0.06484437733888626, 'timestamp': '2025-10-02 00:22:06.503003', 'step': 6325, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:06.558345', 'step': 6325, 'epoch': 1}
{'type': 'loss', 'content': 0.05124689266085625, 'timestamp': '2025-10-02 00:22:06.566132', 'step': 6326, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:06.642945', 'step': 6326, 'epoch': 1}
{'type': 'loss', 'content': 0.1584450602531433, 'timestamp': '2025-10-02 00:22:06.646635', 'step': 6327, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:06.733800', 'step': 6327, 'epoch': 1}
{'type': 'loss', 'content': 0.08249112963676453, 'timestamp': '2025-10-02 00:22:06.751919', 'step': 6328, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:06.834775', 'step': 6328, 'epoch': 1}
{'type': 'loss', 'content': 0.1662214696407318, 'timestamp': '2025-10-02 00:22:06.838866', 'step': 6329, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:06.922015', 'step': 6329, 'epoch': 1}
{'type': 'loss', 'content': 0.0992591604590416, 'timestamp': '2025-10-02 00:22:06.925233', 'step': 6330, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:07.010874', 'step': 6330, 'epoch': 1}
{'type': 'loss', 'content': 0.027950424700975418, 'timestamp': '2025-10-02 00:22:07.026569', 'step': 6331, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:07.089239', 'step': 6331, 'epoch': 1}
{'type': 'loss', 'content': 0.15450254082679749, 'timestamp': '2025-10-02 00:22:07.096606', 'step': 6332, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:07.160776', 'step': 6332, 'epoch': 1}
{'type': 'loss', 'content': 0.2545510530471802, 'timestamp': '2025-10-02 00:22:07.166908', 'step': 6333, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:07.242248', 'step': 6333, 'epoch': 1}
{'type': 'loss', 'content': 0.12419316917657852, 'timestamp': '2025-10-02 00:22:07.250281', 'step': 6334, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:07.314858', 'step': 6334, 'epoch': 1}
{'type': 'loss', 'content': 0.06653425842523575, 'timestamp': '2025-10-02 00:22:07.321441', 'step': 6335, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:22:07.377426', 'step': 6335, 'epoch': 1}
{'type': 'loss', 'content': 0.11629685014486313, 'timestamp': '2025-10-02 00:22:07.389844', 'step': 6336, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:07.458396', 'step': 6336, 'epoch': 1}
{'type': 'loss', 'content': 0.06871006637811661, 'timestamp': '2025-10-02 00:22:07.467613', 'step': 6337, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:07.535090', 'step': 6337, 'epoch': 1}
{'type': 'loss', 'content': 0.1948602944612503, 'timestamp': '2025-10-02 00:22:07.541867', 'step': 6338, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:07.616234', 'step': 6338, 'epoch': 1}
{'type': 'loss', 'content': 0.19835533201694489, 'timestamp': '2025-10-02 00:22:07.619424', 'step': 6339, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:07.680063', 'step': 6339, 'epoch': 1}
{'type': 'loss', 'content': 0.13717125356197357, 'timestamp': '2025-10-02 00:22:07.691435', 'step': 6340, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:07.758558', 'step': 6340, 'epoch': 1}
{'type': 'loss', 'content': 0.06685740500688553, 'timestamp': '2025-10-02 00:22:07.761493', 'step': 6341, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:07.829730', 'step': 6341, 'epoch': 1}
{'type': 'loss', 'content': 0.03544560447335243, 'timestamp': '2025-10-02 00:22:07.840228', 'step': 6342, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:07.908047', 'step': 6342, 'epoch': 1}
{'type': 'loss', 'content': 0.07673313468694687, 'timestamp': '2025-10-02 00:22:07.912471', 'step': 6343, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:07.974541', 'step': 6343, 'epoch': 1}
{'type': 'loss', 'content': 0.030208274722099304, 'timestamp': '2025-10-02 00:22:07.981559', 'step': 6344, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:22:08.037309', 'step': 6344, 'epoch': 1}
{'type': 'loss', 'content': 0.08601295948028564, 'timestamp': '2025-10-02 00:22:08.042133', 'step': 6345, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:08.117047', 'step': 6345, 'epoch': 1}
{'type': 'loss', 'content': 0.16991102695465088, 'timestamp': '2025-10-02 00:22:08.122301', 'step': 6346, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:08.182699', 'step': 6346, 'epoch': 1}
{'type': 'loss', 'content': 0.14000703394412994, 'timestamp': '2025-10-02 00:22:08.188076', 'step': 6347, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:08.255565', 'step': 6347, 'epoch': 1}
{'type': 'loss', 'content': 0.04162515327334404, 'timestamp': '2025-10-02 00:22:08.265313', 'step': 6348, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:08.338025', 'step': 6348, 'epoch': 1}
{'type': 'loss', 'content': 0.07357273250818253, 'timestamp': '2025-10-02 00:22:08.347467', 'step': 6349, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:08.413812', 'step': 6349, 'epoch': 1}
{'type': 'loss', 'content': 0.12608440220355988, 'timestamp': '2025-10-02 00:22:08.416737', 'step': 6350, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:08.487053', 'step': 6350, 'epoch': 1}
{'type': 'loss', 'content': 0.02434983104467392, 'timestamp': '2025-10-02 00:22:08.497496', 'step': 6351, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:08.570268', 'step': 6351, 'epoch': 1}
{'type': 'loss', 'content': 0.06278867274522781, 'timestamp': '2025-10-02 00:22:08.578073', 'step': 6352, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:22:08.645494', 'step': 6352, 'epoch': 1}
{'type': 'loss', 'content': 0.04575240612030029, 'timestamp': '2025-10-02 00:22:08.657088', 'step': 6353, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:08.732577', 'step': 6353, 'epoch': 1}
{'type': 'loss', 'content': 0.0936335101723671, 'timestamp': '2025-10-02 00:22:08.741047', 'step': 6354, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:08.801154', 'step': 6354, 'epoch': 1}
{'type': 'loss', 'content': 0.10004058480262756, 'timestamp': '2025-10-02 00:22:08.808837', 'step': 6355, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:08.880671', 'step': 6355, 'epoch': 1}
{'type': 'loss', 'content': 0.10332860797643661, 'timestamp': '2025-10-02 00:22:08.892928', 'step': 6356, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:08.965735', 'step': 6356, 'epoch': 1}
{'type': 'loss', 'content': 0.08839859813451767, 'timestamp': '2025-10-02 00:22:08.974219', 'step': 6357, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:09.045794', 'step': 6357, 'epoch': 1}
{'type': 'loss', 'content': 0.07708489894866943, 'timestamp': '2025-10-02 00:22:09.055446', 'step': 6358, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:09.126957', 'step': 6358, 'epoch': 1}
{'type': 'loss', 'content': 0.03304415941238403, 'timestamp': '2025-10-02 00:22:09.135270', 'step': 6359, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:09.208411', 'step': 6359, 'epoch': 1}
{'type': 'loss', 'content': 0.16765786707401276, 'timestamp': '2025-10-02 00:22:09.215042', 'step': 6360, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:22:09.280005', 'step': 6360, 'epoch': 1}
{'type': 'loss', 'content': 0.19414447247982025, 'timestamp': '2025-10-02 00:22:09.283010', 'step': 6361, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:09.350438', 'step': 6361, 'epoch': 1}
{'type': 'loss', 'content': 0.13403019309043884, 'timestamp': '2025-10-02 00:22:09.359359', 'step': 6362, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:09.415836', 'step': 6362, 'epoch': 1}
{'type': 'loss', 'content': 0.15749290585517883, 'timestamp': '2025-10-02 00:22:09.422000', 'step': 6363, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:09.490256', 'step': 6363, 'epoch': 1}
{'type': 'loss', 'content': 0.040026791393756866, 'timestamp': '2025-10-02 00:22:09.502999', 'step': 6364, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:09.562582', 'step': 6364, 'epoch': 1}
{'type': 'loss', 'content': 0.10803048312664032, 'timestamp': '2025-10-02 00:22:09.566623', 'step': 6365, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:09.633951', 'step': 6365, 'epoch': 1}
{'type': 'loss', 'content': 0.05578259378671646, 'timestamp': '2025-10-02 00:22:09.643696', 'step': 6366, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:09.714349', 'step': 6366, 'epoch': 1}
{'type': 'loss', 'content': 0.16663235425949097, 'timestamp': '2025-10-02 00:22:09.721539', 'step': 6367, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:09.797940', 'step': 6367, 'epoch': 1}
{'type': 'loss', 'content': 0.08631348609924316, 'timestamp': '2025-10-02 00:22:09.808339', 'step': 6368, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:09.886273', 'step': 6368, 'epoch': 1}
{'type': 'loss', 'content': 0.06258413195610046, 'timestamp': '2025-10-02 00:22:09.889300', 'step': 6369, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:09.959161', 'step': 6369, 'epoch': 1}
{'type': 'loss', 'content': 0.06188105419278145, 'timestamp': '2025-10-02 00:22:09.962496', 'step': 6370, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:10.026591', 'step': 6370, 'epoch': 1}
{'type': 'loss', 'content': 0.08601882308721542, 'timestamp': '2025-10-02 00:22:10.030692', 'step': 6371, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:10.104458', 'step': 6371, 'epoch': 1}
{'type': 'loss', 'content': 0.09511193633079529, 'timestamp': '2025-10-02 00:22:10.111243', 'step': 6372, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:10.185821', 'step': 6372, 'epoch': 1}
{'type': 'loss', 'content': 0.08433376997709274, 'timestamp': '2025-10-02 00:22:10.188668', 'step': 6373, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:10.245525', 'step': 6373, 'epoch': 1}
{'type': 'loss', 'content': 0.04303212836384773, 'timestamp': '2025-10-02 00:22:10.253098', 'step': 6374, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:10.313624', 'step': 6374, 'epoch': 1}
{'type': 'loss', 'content': 0.03807186707854271, 'timestamp': '2025-10-02 00:22:10.321166', 'step': 6375, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:10.392527', 'step': 6375, 'epoch': 1}
{'type': 'loss', 'content': 0.056901417672634125, 'timestamp': '2025-10-02 00:22:10.399831', 'step': 6376, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:10.460863', 'step': 6376, 'epoch': 1}
{'type': 'loss', 'content': 0.17047542333602905, 'timestamp': '2025-10-02 00:22:10.463940', 'step': 6377, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:10.532546', 'step': 6377, 'epoch': 1}
{'type': 'loss', 'content': 0.06524919718503952, 'timestamp': '2025-10-02 00:22:10.542091', 'step': 6378, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:10.602666', 'step': 6378, 'epoch': 1}
{'type': 'loss', 'content': 0.15632981061935425, 'timestamp': '2025-10-02 00:22:10.609686', 'step': 6379, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:10.668121', 'step': 6379, 'epoch': 1}
{'type': 'loss', 'content': 0.057486679404973984, 'timestamp': '2025-10-02 00:22:10.679798', 'step': 6380, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:10.750095', 'step': 6380, 'epoch': 1}
{'type': 'loss', 'content': 0.106177918612957, 'timestamp': '2025-10-02 00:22:10.758471', 'step': 6381, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:10.831888', 'step': 6381, 'epoch': 1}
{'type': 'loss', 'content': 0.032870419323444366, 'timestamp': '2025-10-02 00:22:10.839547', 'step': 6382, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:10.903235', 'step': 6382, 'epoch': 1}
{'type': 'loss', 'content': 0.12048886716365814, 'timestamp': '2025-10-02 00:22:10.906442', 'step': 6383, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:10.969554', 'step': 6383, 'epoch': 1}
{'type': 'loss', 'content': 0.07327982783317566, 'timestamp': '2025-10-02 00:22:10.975801', 'step': 6384, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:11.032241', 'step': 6384, 'epoch': 1}
{'type': 'loss', 'content': 0.056942395865917206, 'timestamp': '2025-10-02 00:22:11.039994', 'step': 6385, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:22:11.120847', 'step': 6385, 'epoch': 1}
{'type': 'loss', 'content': 0.023084387183189392, 'timestamp': '2025-10-02 00:22:11.131992', 'step': 6386, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:11.205086', 'step': 6386, 'epoch': 1}
{'type': 'loss', 'content': 0.03543234243988991, 'timestamp': '2025-10-02 00:22:11.212750', 'step': 6387, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:11.288042', 'step': 6387, 'epoch': 1}
{'type': 'loss', 'content': 0.042025256901979446, 'timestamp': '2025-10-02 00:22:11.295165', 'step': 6388, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:22:11.366180', 'step': 6388, 'epoch': 1}
{'type': 'loss', 'content': 0.1262919008731842, 'timestamp': '2025-10-02 00:22:11.371464', 'step': 6389, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:11.435763', 'step': 6389, 'epoch': 1}
{'type': 'loss', 'content': 0.08262071758508682, 'timestamp': '2025-10-02 00:22:11.442204', 'step': 6390, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:11.512976', 'step': 6390, 'epoch': 1}
{'type': 'loss', 'content': 0.05756595358252525, 'timestamp': '2025-10-02 00:22:11.515432', 'step': 6391, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:11.580707', 'step': 6391, 'epoch': 1}
{'type': 'loss', 'content': 0.05951669067144394, 'timestamp': '2025-10-02 00:22:11.589352', 'step': 6392, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:11.654360', 'step': 6392, 'epoch': 1}
{'type': 'loss', 'content': 0.16707251965999603, 'timestamp': '2025-10-02 00:22:11.658774', 'step': 6393, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:22:11.736508', 'step': 6393, 'epoch': 1}
{'type': 'loss', 'content': 0.06333756446838379, 'timestamp': '2025-10-02 00:22:11.747207', 'step': 6394, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:11.808244', 'step': 6394, 'epoch': 1}
{'type': 'loss', 'content': 0.15500038862228394, 'timestamp': '2025-10-02 00:22:11.812307', 'step': 6395, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:11.882243', 'step': 6395, 'epoch': 1}
{'type': 'loss', 'content': 0.09600695967674255, 'timestamp': '2025-10-02 00:22:11.892606', 'step': 6396, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:22:11.963940', 'step': 6396, 'epoch': 1}
{'type': 'loss', 'content': 0.020312245935201645, 'timestamp': '2025-10-02 00:22:11.975697', 'step': 6397, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:12.041091', 'step': 6397, 'epoch': 1}
{'type': 'loss', 'content': 0.07269163429737091, 'timestamp': '2025-10-02 00:22:12.050897', 'step': 6398, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:22:12.132189', 'step': 6398, 'epoch': 1}
{'type': 'loss', 'content': 0.04112914949655533, 'timestamp': '2025-10-02 00:22:12.144417', 'step': 6399, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:12.212093', 'step': 6399, 'epoch': 1}
{'type': 'loss', 'content': 0.08571138978004456, 'timestamp': '2025-10-02 00:22:12.222670', 'step': 6400, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:12.278175', 'step': 6400, 'epoch': 1}
{'type': 'loss', 'content': 0.11475654691457748, 'timestamp': '2025-10-02 00:22:12.281114', 'step': 6401, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:22:12.341629', 'step': 6401, 'epoch': 1}
{'type': 'loss', 'content': 0.0959639847278595, 'timestamp': '2025-10-02 00:22:12.344936', 'step': 6402, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:12.404602', 'step': 6402, 'epoch': 1}
{'type': 'loss', 'content': 0.12295399606227875, 'timestamp': '2025-10-02 00:22:12.410220', 'step': 6403, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:12.472445', 'step': 6403, 'epoch': 1}
{'type': 'loss', 'content': 0.08720345050096512, 'timestamp': '2025-10-02 00:22:12.482471', 'step': 6404, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:22:12.544398', 'step': 6404, 'epoch': 1}
{'type': 'loss', 'content': 0.14217866957187653, 'timestamp': '2025-10-02 00:22:12.548693', 'step': 6405, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:12.625623', 'step': 6405, 'epoch': 1}
{'type': 'loss', 'content': 0.04035712406039238, 'timestamp': '2025-10-02 00:22:12.631773', 'step': 6406, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:12.697780', 'step': 6406, 'epoch': 1}
{'type': 'loss', 'content': 0.17169873416423798, 'timestamp': '2025-10-02 00:22:12.701553', 'step': 6407, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:12.763678', 'step': 6407, 'epoch': 1}
{'type': 'loss', 'content': 0.09150058776140213, 'timestamp': '2025-10-02 00:22:12.773421', 'step': 6408, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:12.845046', 'step': 6408, 'epoch': 1}
{'type': 'loss', 'content': 0.18089260160923004, 'timestamp': '2025-10-02 00:22:12.851843', 'step': 6409, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:12.913251', 'step': 6409, 'epoch': 1}
{'type': 'loss', 'content': 0.046133507043123245, 'timestamp': '2025-10-02 00:22:12.915974', 'step': 6410, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:12.978660', 'step': 6410, 'epoch': 1}
{'type': 'loss', 'content': 0.1325927972793579, 'timestamp': '2025-10-02 00:22:12.984055', 'step': 6411, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:13.052351', 'step': 6411, 'epoch': 1}
{'type': 'loss', 'content': 0.11068041622638702, 'timestamp': '2025-10-02 00:22:13.060055', 'step': 6412, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:13.118515', 'step': 6412, 'epoch': 1}
{'type': 'loss', 'content': 0.022462153807282448, 'timestamp': '2025-10-02 00:22:13.125367', 'step': 6413, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:13.200425', 'step': 6413, 'epoch': 1}
{'type': 'loss', 'content': 0.02800162509083748, 'timestamp': '2025-10-02 00:22:13.206175', 'step': 6414, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:13.278144', 'step': 6414, 'epoch': 1}
{'type': 'loss', 'content': 0.017259769141674042, 'timestamp': '2025-10-02 00:22:13.287800', 'step': 6415, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:13.347525', 'step': 6415, 'epoch': 1}
{'type': 'loss', 'content': 0.09328212589025497, 'timestamp': '2025-10-02 00:22:13.359080', 'step': 6416, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:13.427411', 'step': 6416, 'epoch': 1}
{'type': 'loss', 'content': 0.11099836975336075, 'timestamp': '2025-10-02 00:22:13.442308', 'step': 6417, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:13.509077', 'step': 6417, 'epoch': 1}
{'type': 'loss', 'content': 0.012667293660342693, 'timestamp': '2025-10-02 00:22:13.518002', 'step': 6418, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:13.578422', 'step': 6418, 'epoch': 1}
{'type': 'loss', 'content': 0.14242103695869446, 'timestamp': '2025-10-02 00:22:13.586465', 'step': 6419, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:13.656675', 'step': 6419, 'epoch': 1}
{'type': 'loss', 'content': 0.10177487879991531, 'timestamp': '2025-10-02 00:22:13.667380', 'step': 6420, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:13.735219', 'step': 6420, 'epoch': 1}
{'type': 'loss', 'content': 0.07182779908180237, 'timestamp': '2025-10-02 00:22:13.741636', 'step': 6421, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:13.808519', 'step': 6421, 'epoch': 1}
{'type': 'loss', 'content': 0.14665468037128448, 'timestamp': '2025-10-02 00:22:13.816328', 'step': 6422, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:22:13.890600', 'step': 6422, 'epoch': 1}
{'type': 'loss', 'content': 0.042664069682359695, 'timestamp': '2025-10-02 00:22:13.901287', 'step': 6423, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:13.967752', 'step': 6423, 'epoch': 1}
{'type': 'loss', 'content': 0.03942851349711418, 'timestamp': '2025-10-02 00:22:13.976222', 'step': 6424, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:22:14.055245', 'step': 6424, 'epoch': 1}
{'type': 'loss', 'content': 0.03113410621881485, 'timestamp': '2025-10-02 00:22:14.066826', 'step': 6425, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:14.134043', 'step': 6425, 'epoch': 1}
{'type': 'loss', 'content': 0.07440099120140076, 'timestamp': '2025-10-02 00:22:14.137327', 'step': 6426, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:14.200590', 'step': 6426, 'epoch': 1}
{'type': 'loss', 'content': 0.015770163387060165, 'timestamp': '2025-10-02 00:22:14.210373', 'step': 6427, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:14.283114', 'step': 6427, 'epoch': 1}
{'type': 'loss', 'content': 0.09862746298313141, 'timestamp': '2025-10-02 00:22:14.292888', 'step': 6428, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:14.356275', 'step': 6428, 'epoch': 1}
{'type': 'loss', 'content': 0.16979089379310608, 'timestamp': '2025-10-02 00:22:14.359921', 'step': 6429, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:14.426485', 'step': 6429, 'epoch': 1}
{'type': 'loss', 'content': 0.2180919349193573, 'timestamp': '2025-10-02 00:22:14.429241', 'step': 6430, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:14.495086', 'step': 6430, 'epoch': 1}
{'type': 'loss', 'content': 0.18502531945705414, 'timestamp': '2025-10-02 00:22:14.499851', 'step': 6431, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:14.567148', 'step': 6431, 'epoch': 1}
{'type': 'loss', 'content': 0.043616510927677155, 'timestamp': '2025-10-02 00:22:14.573277', 'step': 6432, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:14.634153', 'step': 6432, 'epoch': 1}
{'type': 'loss', 'content': 0.12359423190355301, 'timestamp': '2025-10-02 00:22:14.640392', 'step': 6433, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:22:14.715963', 'step': 6433, 'epoch': 1}
{'type': 'loss', 'content': 0.019381554797291756, 'timestamp': '2025-10-02 00:22:14.728213', 'step': 6434, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:14.790529', 'step': 6434, 'epoch': 1}
{'type': 'loss', 'content': 0.11427352577447891, 'timestamp': '2025-10-02 00:22:14.796742', 'step': 6435, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:22:14.860920', 'step': 6435, 'epoch': 1}
{'type': 'loss', 'content': 0.11246788501739502, 'timestamp': '2025-10-02 00:22:14.868022', 'step': 6436, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:22:14.942252', 'step': 6436, 'epoch': 1}
{'type': 'loss', 'content': 0.22361129522323608, 'timestamp': '2025-10-02 00:22:14.950688', 'step': 6437, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:22:15.016250', 'step': 6437, 'epoch': 1}
{'type': 'loss', 'content': 0.029534369707107544, 'timestamp': '2025-10-02 00:22:15.027117', 'step': 6438, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:15.085036', 'step': 6438, 'epoch': 1}
{'type': 'loss', 'content': 0.008880107663571835, 'timestamp': '2025-10-02 00:22:15.088543', 'step': 6439, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:15.149880', 'step': 6439, 'epoch': 1}
{'type': 'loss', 'content': 0.0886324793100357, 'timestamp': '2025-10-02 00:22:15.160612', 'step': 6440, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:15.220266', 'step': 6440, 'epoch': 1}
{'type': 'loss', 'content': 0.05013434961438179, 'timestamp': '2025-10-02 00:22:15.231514', 'step': 6441, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:15.285743', 'step': 6441, 'epoch': 1}
{'type': 'loss', 'content': 0.04830126091837883, 'timestamp': '2025-10-02 00:22:15.288398', 'step': 6442, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:15.342756', 'step': 6442, 'epoch': 1}
{'type': 'loss', 'content': 0.06145811453461647, 'timestamp': '2025-10-02 00:22:15.350386', 'step': 6443, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:15.404817', 'step': 6443, 'epoch': 1}
{'type': 'loss', 'content': 0.05214966461062431, 'timestamp': '2025-10-02 00:22:15.413178', 'step': 6444, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:22:15.473388', 'step': 6444, 'epoch': 1}
{'type': 'loss', 'content': 0.06508677452802658, 'timestamp': '2025-10-02 00:22:15.485132', 'step': 6445, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:15.539513', 'step': 6445, 'epoch': 1}
{'type': 'loss', 'content': 0.05299794301390648, 'timestamp': '2025-10-02 00:22:15.542141', 'step': 6446, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:15.596160', 'step': 6446, 'epoch': 1}
{'type': 'loss', 'content': 0.24863271415233612, 'timestamp': '2025-10-02 00:22:15.599264', 'step': 6447, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:22:15.664236', 'step': 6447, 'epoch': 1}
{'type': 'loss', 'content': 0.013921871781349182, 'timestamp': '2025-10-02 00:22:15.675814', 'step': 6448, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:15.729244', 'step': 6448, 'epoch': 1}
{'type': 'loss', 'content': 0.03260107338428497, 'timestamp': '2025-10-02 00:22:15.735417', 'step': 6449, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:15.790830', 'step': 6449, 'epoch': 1}
{'type': 'loss', 'content': 0.0518670417368412, 'timestamp': '2025-10-02 00:22:15.800448', 'step': 6450, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:15.854683', 'step': 6450, 'epoch': 1}
{'type': 'loss', 'content': 0.12120098620653152, 'timestamp': '2025-10-02 00:22:15.858079', 'step': 6451, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:15.932665', 'step': 6451, 'epoch': 1}
{'type': 'loss', 'content': 0.11196883767843246, 'timestamp': '2025-10-02 00:22:15.940872', 'step': 6452, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:16.004248', 'step': 6452, 'epoch': 1}
{'type': 'loss', 'content': 0.07224875688552856, 'timestamp': '2025-10-02 00:22:16.009390', 'step': 6453, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:16.072316', 'step': 6453, 'epoch': 1}
{'type': 'loss', 'content': 0.039629463106393814, 'timestamp': '2025-10-02 00:22:16.082113', 'step': 6454, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:16.144891', 'step': 6454, 'epoch': 1}
{'type': 'loss', 'content': 0.1287323236465454, 'timestamp': '2025-10-02 00:22:16.154698', 'step': 6455, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:16.213792', 'step': 6455, 'epoch': 1}
{'type': 'loss', 'content': 0.1419631689786911, 'timestamp': '2025-10-02 00:22:16.221505', 'step': 6456, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:16.283866', 'step': 6456, 'epoch': 1}
{'type': 'loss', 'content': 0.028698468580842018, 'timestamp': '2025-10-02 00:22:16.289972', 'step': 6457, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:16.346356', 'step': 6457, 'epoch': 1}
{'type': 'loss', 'content': 0.1366586834192276, 'timestamp': '2025-10-02 00:22:16.350820', 'step': 6458, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:16.415736', 'step': 6458, 'epoch': 1}
{'type': 'loss', 'content': 0.06424706429243088, 'timestamp': '2025-10-02 00:22:16.422053', 'step': 6459, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:16.492490', 'step': 6459, 'epoch': 1}
{'type': 'loss', 'content': 0.1302812248468399, 'timestamp': '2025-10-02 00:22:16.498937', 'step': 6460, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:16.555469', 'step': 6460, 'epoch': 1}
{'type': 'loss', 'content': 0.02811589278280735, 'timestamp': '2025-10-02 00:22:16.560924', 'step': 6461, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:16.628464', 'step': 6461, 'epoch': 1}
{'type': 'loss', 'content': 0.07478129118680954, 'timestamp': '2025-10-02 00:22:16.634664', 'step': 6462, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:16.704501', 'step': 6462, 'epoch': 1}
{'type': 'loss', 'content': 0.04376379773020744, 'timestamp': '2025-10-02 00:22:16.714955', 'step': 6463, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:16.778725', 'step': 6463, 'epoch': 1}
{'type': 'loss', 'content': 0.053490106016397476, 'timestamp': '2025-10-02 00:22:16.785108', 'step': 6464, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:16.850717', 'step': 6464, 'epoch': 1}
{'type': 'loss', 'content': 0.045141469687223434, 'timestamp': '2025-10-02 00:22:16.858333', 'step': 6465, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:22:16.924560', 'step': 6465, 'epoch': 1}
{'type': 'loss', 'content': 0.15264682471752167, 'timestamp': '2025-10-02 00:22:16.929385', 'step': 6466, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:17.002726', 'step': 6466, 'epoch': 1}
{'type': 'loss', 'content': 0.03128579258918762, 'timestamp': '2025-10-02 00:22:17.012534', 'step': 6467, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:17.078824', 'step': 6467, 'epoch': 1}
{'type': 'loss', 'content': 0.1394275575876236, 'timestamp': '2025-10-02 00:22:17.088572', 'step': 6468, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:17.154327', 'step': 6468, 'epoch': 1}
{'type': 'loss', 'content': 0.01597652956843376, 'timestamp': '2025-10-02 00:22:17.162005', 'step': 6469, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:17.229993', 'step': 6469, 'epoch': 1}
{'type': 'loss', 'content': 0.13859044015407562, 'timestamp': '2025-10-02 00:22:17.234361', 'step': 6470, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:17.297379', 'step': 6470, 'epoch': 1}
{'type': 'loss', 'content': 0.07090027630329132, 'timestamp': '2025-10-02 00:22:17.304437', 'step': 6471, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:17.371255', 'step': 6471, 'epoch': 1}
{'type': 'loss', 'content': 0.040797483175992966, 'timestamp': '2025-10-02 00:22:17.378005', 'step': 6472, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:17.432802', 'step': 6472, 'epoch': 1}
{'type': 'loss', 'content': 0.12603749334812164, 'timestamp': '2025-10-02 00:22:17.438222', 'step': 6473, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:17.505777', 'step': 6473, 'epoch': 1}
{'type': 'loss', 'content': 0.07762662321329117, 'timestamp': '2025-10-02 00:22:17.515398', 'step': 6474, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:17.579858', 'step': 6474, 'epoch': 1}
{'type': 'loss', 'content': 0.04963843896985054, 'timestamp': '2025-10-02 00:22:17.585905', 'step': 6475, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:17.654450', 'step': 6475, 'epoch': 1}
{'type': 'loss', 'content': 0.05858035758137703, 'timestamp': '2025-10-02 00:22:17.664361', 'step': 6476, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:17.731797', 'step': 6476, 'epoch': 1}
{'type': 'loss', 'content': 0.11514449864625931, 'timestamp': '2025-10-02 00:22:17.734418', 'step': 6477, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:17.794664', 'step': 6477, 'epoch': 1}
{'type': 'loss', 'content': 0.12590865790843964, 'timestamp': '2025-10-02 00:22:17.797786', 'step': 6478, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:17.856043', 'step': 6478, 'epoch': 1}
{'type': 'loss', 'content': 0.015239058062434196, 'timestamp': '2025-10-02 00:22:17.865322', 'step': 6479, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:17.929586', 'step': 6479, 'epoch': 1}
{'type': 'loss', 'content': 0.06483802944421768, 'timestamp': '2025-10-02 00:22:17.939868', 'step': 6480, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:18.008405', 'step': 6480, 'epoch': 1}
{'type': 'loss', 'content': 0.09330384433269501, 'timestamp': '2025-10-02 00:22:18.012448', 'step': 6481, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:18.073351', 'step': 6481, 'epoch': 1}
{'type': 'loss', 'content': 0.009130148217082024, 'timestamp': '2025-10-02 00:22:18.083820', 'step': 6482, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:18.146563', 'step': 6482, 'epoch': 1}
{'type': 'loss', 'content': 0.04359184205532074, 'timestamp': '2025-10-02 00:22:18.153665', 'step': 6483, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:22:18.221399', 'step': 6483, 'epoch': 1}
{'type': 'loss', 'content': 0.2788822054862976, 'timestamp': '2025-10-02 00:22:18.232492', 'step': 6484, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:18.300702', 'step': 6484, 'epoch': 1}
{'type': 'loss', 'content': 0.046926695853471756, 'timestamp': '2025-10-02 00:22:18.306697', 'step': 6485, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:22:18.359894', 'step': 6485, 'epoch': 1}
{'type': 'loss', 'content': 0.07065586000680923, 'timestamp': '2025-10-02 00:22:18.362248', 'step': 6486, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:22:18.416539', 'step': 6486, 'epoch': 1}
{'type': 'loss', 'content': 0.22807066142559052, 'timestamp': '2025-10-02 00:22:18.419055', 'step': 6487, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:18.473164', 'step': 6487, 'epoch': 1}
{'type': 'loss', 'content': 0.06944732367992401, 'timestamp': '2025-10-02 00:22:18.481596', 'step': 6488, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:18.534513', 'step': 6488, 'epoch': 1}
{'type': 'loss', 'content': 0.05399990826845169, 'timestamp': '2025-10-02 00:22:18.540664', 'step': 6489, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:22:18.594461', 'step': 6489, 'epoch': 1}
{'type': 'loss', 'content': 0.1667810082435608, 'timestamp': '2025-10-02 00:22:18.596639', 'step': 6490, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:18.649827', 'step': 6490, 'epoch': 1}
{'type': 'loss', 'content': 0.04497119039297104, 'timestamp': '2025-10-02 00:22:18.652479', 'step': 6491, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:18.705629', 'step': 6491, 'epoch': 1}
{'type': 'loss', 'content': 0.11444595456123352, 'timestamp': '2025-10-02 00:22:18.711453', 'step': 6492, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:18.771480', 'step': 6492, 'epoch': 1}
{'type': 'loss', 'content': 0.029204560443758965, 'timestamp': '2025-10-02 00:22:18.782693', 'step': 6493, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:18.839393', 'step': 6493, 'epoch': 1}
{'type': 'loss', 'content': 0.025270380079746246, 'timestamp': '2025-10-02 00:22:18.849012', 'step': 6494, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:18.905446', 'step': 6494, 'epoch': 1}
{'type': 'loss', 'content': 0.0864151194691658, 'timestamp': '2025-10-02 00:22:18.908378', 'step': 6495, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:18.962945', 'step': 6495, 'epoch': 1}
{'type': 'loss', 'content': 0.05772945657372475, 'timestamp': '2025-10-02 00:22:18.969625', 'step': 6496, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:19.024135', 'step': 6496, 'epoch': 1}
{'type': 'loss', 'content': 0.0994168296456337, 'timestamp': '2025-10-02 00:22:19.030266', 'step': 6497, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:22:19.087856', 'step': 6497, 'epoch': 1}
{'type': 'loss', 'content': 0.19450150430202484, 'timestamp': '2025-10-02 00:22:19.090779', 'step': 6498, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:19.146948', 'step': 6498, 'epoch': 1}
{'type': 'loss', 'content': 0.043878473341464996, 'timestamp': '2025-10-02 00:22:19.153032', 'step': 6499, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:19.206919', 'step': 6499, 'epoch': 1}
{'type': 'loss', 'content': 0.12083393335342407, 'timestamp': '2025-10-02 00:22:19.213194', 'step': 6500, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 6500', 'timestamp': '2025-10-02 00:22:19.637170', 'step': 6500, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:19.694756', 'step': 6500, 'epoch': 1}
{'type': 'loss', 'content': 0.11389534175395966, 'timestamp': '2025-10-02 00:22:19.697805', 'step': 6501, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:22:19.765759', 'step': 6501, 'epoch': 1}
{'type': 'loss', 'content': 0.030958428978919983, 'timestamp': '2025-10-02 00:22:19.776439', 'step': 6502, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:19.832168', 'step': 6502, 'epoch': 1}
{'type': 'loss', 'content': 0.035623036324977875, 'timestamp': '2025-10-02 00:22:19.834655', 'step': 6503, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:19.890647', 'step': 6503, 'epoch': 1}
{'type': 'loss', 'content': 0.12206379324197769, 'timestamp': '2025-10-02 00:22:19.897423', 'step': 6504, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:19.952058', 'step': 6504, 'epoch': 1}
{'type': 'loss', 'content': 0.08229976892471313, 'timestamp': '2025-10-02 00:22:19.958203', 'step': 6505, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:20.015237', 'step': 6505, 'epoch': 1}
{'type': 'loss', 'content': 0.05600552633404732, 'timestamp': '2025-10-02 00:22:20.025021', 'step': 6506, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:20.082959', 'step': 6506, 'epoch': 1}
{'type': 'loss', 'content': 0.11294304579496384, 'timestamp': '2025-10-02 00:22:20.086512', 'step': 6507, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:22:20.148356', 'step': 6507, 'epoch': 1}
{'type': 'loss', 'content': 0.045884571969509125, 'timestamp': '2025-10-02 00:22:20.159875', 'step': 6508, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:20.215547', 'step': 6508, 'epoch': 1}
{'type': 'loss', 'content': 0.10756447911262512, 'timestamp': '2025-10-02 00:22:20.226054', 'step': 6509, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:20.282179', 'step': 6509, 'epoch': 1}
{'type': 'loss', 'content': 0.020305722951889038, 'timestamp': '2025-10-02 00:22:20.289851', 'step': 6510, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:20.348170', 'step': 6510, 'epoch': 1}
{'type': 'loss', 'content': 0.08130443841218948, 'timestamp': '2025-10-02 00:22:20.366305', 'step': 6511, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:20.421062', 'step': 6511, 'epoch': 1}
{'type': 'loss', 'content': 0.020015349611639977, 'timestamp': '2025-10-02 00:22:20.431479', 'step': 6512, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:20.484128', 'step': 6512, 'epoch': 1}
{'type': 'loss', 'content': 0.03886503726243973, 'timestamp': '2025-10-02 00:22:20.486726', 'step': 6513, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:20.539348', 'step': 6513, 'epoch': 1}
{'type': 'loss', 'content': 0.10765977203845978, 'timestamp': '2025-10-02 00:22:20.541469', 'step': 6514, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:22:20.602565', 'step': 6514, 'epoch': 1}
{'type': 'loss', 'content': 0.11079911887645721, 'timestamp': '2025-10-02 00:22:20.613290', 'step': 6515, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:20.668360', 'step': 6515, 'epoch': 1}
{'type': 'loss', 'content': 0.13744576275348663, 'timestamp': '2025-10-02 00:22:20.674231', 'step': 6516, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:20.727180', 'step': 6516, 'epoch': 1}
{'type': 'loss', 'content': 0.08834310621023178, 'timestamp': '2025-10-02 00:22:20.736728', 'step': 6517, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:20.789387', 'step': 6517, 'epoch': 1}
{'type': 'loss', 'content': 0.18396081030368805, 'timestamp': '2025-10-02 00:22:20.791443', 'step': 6518, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:20.844983', 'step': 6518, 'epoch': 1}
{'type': 'loss', 'content': 0.042046934366226196, 'timestamp': '2025-10-02 00:22:20.847498', 'step': 6519, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:20.903812', 'step': 6519, 'epoch': 1}
{'type': 'loss', 'content': 0.041206687688827515, 'timestamp': '2025-10-02 00:22:20.914312', 'step': 6520, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:20.968368', 'step': 6520, 'epoch': 1}
{'type': 'loss', 'content': 0.01770215854048729, 'timestamp': '2025-10-02 00:22:20.974344', 'step': 6521, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:21.028421', 'step': 6521, 'epoch': 1}
{'type': 'loss', 'content': 0.058122485876083374, 'timestamp': '2025-10-02 00:22:21.030996', 'step': 6522, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:21.084171', 'step': 6522, 'epoch': 1}
{'type': 'loss', 'content': 0.07775358110666275, 'timestamp': '2025-10-02 00:22:21.086979', 'step': 6523, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:22:21.139783', 'step': 6523, 'epoch': 1}
{'type': 'loss', 'content': 0.12690697610378265, 'timestamp': '2025-10-02 00:22:21.145865', 'step': 6524, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:21.200419', 'step': 6524, 'epoch': 1}
{'type': 'loss', 'content': 0.08108654618263245, 'timestamp': '2025-10-02 00:22:21.206630', 'step': 6525, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:22:21.259598', 'step': 6525, 'epoch': 1}
{'type': 'loss', 'content': 0.21413221955299377, 'timestamp': '2025-10-02 00:22:21.261768', 'step': 6526, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:22:21.330905', 'step': 6526, 'epoch': 1}
{'type': 'loss', 'content': 0.057306766510009766, 'timestamp': '2025-10-02 00:22:21.343616', 'step': 6527, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:21.401422', 'step': 6527, 'epoch': 1}
{'type': 'loss', 'content': 0.03437034413218498, 'timestamp': '2025-10-02 00:22:21.412560', 'step': 6528, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:21.466012', 'step': 6528, 'epoch': 1}
{'type': 'loss', 'content': 0.047880616039037704, 'timestamp': '2025-10-02 00:22:21.468372', 'step': 6529, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:21.521498', 'step': 6529, 'epoch': 1}
{'type': 'loss', 'content': 0.066421277821064, 'timestamp': '2025-10-02 00:22:21.526273', 'step': 6530, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:21.586630', 'step': 6530, 'epoch': 1}
{'type': 'loss', 'content': 0.05142131820321083, 'timestamp': '2025-10-02 00:22:21.591230', 'step': 6531, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:21.651461', 'step': 6531, 'epoch': 1}
{'type': 'loss', 'content': 0.048741426318883896, 'timestamp': '2025-10-02 00:22:21.657959', 'step': 6532, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:21.713008', 'step': 6532, 'epoch': 1}
{'type': 'loss', 'content': 0.16025637090206146, 'timestamp': '2025-10-02 00:22:21.720844', 'step': 6533, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:21.778488', 'step': 6533, 'epoch': 1}
{'type': 'loss', 'content': 0.24508298933506012, 'timestamp': '2025-10-02 00:22:21.781224', 'step': 6534, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:21.850138', 'step': 6534, 'epoch': 1}
{'type': 'loss', 'content': 0.10533426702022552, 'timestamp': '2025-10-02 00:22:21.854405', 'step': 6535, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:21.927978', 'step': 6535, 'epoch': 1}
{'type': 'loss', 'content': 0.07872601598501205, 'timestamp': '2025-10-02 00:22:21.942139', 'step': 6536, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:22.015442', 'step': 6536, 'epoch': 1}
{'type': 'loss', 'content': 0.06683602929115295, 'timestamp': '2025-10-02 00:22:22.021958', 'step': 6537, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:22.088244', 'step': 6537, 'epoch': 1}
{'type': 'loss', 'content': 0.031233584508299828, 'timestamp': '2025-10-02 00:22:22.095103', 'step': 6538, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:22.165825', 'step': 6538, 'epoch': 1}
{'type': 'loss', 'content': 0.04102819412946701, 'timestamp': '2025-10-02 00:22:22.176761', 'step': 6539, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:22.254427', 'step': 6539, 'epoch': 1}
{'type': 'loss', 'content': 0.010347150266170502, 'timestamp': '2025-10-02 00:22:22.266748', 'step': 6540, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:22.351351', 'step': 6540, 'epoch': 1}
{'type': 'loss', 'content': 0.09987016022205353, 'timestamp': '2025-10-02 00:22:22.362043', 'step': 6541, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:22.427138', 'step': 6541, 'epoch': 1}
{'type': 'loss', 'content': 0.0723678320646286, 'timestamp': '2025-10-02 00:22:22.433605', 'step': 6542, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:22:22.516538', 'step': 6542, 'epoch': 1}
{'type': 'loss', 'content': 0.046015314757823944, 'timestamp': '2025-10-02 00:22:22.527404', 'step': 6543, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:22:22.637349', 'step': 6543, 'epoch': 1}
{'type': 'loss', 'content': 0.041019417345523834, 'timestamp': '2025-10-02 00:22:22.650991', 'step': 6544, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:22.707786', 'step': 6544, 'epoch': 1}
{'type': 'loss', 'content': 0.06920086592435837, 'timestamp': '2025-10-02 00:22:22.718279', 'step': 6545, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:22.779428', 'step': 6545, 'epoch': 1}
{'type': 'loss', 'content': 0.046519555151462555, 'timestamp': '2025-10-02 00:22:22.783676', 'step': 6546, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:22.840180', 'step': 6546, 'epoch': 1}
{'type': 'loss', 'content': 0.054234959185123444, 'timestamp': '2025-10-02 00:22:22.849768', 'step': 6547, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:22.908133', 'step': 6547, 'epoch': 1}
{'type': 'loss', 'content': 0.04368068277835846, 'timestamp': '2025-10-02 00:22:22.915209', 'step': 6548, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:22.970632', 'step': 6548, 'epoch': 1}
{'type': 'loss', 'content': 0.14213687181472778, 'timestamp': '2025-10-02 00:22:22.976700', 'step': 6549, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:23.037239', 'step': 6549, 'epoch': 1}
{'type': 'loss', 'content': 0.04331902042031288, 'timestamp': '2025-10-02 00:22:23.047074', 'step': 6550, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:23.104921', 'step': 6550, 'epoch': 1}
{'type': 'loss', 'content': 0.13292765617370605, 'timestamp': '2025-10-02 00:22:23.108300', 'step': 6551, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:23.165142', 'step': 6551, 'epoch': 1}
{'type': 'loss', 'content': 0.14540329575538635, 'timestamp': '2025-10-02 00:22:23.171597', 'step': 6552, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:23.230932', 'step': 6552, 'epoch': 1}
{'type': 'loss', 'content': 0.047203194350004196, 'timestamp': '2025-10-02 00:22:23.238822', 'step': 6553, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:23.298150', 'step': 6553, 'epoch': 1}
{'type': 'loss', 'content': 0.07828916609287262, 'timestamp': '2025-10-02 00:22:23.305900', 'step': 6554, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:22:23.372808', 'step': 6554, 'epoch': 1}
{'type': 'loss', 'content': 0.06650636345148087, 'timestamp': '2025-10-02 00:22:23.383690', 'step': 6555, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:23.445197', 'step': 6555, 'epoch': 1}
{'type': 'loss', 'content': 0.15576831996440887, 'timestamp': '2025-10-02 00:22:23.455002', 'step': 6556, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:23.515284', 'step': 6556, 'epoch': 1}
{'type': 'loss', 'content': 0.054652441293001175, 'timestamp': '2025-10-02 00:22:23.522843', 'step': 6557, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:23.589581', 'step': 6557, 'epoch': 1}
{'type': 'loss', 'content': 0.16224904358386993, 'timestamp': '2025-10-02 00:22:23.592935', 'step': 6558, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:23.649345', 'step': 6558, 'epoch': 1}
{'type': 'loss', 'content': 0.1973118633031845, 'timestamp': '2025-10-02 00:22:23.658476', 'step': 6559, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:23.720868', 'step': 6559, 'epoch': 1}
{'type': 'loss', 'content': 0.026319462805986404, 'timestamp': '2025-10-02 00:22:23.727268', 'step': 6560, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:23.788618', 'step': 6560, 'epoch': 1}
{'type': 'loss', 'content': 0.09215826541185379, 'timestamp': '2025-10-02 00:22:23.792461', 'step': 6561, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:23.860987', 'step': 6561, 'epoch': 1}
{'type': 'loss', 'content': 0.10978672653436661, 'timestamp': '2025-10-02 00:22:23.865224', 'step': 6562, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:22:23.921419', 'step': 6562, 'epoch': 1}
{'type': 'loss', 'content': 0.10751227289438248, 'timestamp': '2025-10-02 00:22:23.928328', 'step': 6563, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:23.988672', 'step': 6563, 'epoch': 1}
{'type': 'loss', 'content': 0.05752095580101013, 'timestamp': '2025-10-02 00:22:23.995145', 'step': 6564, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:24.051137', 'step': 6564, 'epoch': 1}
{'type': 'loss', 'content': 0.24171380698680878, 'timestamp': '2025-10-02 00:22:24.054505', 'step': 6565, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:24.119405', 'step': 6565, 'epoch': 1}
{'type': 'loss', 'content': 0.1037234216928482, 'timestamp': '2025-10-02 00:22:24.122577', 'step': 6566, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:24.177714', 'step': 6566, 'epoch': 1}
{'type': 'loss', 'content': 0.07613350450992584, 'timestamp': '2025-10-02 00:22:24.181026', 'step': 6567, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:24.241065', 'step': 6567, 'epoch': 1}
{'type': 'loss', 'content': 0.09505016356706619, 'timestamp': '2025-10-02 00:22:24.248100', 'step': 6568, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:24.305989', 'step': 6568, 'epoch': 1}
{'type': 'loss', 'content': 0.1386512964963913, 'timestamp': '2025-10-02 00:22:24.312159', 'step': 6569, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:24.371340', 'step': 6569, 'epoch': 1}
{'type': 'loss', 'content': 0.10080735385417938, 'timestamp': '2025-10-02 00:22:24.373939', 'step': 6570, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:24.439950', 'step': 6570, 'epoch': 1}
{'type': 'loss', 'content': 0.03533506393432617, 'timestamp': '2025-10-02 00:22:24.442738', 'step': 6571, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:24.500475', 'step': 6571, 'epoch': 1}
{'type': 'loss', 'content': 0.07332943379878998, 'timestamp': '2025-10-02 00:22:24.507125', 'step': 6572, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:24.565228', 'step': 6572, 'epoch': 1}
{'type': 'loss', 'content': 0.06252123415470123, 'timestamp': '2025-10-02 00:22:24.570741', 'step': 6573, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:24.625627', 'step': 6573, 'epoch': 1}
{'type': 'loss', 'content': 0.04695998132228851, 'timestamp': '2025-10-02 00:22:24.628467', 'step': 6574, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:24.698182', 'step': 6574, 'epoch': 1}
{'type': 'loss', 'content': 0.06316874921321869, 'timestamp': '2025-10-02 00:22:24.704436', 'step': 6575, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:24.765433', 'step': 6575, 'epoch': 1}
{'type': 'loss', 'content': 0.019561482593417168, 'timestamp': '2025-10-02 00:22:24.772299', 'step': 6576, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:24.834854', 'step': 6576, 'epoch': 1}
{'type': 'loss', 'content': 0.07603719085454941, 'timestamp': '2025-10-02 00:22:24.844832', 'step': 6577, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:24.910418', 'step': 6577, 'epoch': 1}
{'type': 'loss', 'content': 0.0312674455344677, 'timestamp': '2025-10-02 00:22:24.920242', 'step': 6578, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:22:24.992321', 'step': 6578, 'epoch': 1}
{'type': 'loss', 'content': 0.03902702033519745, 'timestamp': '2025-10-02 00:22:25.003065', 'step': 6579, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:22:25.081874', 'step': 6579, 'epoch': 1}
{'type': 'loss', 'content': 0.0836884155869484, 'timestamp': '2025-10-02 00:22:25.095574', 'step': 6580, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:25.157148', 'step': 6580, 'epoch': 1}
{'type': 'loss', 'content': 0.19359852373600006, 'timestamp': '2025-10-02 00:22:25.165462', 'step': 6581, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:25.239833', 'step': 6581, 'epoch': 1}
{'type': 'loss', 'content': 0.07154920697212219, 'timestamp': '2025-10-02 00:22:25.246606', 'step': 6582, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:25.306910', 'step': 6582, 'epoch': 1}
{'type': 'loss', 'content': 0.25123026967048645, 'timestamp': '2025-10-02 00:22:25.313520', 'step': 6583, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:25.369257', 'step': 6583, 'epoch': 1}
{'type': 'loss', 'content': 0.039469536393880844, 'timestamp': '2025-10-02 00:22:25.377299', 'step': 6584, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:25.434254', 'step': 6584, 'epoch': 1}
{'type': 'loss', 'content': 0.04540377855300903, 'timestamp': '2025-10-02 00:22:25.439373', 'step': 6585, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:25.499485', 'step': 6585, 'epoch': 1}
{'type': 'loss', 'content': 0.10052374750375748, 'timestamp': '2025-10-02 00:22:25.501993', 'step': 6586, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:25.563728', 'step': 6586, 'epoch': 1}
{'type': 'loss', 'content': 0.04265378415584564, 'timestamp': '2025-10-02 00:22:25.573309', 'step': 6587, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:22:25.631409', 'step': 6587, 'epoch': 1}
{'type': 'loss', 'content': 0.12269812822341919, 'timestamp': '2025-10-02 00:22:25.640658', 'step': 6588, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:25.695352', 'step': 6588, 'epoch': 1}
{'type': 'loss', 'content': 0.13830505311489105, 'timestamp': '2025-10-02 00:22:25.698067', 'step': 6589, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:25.752013', 'step': 6589, 'epoch': 1}
{'type': 'loss', 'content': 0.09737460315227509, 'timestamp': '2025-10-02 00:22:25.759836', 'step': 6590, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:25.815405', 'step': 6590, 'epoch': 1}
{'type': 'loss', 'content': 0.014699938707053661, 'timestamp': '2025-10-02 00:22:25.821350', 'step': 6591, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:25.880323', 'step': 6591, 'epoch': 1}
{'type': 'loss', 'content': 0.09568621218204498, 'timestamp': '2025-10-02 00:22:25.886110', 'step': 6592, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:25.943386', 'step': 6592, 'epoch': 1}
{'type': 'loss', 'content': 0.2168223112821579, 'timestamp': '2025-10-02 00:22:25.947287', 'step': 6593, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:26.001820', 'step': 6593, 'epoch': 1}
{'type': 'loss', 'content': 0.18023042380809784, 'timestamp': '2025-10-02 00:22:26.004469', 'step': 6594, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:26.065905', 'step': 6594, 'epoch': 1}
{'type': 'loss', 'content': 0.024829693138599396, 'timestamp': '2025-10-02 00:22:26.075550', 'step': 6595, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:26.131807', 'step': 6595, 'epoch': 1}
{'type': 'loss', 'content': 0.06759755313396454, 'timestamp': '2025-10-02 00:22:26.139035', 'step': 6596, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:26.194605', 'step': 6596, 'epoch': 1}
{'type': 'loss', 'content': 0.035166479647159576, 'timestamp': '2025-10-02 00:22:26.205120', 'step': 6597, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:26.265182', 'step': 6597, 'epoch': 1}
{'type': 'loss', 'content': 0.04122579097747803, 'timestamp': '2025-10-02 00:22:26.270457', 'step': 6598, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:26.324598', 'step': 6598, 'epoch': 1}
{'type': 'loss', 'content': 0.08459959179162979, 'timestamp': '2025-10-02 00:22:26.327626', 'step': 6599, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:26.390876', 'step': 6599, 'epoch': 1}
{'type': 'loss', 'content': 0.029253946617245674, 'timestamp': '2025-10-02 00:22:26.402820', 'step': 6600, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:26.464926', 'step': 6600, 'epoch': 1}
{'type': 'loss', 'content': 0.09376458078622818, 'timestamp': '2025-10-02 00:22:26.468288', 'step': 6601, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:22:26.536385', 'step': 6601, 'epoch': 1}
{'type': 'loss', 'content': 0.041968997567892075, 'timestamp': '2025-10-02 00:22:26.547470', 'step': 6602, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:26.609870', 'step': 6602, 'epoch': 1}
{'type': 'loss', 'content': 0.16228638589382172, 'timestamp': '2025-10-02 00:22:26.612734', 'step': 6603, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:26.671861', 'step': 6603, 'epoch': 1}
{'type': 'loss', 'content': 0.11609082669019699, 'timestamp': '2025-10-02 00:22:26.680750', 'step': 6604, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:26.738659', 'step': 6604, 'epoch': 1}
{'type': 'loss', 'content': 0.06839054077863693, 'timestamp': '2025-10-02 00:22:26.743944', 'step': 6605, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:26.801180', 'step': 6605, 'epoch': 1}
{'type': 'loss', 'content': 0.03666155785322189, 'timestamp': '2025-10-02 00:22:26.810803', 'step': 6606, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:22:26.867911', 'step': 6606, 'epoch': 1}
{'type': 'loss', 'content': 0.1342591941356659, 'timestamp': '2025-10-02 00:22:26.873144', 'step': 6607, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:26.933498', 'step': 6607, 'epoch': 1}
{'type': 'loss', 'content': 0.12280594557523727, 'timestamp': '2025-10-02 00:22:26.940374', 'step': 6608, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:22:27.008410', 'step': 6608, 'epoch': 1}
{'type': 'loss', 'content': 0.05983392149209976, 'timestamp': '2025-10-02 00:22:27.019930', 'step': 6609, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:27.079738', 'step': 6609, 'epoch': 1}
{'type': 'loss', 'content': 0.026386991143226624, 'timestamp': '2025-10-02 00:22:27.084472', 'step': 6610, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:27.140886', 'step': 6610, 'epoch': 1}
{'type': 'loss', 'content': 0.09915608167648315, 'timestamp': '2025-10-02 00:22:27.146754', 'step': 6611, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:27.208519', 'step': 6611, 'epoch': 1}
{'type': 'loss', 'content': 0.09195258468389511, 'timestamp': '2025-10-02 00:22:27.217074', 'step': 6612, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:22:27.270051', 'step': 6612, 'epoch': 1}
{'type': 'loss', 'content': 0.18785107135772705, 'timestamp': '2025-10-02 00:22:27.272501', 'step': 6613, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:27.331946', 'step': 6613, 'epoch': 1}
{'type': 'loss', 'content': 0.052260249853134155, 'timestamp': '2025-10-02 00:22:27.339721', 'step': 6614, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:27.405443', 'step': 6614, 'epoch': 1}
{'type': 'loss', 'content': 0.04429218918085098, 'timestamp': '2025-10-02 00:22:27.410851', 'step': 6615, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:27.483094', 'step': 6615, 'epoch': 1}
{'type': 'loss', 'content': 0.06826681643724442, 'timestamp': '2025-10-02 00:22:27.489953', 'step': 6616, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:22:27.562236', 'step': 6616, 'epoch': 1}
{'type': 'loss', 'content': 0.2269996851682663, 'timestamp': '2025-10-02 00:22:27.565237', 'step': 6617, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:27.636827', 'step': 6617, 'epoch': 1}
{'type': 'loss', 'content': 0.06669797003269196, 'timestamp': '2025-10-02 00:22:27.644618', 'step': 6618, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:27.700197', 'step': 6618, 'epoch': 1}
{'type': 'loss', 'content': 0.04586975276470184, 'timestamp': '2025-10-02 00:22:27.702764', 'step': 6619, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:27.763478', 'step': 6619, 'epoch': 1}
{'type': 'loss', 'content': 0.07666092365980148, 'timestamp': '2025-10-02 00:22:27.771840', 'step': 6620, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:27.838033', 'step': 6620, 'epoch': 1}
{'type': 'loss', 'content': 0.04789106175303459, 'timestamp': '2025-10-02 00:22:27.846493', 'step': 6621, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:27.929030', 'step': 6621, 'epoch': 1}
{'type': 'loss', 'content': 0.05273820087313652, 'timestamp': '2025-10-02 00:22:27.939233', 'step': 6622, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:28.008016', 'step': 6622, 'epoch': 1}
{'type': 'loss', 'content': 0.06764212250709534, 'timestamp': '2025-10-02 00:22:28.013045', 'step': 6623, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:28.071611', 'step': 6623, 'epoch': 1}
{'type': 'loss', 'content': 0.05876067280769348, 'timestamp': '2025-10-02 00:22:28.079715', 'step': 6624, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:28.146634', 'step': 6624, 'epoch': 1}
{'type': 'loss', 'content': 0.042805735021829605, 'timestamp': '2025-10-02 00:22:28.157149', 'step': 6625, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:28.217284', 'step': 6625, 'epoch': 1}
{'type': 'loss', 'content': 0.0665510818362236, 'timestamp': '2025-10-02 00:22:28.223376', 'step': 6626, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:28.290132', 'step': 6626, 'epoch': 1}
{'type': 'loss', 'content': 0.03613656014204025, 'timestamp': '2025-10-02 00:22:28.296518', 'step': 6627, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:28.360006', 'step': 6627, 'epoch': 1}
{'type': 'loss', 'content': 0.21345186233520508, 'timestamp': '2025-10-02 00:22:28.366492', 'step': 6628, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:28.432080', 'step': 6628, 'epoch': 1}
{'type': 'loss', 'content': 0.015910396352410316, 'timestamp': '2025-10-02 00:22:28.443339', 'step': 6629, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:28.511394', 'step': 6629, 'epoch': 1}
{'type': 'loss', 'content': 0.030331065878272057, 'timestamp': '2025-10-02 00:22:28.518602', 'step': 6630, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:28.580632', 'step': 6630, 'epoch': 1}
{'type': 'loss', 'content': 0.1654968410730362, 'timestamp': '2025-10-02 00:22:28.587492', 'step': 6631, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:28.647443', 'step': 6631, 'epoch': 1}
{'type': 'loss', 'content': 0.09851078689098358, 'timestamp': '2025-10-02 00:22:28.654226', 'step': 6632, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:28.713566', 'step': 6632, 'epoch': 1}
{'type': 'loss', 'content': 0.0780496671795845, 'timestamp': '2025-10-02 00:22:28.716651', 'step': 6633, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:28.779415', 'step': 6633, 'epoch': 1}
{'type': 'loss', 'content': 0.12654760479927063, 'timestamp': '2025-10-02 00:22:28.792571', 'step': 6634, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:28.862892', 'step': 6634, 'epoch': 1}
{'type': 'loss', 'content': 0.0538921095430851, 'timestamp': '2025-10-02 00:22:28.869014', 'step': 6635, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:28.932572', 'step': 6635, 'epoch': 1}
{'type': 'loss', 'content': 0.09096015989780426, 'timestamp': '2025-10-02 00:22:28.942245', 'step': 6636, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:29.014497', 'step': 6636, 'epoch': 1}
{'type': 'loss', 'content': 0.19164952635765076, 'timestamp': '2025-10-02 00:22:29.023149', 'step': 6637, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:29.083251', 'step': 6637, 'epoch': 1}
{'type': 'loss', 'content': 0.06526558101177216, 'timestamp': '2025-10-02 00:22:29.087482', 'step': 6638, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:29.146374', 'step': 6638, 'epoch': 1}
{'type': 'loss', 'content': 0.05615682527422905, 'timestamp': '2025-10-02 00:22:29.154402', 'step': 6639, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:29.222677', 'step': 6639, 'epoch': 1}
{'type': 'loss', 'content': 0.24240919947624207, 'timestamp': '2025-10-02 00:22:29.229741', 'step': 6640, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:29.290634', 'step': 6640, 'epoch': 1}
{'type': 'loss', 'content': 0.02809849940240383, 'timestamp': '2025-10-02 00:22:29.300474', 'step': 6641, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:29.374420', 'step': 6641, 'epoch': 1}
{'type': 'loss', 'content': 0.041199080646038055, 'timestamp': '2025-10-02 00:22:29.384238', 'step': 6642, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:29.445151', 'step': 6642, 'epoch': 1}
{'type': 'loss', 'content': 0.09431391209363937, 'timestamp': '2025-10-02 00:22:29.453096', 'step': 6643, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:22:29.515231', 'step': 6643, 'epoch': 1}
{'type': 'loss', 'content': 0.07816625386476517, 'timestamp': '2025-10-02 00:22:29.525700', 'step': 6644, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:29.591752', 'step': 6644, 'epoch': 1}
{'type': 'loss', 'content': 0.035444848239421844, 'timestamp': '2025-10-02 00:22:29.602291', 'step': 6645, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:29.666071', 'step': 6645, 'epoch': 1}
{'type': 'loss', 'content': 0.1058744415640831, 'timestamp': '2025-10-02 00:22:29.674107', 'step': 6646, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:29.738403', 'step': 6646, 'epoch': 1}
{'type': 'loss', 'content': 0.12126181274652481, 'timestamp': '2025-10-02 00:22:29.744849', 'step': 6647, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:29.816765', 'step': 6647, 'epoch': 1}
{'type': 'loss', 'content': 0.09940440952777863, 'timestamp': '2025-10-02 00:22:29.826503', 'step': 6648, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:29.884555', 'step': 6648, 'epoch': 1}
{'type': 'loss', 'content': 0.20642825961112976, 'timestamp': '2025-10-02 00:22:29.891643', 'step': 6649, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:29.959154', 'step': 6649, 'epoch': 1}
{'type': 'loss', 'content': 0.14876097440719604, 'timestamp': '2025-10-02 00:22:29.962397', 'step': 6650, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:30.031397', 'step': 6650, 'epoch': 1}
{'type': 'loss', 'content': 0.07974894344806671, 'timestamp': '2025-10-02 00:22:30.040319', 'step': 6651, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:30.116237', 'step': 6651, 'epoch': 1}
{'type': 'loss', 'content': 0.1031327024102211, 'timestamp': '2025-10-02 00:22:30.126258', 'step': 6652, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:30.187768', 'step': 6652, 'epoch': 1}
{'type': 'loss', 'content': 0.03157550469040871, 'timestamp': '2025-10-02 00:22:30.195658', 'step': 6653, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:30.268871', 'step': 6653, 'epoch': 1}
{'type': 'loss', 'content': 0.21371209621429443, 'timestamp': '2025-10-02 00:22:30.273106', 'step': 6654, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:30.341405', 'step': 6654, 'epoch': 1}
{'type': 'loss', 'content': 0.11205568164587021, 'timestamp': '2025-10-02 00:22:30.348841', 'step': 6655, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:30.427527', 'step': 6655, 'epoch': 1}
{'type': 'loss', 'content': 0.031466905027627945, 'timestamp': '2025-10-02 00:22:30.440015', 'step': 6656, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:30.514835', 'step': 6656, 'epoch': 1}
{'type': 'loss', 'content': 0.028398562222719193, 'timestamp': '2025-10-02 00:22:30.524794', 'step': 6657, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:30.612355', 'step': 6657, 'epoch': 1}
{'type': 'loss', 'content': 0.04259863495826721, 'timestamp': '2025-10-02 00:22:30.625562', 'step': 6658, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:30.691666', 'step': 6658, 'epoch': 1}
{'type': 'loss', 'content': 0.16861595213413239, 'timestamp': '2025-10-02 00:22:30.703249', 'step': 6659, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:30.792574', 'step': 6659, 'epoch': 1}
{'type': 'loss', 'content': 0.06735756248235703, 'timestamp': '2025-10-02 00:22:30.799369', 'step': 6660, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:30.872326', 'step': 6660, 'epoch': 1}
{'type': 'loss', 'content': 0.17723938822746277, 'timestamp': '2025-10-02 00:22:30.877237', 'step': 6661, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:30.935696', 'step': 6661, 'epoch': 1}
{'type': 'loss', 'content': 0.02333928272128105, 'timestamp': '2025-10-02 00:22:30.945311', 'step': 6662, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:31.023497', 'step': 6662, 'epoch': 1}
{'type': 'loss', 'content': 0.08088871091604233, 'timestamp': '2025-10-02 00:22:31.027979', 'step': 6663, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:31.085144', 'step': 6663, 'epoch': 1}
{'type': 'loss', 'content': 0.09272444993257523, 'timestamp': '2025-10-02 00:22:31.095543', 'step': 6664, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:31.182398', 'step': 6664, 'epoch': 1}
{'type': 'loss', 'content': 0.10314607620239258, 'timestamp': '2025-10-02 00:22:31.188682', 'step': 6665, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:31.273844', 'step': 6665, 'epoch': 1}
{'type': 'loss', 'content': 0.08788543939590454, 'timestamp': '2025-10-02 00:22:31.284300', 'step': 6666, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:22:31.351165', 'step': 6666, 'epoch': 1}
{'type': 'loss', 'content': 0.1536388397216797, 'timestamp': '2025-10-02 00:22:31.360068', 'step': 6667, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:31.434094', 'step': 6667, 'epoch': 1}
{'type': 'loss', 'content': 0.1957547515630722, 'timestamp': '2025-10-02 00:22:31.446467', 'step': 6668, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:31.510603', 'step': 6668, 'epoch': 1}
{'type': 'loss', 'content': 0.13113558292388916, 'timestamp': '2025-10-02 00:22:31.514878', 'step': 6669, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:31.575148', 'step': 6669, 'epoch': 1}
{'type': 'loss', 'content': 0.15023237466812134, 'timestamp': '2025-10-02 00:22:31.582838', 'step': 6670, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:31.653503', 'step': 6670, 'epoch': 1}
{'type': 'loss', 'content': 0.04261159151792526, 'timestamp': '2025-10-02 00:22:31.662676', 'step': 6671, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:22:31.737073', 'step': 6671, 'epoch': 1}
{'type': 'loss', 'content': 0.13080821931362152, 'timestamp': '2025-10-02 00:22:31.745350', 'step': 6672, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:22:31.801682', 'step': 6672, 'epoch': 1}
{'type': 'loss', 'content': 0.08389639109373093, 'timestamp': '2025-10-02 00:22:31.805599', 'step': 6673, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:31.898258', 'step': 6673, 'epoch': 1}
{'type': 'loss', 'content': 0.04668997600674629, 'timestamp': '2025-10-02 00:22:31.908698', 'step': 6674, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:22:31.969029', 'step': 6674, 'epoch': 1}
{'type': 'loss', 'content': 0.20770800113677979, 'timestamp': '2025-10-02 00:22:31.980975', 'step': 6675, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:32.059842', 'step': 6675, 'epoch': 1}
{'type': 'loss', 'content': 0.15523259341716766, 'timestamp': '2025-10-02 00:22:32.068270', 'step': 6676, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:32.150077', 'step': 6676, 'epoch': 1}
{'type': 'loss', 'content': 0.11780104041099548, 'timestamp': '2025-10-02 00:22:32.154959', 'step': 6677, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:32.242795', 'step': 6677, 'epoch': 1}
{'type': 'loss', 'content': 0.11458421498537064, 'timestamp': '2025-10-02 00:22:32.255310', 'step': 6678, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:32.343555', 'step': 6678, 'epoch': 1}
{'type': 'loss', 'content': 0.047953832894563675, 'timestamp': '2025-10-02 00:22:32.356261', 'step': 6679, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:32.446250', 'step': 6679, 'epoch': 1}
{'type': 'loss', 'content': 0.02424020692706108, 'timestamp': '2025-10-02 00:22:32.456609', 'step': 6680, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:22:32.552110', 'step': 6680, 'epoch': 1}
{'type': 'loss', 'content': 0.1402951329946518, 'timestamp': '2025-10-02 00:22:32.556917', 'step': 6681, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:32.658677', 'step': 6681, 'epoch': 1}
{'type': 'loss', 'content': 0.06253351271152496, 'timestamp': '2025-10-02 00:22:32.669085', 'step': 6682, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:32.753472', 'step': 6682, 'epoch': 1}
{'type': 'loss', 'content': 0.08952461928129196, 'timestamp': '2025-10-02 00:22:32.766528', 'step': 6683, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:32.827101', 'step': 6683, 'epoch': 1}
{'type': 'loss', 'content': 0.10159488022327423, 'timestamp': '2025-10-02 00:22:32.834952', 'step': 6684, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:32.904169', 'step': 6684, 'epoch': 1}
{'type': 'loss', 'content': 0.06704625487327576, 'timestamp': '2025-10-02 00:22:32.914868', 'step': 6685, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:32.999519', 'step': 6685, 'epoch': 1}
{'type': 'loss', 'content': 0.02762393094599247, 'timestamp': '2025-10-02 00:22:33.004194', 'step': 6686, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:33.083361', 'step': 6686, 'epoch': 1}
{'type': 'loss', 'content': 0.10386679321527481, 'timestamp': '2025-10-02 00:22:33.087387', 'step': 6687, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:33.172797', 'step': 6687, 'epoch': 1}
{'type': 'loss', 'content': 0.046824079006910324, 'timestamp': '2025-10-02 00:22:33.181206', 'step': 6688, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:33.259120', 'step': 6688, 'epoch': 1}
{'type': 'loss', 'content': 0.13368850946426392, 'timestamp': '2025-10-02 00:22:33.268811', 'step': 6689, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:33.329906', 'step': 6689, 'epoch': 1}
{'type': 'loss', 'content': 0.04864486679434776, 'timestamp': '2025-10-02 00:22:33.334281', 'step': 6690, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:22:33.400208', 'step': 6690, 'epoch': 1}
{'type': 'loss', 'content': 0.0361100509762764, 'timestamp': '2025-10-02 00:22:33.410941', 'step': 6691, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:22:33.480350', 'step': 6691, 'epoch': 1}
{'type': 'loss', 'content': 0.18609486520290375, 'timestamp': '2025-10-02 00:22:33.500245', 'step': 6692, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:33.579292', 'step': 6692, 'epoch': 1}
{'type': 'loss', 'content': 0.09123353660106659, 'timestamp': '2025-10-02 00:22:33.595223', 'step': 6693, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:33.681280', 'step': 6693, 'epoch': 1}
{'type': 'loss', 'content': 0.05702021345496178, 'timestamp': '2025-10-02 00:22:33.694256', 'step': 6694, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:33.764445', 'step': 6694, 'epoch': 1}
{'type': 'loss', 'content': 0.15064726769924164, 'timestamp': '2025-10-02 00:22:33.767880', 'step': 6695, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:33.858409', 'step': 6695, 'epoch': 1}
{'type': 'loss', 'content': 0.036852460354566574, 'timestamp': '2025-10-02 00:22:33.866225', 'step': 6696, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:33.949865', 'step': 6696, 'epoch': 1}
{'type': 'loss', 'content': 0.11391989886760712, 'timestamp': '2025-10-02 00:22:33.956807', 'step': 6697, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:22:34.052647', 'step': 6697, 'epoch': 1}
{'type': 'loss', 'content': 0.20824061334133148, 'timestamp': '2025-10-02 00:22:34.061386', 'step': 6698, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:34.156715', 'step': 6698, 'epoch': 1}
{'type': 'loss', 'content': 0.09456058591604233, 'timestamp': '2025-10-02 00:22:34.172865', 'step': 6699, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:34.239561', 'step': 6699, 'epoch': 1}
{'type': 'loss', 'content': 0.04908721521496773, 'timestamp': '2025-10-02 00:22:34.254241', 'step': 6700, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:34.336616', 'step': 6700, 'epoch': 1}
{'type': 'loss', 'content': 0.10760170221328735, 'timestamp': '2025-10-02 00:22:34.342708', 'step': 6701, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:34.445235', 'step': 6701, 'epoch': 1}
{'type': 'loss', 'content': 0.08255098015069962, 'timestamp': '2025-10-02 00:22:34.456627', 'step': 6702, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:34.527517', 'step': 6702, 'epoch': 1}
{'type': 'loss', 'content': 0.1920948475599289, 'timestamp': '2025-10-02 00:22:34.531327', 'step': 6703, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:34.608415', 'step': 6703, 'epoch': 1}
{'type': 'loss', 'content': 0.06626570969820023, 'timestamp': '2025-10-02 00:22:34.624910', 'step': 6704, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:22:34.710611', 'step': 6704, 'epoch': 1}
{'type': 'loss', 'content': 0.3318985402584076, 'timestamp': '2025-10-02 00:22:34.721958', 'step': 6705, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:34.787066', 'step': 6705, 'epoch': 1}
{'type': 'loss', 'content': 0.0981421172618866, 'timestamp': '2025-10-02 00:22:34.797361', 'step': 6706, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:34.863083', 'step': 6706, 'epoch': 1}
{'type': 'loss', 'content': 0.10756363719701767, 'timestamp': '2025-10-02 00:22:34.874007', 'step': 6707, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:34.939015', 'step': 6707, 'epoch': 1}
{'type': 'loss', 'content': 0.2383638471364975, 'timestamp': '2025-10-02 00:22:34.951816', 'step': 6708, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:35.029420', 'step': 6708, 'epoch': 1}
{'type': 'loss', 'content': 0.09570997953414917, 'timestamp': '2025-10-02 00:22:35.032413', 'step': 6709, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:35.099544', 'step': 6709, 'epoch': 1}
{'type': 'loss', 'content': 0.06058940291404724, 'timestamp': '2025-10-02 00:22:35.109996', 'step': 6710, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:35.175815', 'step': 6710, 'epoch': 1}
{'type': 'loss', 'content': 0.08580026030540466, 'timestamp': '2025-10-02 00:22:35.181346', 'step': 6711, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:35.255949', 'step': 6711, 'epoch': 1}
{'type': 'loss', 'content': 0.04544522240757942, 'timestamp': '2025-10-02 00:22:35.262150', 'step': 6712, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:22:35.325923', 'step': 6712, 'epoch': 1}
{'type': 'loss', 'content': 0.0549219511449337, 'timestamp': '2025-10-02 00:22:35.337966', 'step': 6713, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:35.392365', 'step': 6713, 'epoch': 1}
{'type': 'loss', 'content': 0.1727384775876999, 'timestamp': '2025-10-02 00:22:35.407716', 'step': 6714, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:35.471410', 'step': 6714, 'epoch': 1}
{'type': 'loss', 'content': 0.17076453566551208, 'timestamp': '2025-10-02 00:22:35.474269', 'step': 6715, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:35.529183', 'step': 6715, 'epoch': 1}
{'type': 'loss', 'content': 0.08887019753456116, 'timestamp': '2025-10-02 00:22:35.535413', 'step': 6716, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:35.593923', 'step': 6716, 'epoch': 1}
{'type': 'loss', 'content': 0.12022651731967926, 'timestamp': '2025-10-02 00:22:35.601805', 'step': 6717, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:35.657517', 'step': 6717, 'epoch': 1}
{'type': 'loss', 'content': 0.029899312183260918, 'timestamp': '2025-10-02 00:22:35.667094', 'step': 6718, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:35.724593', 'step': 6718, 'epoch': 1}
{'type': 'loss', 'content': 0.08545715361833572, 'timestamp': '2025-10-02 00:22:35.727475', 'step': 6719, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:35.782569', 'step': 6719, 'epoch': 1}
{'type': 'loss', 'content': 0.06368071585893631, 'timestamp': '2025-10-02 00:22:35.792961', 'step': 6720, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:35.854016', 'step': 6720, 'epoch': 1}
{'type': 'loss', 'content': 0.02371261827647686, 'timestamp': '2025-10-02 00:22:35.860233', 'step': 6721, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:35.916826', 'step': 6721, 'epoch': 1}
{'type': 'loss', 'content': 0.054466612637043, 'timestamp': '2025-10-02 00:22:35.924598', 'step': 6722, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:35.981688', 'step': 6722, 'epoch': 1}
{'type': 'loss', 'content': 0.2290204018354416, 'timestamp': '2025-10-02 00:22:35.984334', 'step': 6723, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:36.040358', 'step': 6723, 'epoch': 1}
{'type': 'loss', 'content': 0.0792865976691246, 'timestamp': '2025-10-02 00:22:36.046001', 'step': 6724, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:36.100022', 'step': 6724, 'epoch': 1}
{'type': 'loss', 'content': 0.1369808465242386, 'timestamp': '2025-10-02 00:22:36.106457', 'step': 6725, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:36.165180', 'step': 6725, 'epoch': 1}
{'type': 'loss', 'content': 0.10244820266962051, 'timestamp': '2025-10-02 00:22:36.170021', 'step': 6726, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:36.231288', 'step': 6726, 'epoch': 1}
{'type': 'loss', 'content': 0.10562901943922043, 'timestamp': '2025-10-02 00:22:36.235500', 'step': 6727, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:22:36.305800', 'step': 6727, 'epoch': 1}
{'type': 'loss', 'content': 0.022097155451774597, 'timestamp': '2025-10-02 00:22:36.318846', 'step': 6728, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:22:36.402928', 'step': 6728, 'epoch': 1}
{'type': 'loss', 'content': 0.015344250947237015, 'timestamp': '2025-10-02 00:22:36.414942', 'step': 6729, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:36.487384', 'step': 6729, 'epoch': 1}
{'type': 'loss', 'content': 0.145241841673851, 'timestamp': '2025-10-02 00:22:36.491346', 'step': 6730, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:36.551878', 'step': 6730, 'epoch': 1}
{'type': 'loss', 'content': 0.2566325068473816, 'timestamp': '2025-10-02 00:22:36.556221', 'step': 6731, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:22:36.642935', 'step': 6731, 'epoch': 1}
{'type': 'loss', 'content': 0.043992314487695694, 'timestamp': '2025-10-02 00:22:36.654431', 'step': 6732, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:36.743109', 'step': 6732, 'epoch': 1}
{'type': 'loss', 'content': 0.0759883001446724, 'timestamp': '2025-10-02 00:22:36.746948', 'step': 6733, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:36.809442', 'step': 6733, 'epoch': 1}
{'type': 'loss', 'content': 0.11039342731237411, 'timestamp': '2025-10-02 00:22:36.813048', 'step': 6734, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:36.892349', 'step': 6734, 'epoch': 1}
{'type': 'loss', 'content': 0.0329531691968441, 'timestamp': '2025-10-02 00:22:36.902138', 'step': 6735, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:36.965662', 'step': 6735, 'epoch': 1}
{'type': 'loss', 'content': 0.03427821770310402, 'timestamp': '2025-10-02 00:22:36.976248', 'step': 6736, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:37.046623', 'step': 6736, 'epoch': 1}
{'type': 'loss', 'content': 0.07630519568920135, 'timestamp': '2025-10-02 00:22:37.060013', 'step': 6737, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:37.131587', 'step': 6737, 'epoch': 1}
{'type': 'loss', 'content': 0.08898425847291946, 'timestamp': '2025-10-02 00:22:37.137882', 'step': 6738, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:37.200425', 'step': 6738, 'epoch': 1}
{'type': 'loss', 'content': 0.11520922183990479, 'timestamp': '2025-10-02 00:22:37.206386', 'step': 6739, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:37.279245', 'step': 6739, 'epoch': 1}
{'type': 'loss', 'content': 0.16819468140602112, 'timestamp': '2025-10-02 00:22:37.287614', 'step': 6740, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:37.368399', 'step': 6740, 'epoch': 1}
{'type': 'loss', 'content': 0.11006587743759155, 'timestamp': '2025-10-02 00:22:37.375273', 'step': 6741, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:37.436939', 'step': 6741, 'epoch': 1}
{'type': 'loss', 'content': 0.11574061214923859, 'timestamp': '2025-10-02 00:22:37.448910', 'step': 6742, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:37.523562', 'step': 6742, 'epoch': 1}
{'type': 'loss', 'content': 0.07370787858963013, 'timestamp': '2025-10-02 00:22:37.536592', 'step': 6743, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:37.614720', 'step': 6743, 'epoch': 1}
{'type': 'loss', 'content': 0.042329199612140656, 'timestamp': '2025-10-02 00:22:37.628129', 'step': 6744, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:37.701087', 'step': 6744, 'epoch': 1}
{'type': 'loss', 'content': 0.17202343046665192, 'timestamp': '2025-10-02 00:22:37.715683', 'step': 6745, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:37.775995', 'step': 6745, 'epoch': 1}
{'type': 'loss', 'content': 0.08736813068389893, 'timestamp': '2025-10-02 00:22:37.786996', 'step': 6746, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:22:37.843760', 'step': 6746, 'epoch': 1}
{'type': 'loss', 'content': 0.0847063958644867, 'timestamp': '2025-10-02 00:22:37.847441', 'step': 6747, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:37.912240', 'step': 6747, 'epoch': 1}
{'type': 'loss', 'content': 0.05676880478858948, 'timestamp': '2025-10-02 00:22:37.926177', 'step': 6748, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:37.995130', 'step': 6748, 'epoch': 1}
{'type': 'loss', 'content': 0.02285134606063366, 'timestamp': '2025-10-02 00:22:38.004871', 'step': 6749, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:38.081944', 'step': 6749, 'epoch': 1}
{'type': 'loss', 'content': 0.16647501289844513, 'timestamp': '2025-10-02 00:22:38.090480', 'step': 6750, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:38.160682', 'step': 6750, 'epoch': 1}
{'type': 'loss', 'content': 0.06757815182209015, 'timestamp': '2025-10-02 00:22:38.164481', 'step': 6751, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:38.238388', 'step': 6751, 'epoch': 1}
{'type': 'loss', 'content': 0.0279803816229105, 'timestamp': '2025-10-02 00:22:38.245569', 'step': 6752, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:22:38.310820', 'step': 6752, 'epoch': 1}
{'type': 'loss', 'content': 0.17759345471858978, 'timestamp': '2025-10-02 00:22:38.314029', 'step': 6753, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:38.389250', 'step': 6753, 'epoch': 1}
{'type': 'loss', 'content': 0.07988929748535156, 'timestamp': '2025-10-02 00:22:38.399736', 'step': 6754, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:38.474258', 'step': 6754, 'epoch': 1}
{'type': 'loss', 'content': 0.049186188727617264, 'timestamp': '2025-10-02 00:22:38.484070', 'step': 6755, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:38.550155', 'step': 6755, 'epoch': 1}
{'type': 'loss', 'content': 0.045212648808956146, 'timestamp': '2025-10-02 00:22:38.558354', 'step': 6756, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:38.616396', 'step': 6756, 'epoch': 1}
{'type': 'loss', 'content': 0.14437876641750336, 'timestamp': '2025-10-02 00:22:38.621116', 'step': 6757, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:38.686954', 'step': 6757, 'epoch': 1}
{'type': 'loss', 'content': 0.1017797440290451, 'timestamp': '2025-10-02 00:22:38.697567', 'step': 6758, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:38.778410', 'step': 6758, 'epoch': 1}
{'type': 'loss', 'content': 0.01956311985850334, 'timestamp': '2025-10-02 00:22:38.786503', 'step': 6759, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:22:38.857466', 'step': 6759, 'epoch': 1}
{'type': 'loss', 'content': 0.1641445904970169, 'timestamp': '2025-10-02 00:22:38.869588', 'step': 6760, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:22:38.928651', 'step': 6760, 'epoch': 1}
{'type': 'loss', 'content': 0.1398843675851822, 'timestamp': '2025-10-02 00:22:38.936795', 'step': 6761, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:22:39.025348', 'step': 6761, 'epoch': 1}
{'type': 'loss', 'content': 0.06703478842973709, 'timestamp': '2025-10-02 00:22:39.038772', 'step': 6762, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:39.104240', 'step': 6762, 'epoch': 1}
{'type': 'loss', 'content': 0.17525731027126312, 'timestamp': '2025-10-02 00:22:39.107401', 'step': 6763, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:39.163362', 'step': 6763, 'epoch': 1}
{'type': 'loss', 'content': 0.1130492314696312, 'timestamp': '2025-10-02 00:22:39.169295', 'step': 6764, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:39.236715', 'step': 6764, 'epoch': 1}
{'type': 'loss', 'content': 0.0992482453584671, 'timestamp': '2025-10-02 00:22:39.242167', 'step': 6765, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:39.307609', 'step': 6765, 'epoch': 1}
{'type': 'loss', 'content': 0.029350122436881065, 'timestamp': '2025-10-02 00:22:39.311498', 'step': 6766, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:39.369742', 'step': 6766, 'epoch': 1}
{'type': 'loss', 'content': 0.06814653426408768, 'timestamp': '2025-10-02 00:22:39.373234', 'step': 6767, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:39.432056', 'step': 6767, 'epoch': 1}
{'type': 'loss', 'content': 0.0953180119395256, 'timestamp': '2025-10-02 00:22:39.439965', 'step': 6768, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:39.502408', 'step': 6768, 'epoch': 1}
{'type': 'loss', 'content': 0.10517872124910355, 'timestamp': '2025-10-02 00:22:39.513612', 'step': 6769, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:39.574993', 'step': 6769, 'epoch': 1}
{'type': 'loss', 'content': 0.10754196345806122, 'timestamp': '2025-10-02 00:22:39.578030', 'step': 6770, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:39.637198', 'step': 6770, 'epoch': 1}
{'type': 'loss', 'content': 0.07663677632808685, 'timestamp': '2025-10-02 00:22:39.644488', 'step': 6771, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:39.714340', 'step': 6771, 'epoch': 1}
{'type': 'loss', 'content': 0.10647830367088318, 'timestamp': '2025-10-02 00:22:39.720659', 'step': 6772, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:39.778005', 'step': 6772, 'epoch': 1}
{'type': 'loss', 'content': 0.05576113238930702, 'timestamp': '2025-10-02 00:22:39.788514', 'step': 6773, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:22:39.873475', 'step': 6773, 'epoch': 1}
{'type': 'loss', 'content': 0.03251812234520912, 'timestamp': '2025-10-02 00:22:39.885995', 'step': 6774, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:39.950608', 'step': 6774, 'epoch': 1}
{'type': 'loss', 'content': 0.0503414012491703, 'timestamp': '2025-10-02 00:22:39.953233', 'step': 6775, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:40.017614', 'step': 6775, 'epoch': 1}
{'type': 'loss', 'content': 0.1303555816411972, 'timestamp': '2025-10-02 00:22:40.023989', 'step': 6776, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:40.082651', 'step': 6776, 'epoch': 1}
{'type': 'loss', 'content': 0.07089866697788239, 'timestamp': '2025-10-02 00:22:40.093158', 'step': 6777, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:40.158153', 'step': 6777, 'epoch': 1}
{'type': 'loss', 'content': 0.0828540101647377, 'timestamp': '2025-10-02 00:22:40.168588', 'step': 6778, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:40.237682', 'step': 6778, 'epoch': 1}
{'type': 'loss', 'content': 0.0728626474738121, 'timestamp': '2025-10-02 00:22:40.241719', 'step': 6779, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:40.314154', 'step': 6779, 'epoch': 1}
{'type': 'loss', 'content': 0.10024665296077728, 'timestamp': '2025-10-02 00:22:40.324790', 'step': 6780, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:40.388680', 'step': 6780, 'epoch': 1}
{'type': 'loss', 'content': 0.04925740510225296, 'timestamp': '2025-10-02 00:22:40.397227', 'step': 6781, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:40.454046', 'step': 6781, 'epoch': 1}
{'type': 'loss', 'content': 0.08546850830316544, 'timestamp': '2025-10-02 00:22:40.461714', 'step': 6782, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:40.546917', 'step': 6782, 'epoch': 1}
{'type': 'loss', 'content': 0.07328023761510849, 'timestamp': '2025-10-02 00:22:40.557326', 'step': 6783, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:40.626655', 'step': 6783, 'epoch': 1}
{'type': 'loss', 'content': 0.035054612904787064, 'timestamp': '2025-10-02 00:22:40.633721', 'step': 6784, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:40.697164', 'step': 6784, 'epoch': 1}
{'type': 'loss', 'content': 0.11777827888727188, 'timestamp': '2025-10-02 00:22:40.705118', 'step': 6785, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:40.782558', 'step': 6785, 'epoch': 1}
{'type': 'loss', 'content': 0.0367933064699173, 'timestamp': '2025-10-02 00:22:40.790203', 'step': 6786, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:40.853138', 'step': 6786, 'epoch': 1}
{'type': 'loss', 'content': 0.07988592982292175, 'timestamp': '2025-10-02 00:22:40.855742', 'step': 6787, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:22:40.940538', 'step': 6787, 'epoch': 1}
{'type': 'loss', 'content': 0.013613658025860786, 'timestamp': '2025-10-02 00:22:40.953555', 'step': 6788, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:41.013779', 'step': 6788, 'epoch': 1}
{'type': 'loss', 'content': 0.2274225354194641, 'timestamp': '2025-10-02 00:22:41.016614', 'step': 6789, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:22:41.077241', 'step': 6789, 'epoch': 1}
{'type': 'loss', 'content': 0.14981889724731445, 'timestamp': '2025-10-02 00:22:41.081169', 'step': 6790, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:41.143673', 'step': 6790, 'epoch': 1}
{'type': 'loss', 'content': 0.10781075060367584, 'timestamp': '2025-10-02 00:22:41.148216', 'step': 6791, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:41.207340', 'step': 6791, 'epoch': 1}
{'type': 'loss', 'content': 0.10106749087572098, 'timestamp': '2025-10-02 00:22:41.214944', 'step': 6792, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:41.273250', 'step': 6792, 'epoch': 1}
{'type': 'loss', 'content': 0.054213568568229675, 'timestamp': '2025-10-02 00:22:41.276325', 'step': 6793, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:41.334470', 'step': 6793, 'epoch': 1}
{'type': 'loss', 'content': 0.061238475143909454, 'timestamp': '2025-10-02 00:22:41.337121', 'step': 6794, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:41.393978', 'step': 6794, 'epoch': 1}
{'type': 'loss', 'content': 0.08654745668172836, 'timestamp': '2025-10-02 00:22:41.396454', 'step': 6795, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:41.458834', 'step': 6795, 'epoch': 1}
{'type': 'loss', 'content': 0.1635153889656067, 'timestamp': '2025-10-02 00:22:41.464856', 'step': 6796, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:41.524244', 'step': 6796, 'epoch': 1}
{'type': 'loss', 'content': 0.1451665461063385, 'timestamp': '2025-10-02 00:22:41.530005', 'step': 6797, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:41.603142', 'step': 6797, 'epoch': 1}
{'type': 'loss', 'content': 0.2275054007768631, 'timestamp': '2025-10-02 00:22:41.610771', 'step': 6798, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:41.682007', 'step': 6798, 'epoch': 1}
{'type': 'loss', 'content': 0.1350909024477005, 'timestamp': '2025-10-02 00:22:41.685060', 'step': 6799, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:41.756867', 'step': 6799, 'epoch': 1}
{'type': 'loss', 'content': 0.07093503326177597, 'timestamp': '2025-10-02 00:22:41.764446', 'step': 6800, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:41.822570', 'step': 6800, 'epoch': 1}
{'type': 'loss', 'content': 0.03356678783893585, 'timestamp': '2025-10-02 00:22:41.827089', 'step': 6801, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:41.887130', 'step': 6801, 'epoch': 1}
{'type': 'loss', 'content': 0.21432292461395264, 'timestamp': '2025-10-02 00:22:41.891257', 'step': 6802, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:41.949134', 'step': 6802, 'epoch': 1}
{'type': 'loss', 'content': 0.03612764552235603, 'timestamp': '2025-10-02 00:22:41.953927', 'step': 6803, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:42.015157', 'step': 6803, 'epoch': 1}
{'type': 'loss', 'content': 0.053275756537914276, 'timestamp': '2025-10-02 00:22:42.026293', 'step': 6804, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:42.086587', 'step': 6804, 'epoch': 1}
{'type': 'loss', 'content': 0.03259536623954773, 'timestamp': '2025-10-02 00:22:42.094076', 'step': 6805, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:42.152885', 'step': 6805, 'epoch': 1}
{'type': 'loss', 'content': 0.05955730006098747, 'timestamp': '2025-10-02 00:22:42.162701', 'step': 6806, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:42.220125', 'step': 6806, 'epoch': 1}
{'type': 'loss', 'content': 0.20976674556732178, 'timestamp': '2025-10-02 00:22:42.222978', 'step': 6807, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:42.282439', 'step': 6807, 'epoch': 1}
{'type': 'loss', 'content': 0.11864259093999863, 'timestamp': '2025-10-02 00:22:42.292927', 'step': 6808, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:42.354625', 'step': 6808, 'epoch': 1}
{'type': 'loss', 'content': 0.05001172050833702, 'timestamp': '2025-10-02 00:22:42.360858', 'step': 6809, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:42.417697', 'step': 6809, 'epoch': 1}
{'type': 'loss', 'content': 0.1086539775133133, 'timestamp': '2025-10-02 00:22:42.419808', 'step': 6810, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:42.479080', 'step': 6810, 'epoch': 1}
{'type': 'loss', 'content': 0.21950513124465942, 'timestamp': '2025-10-02 00:22:42.481086', 'step': 6811, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:42.541572', 'step': 6811, 'epoch': 1}
{'type': 'loss', 'content': 0.04116246476769447, 'timestamp': '2025-10-02 00:22:42.551362', 'step': 6812, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:42.614124', 'step': 6812, 'epoch': 1}
{'type': 'loss', 'content': 0.19728341698646545, 'timestamp': '2025-10-02 00:22:42.622368', 'step': 6813, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:42.682460', 'step': 6813, 'epoch': 1}
{'type': 'loss', 'content': 0.035734351724386215, 'timestamp': '2025-10-02 00:22:42.692052', 'step': 6814, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:42.759698', 'step': 6814, 'epoch': 1}
{'type': 'loss', 'content': 0.051715221256017685, 'timestamp': '2025-10-02 00:22:42.763749', 'step': 6815, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:42.839418', 'step': 6815, 'epoch': 1}
{'type': 'loss', 'content': 0.06676853448152542, 'timestamp': '2025-10-02 00:22:42.846468', 'step': 6816, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:42.904631', 'step': 6816, 'epoch': 1}
{'type': 'loss', 'content': 0.057480886578559875, 'timestamp': '2025-10-02 00:22:42.910728', 'step': 6817, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:43.003754', 'step': 6817, 'epoch': 1}
{'type': 'loss', 'content': 0.051802776753902435, 'timestamp': '2025-10-02 00:22:43.009010', 'step': 6818, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:43.082772', 'step': 6818, 'epoch': 1}
{'type': 'loss', 'content': 0.10373912751674652, 'timestamp': '2025-10-02 00:22:43.092403', 'step': 6819, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:43.161131', 'step': 6819, 'epoch': 1}
{'type': 'loss', 'content': 0.17555411159992218, 'timestamp': '2025-10-02 00:22:43.175688', 'step': 6820, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:43.237131', 'step': 6820, 'epoch': 1}
{'type': 'loss', 'content': 0.14270052313804626, 'timestamp': '2025-10-02 00:22:43.243249', 'step': 6821, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:43.306117', 'step': 6821, 'epoch': 1}
{'type': 'loss', 'content': 0.12706832587718964, 'timestamp': '2025-10-02 00:22:43.309052', 'step': 6822, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:43.374115', 'step': 6822, 'epoch': 1}
{'type': 'loss', 'content': 0.1037922203540802, 'timestamp': '2025-10-02 00:22:43.379619', 'step': 6823, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:43.445010', 'step': 6823, 'epoch': 1}
{'type': 'loss', 'content': 0.0921257957816124, 'timestamp': '2025-10-02 00:22:43.453463', 'step': 6824, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:43.518865', 'step': 6824, 'epoch': 1}
{'type': 'loss', 'content': 0.024175649508833885, 'timestamp': '2025-10-02 00:22:43.526593', 'step': 6825, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:43.587879', 'step': 6825, 'epoch': 1}
{'type': 'loss', 'content': 0.08866710960865021, 'timestamp': '2025-10-02 00:22:43.592752', 'step': 6826, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:43.654404', 'step': 6826, 'epoch': 1}
{'type': 'loss', 'content': 0.027472632005810738, 'timestamp': '2025-10-02 00:22:43.664195', 'step': 6827, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:22:43.732997', 'step': 6827, 'epoch': 1}
{'type': 'loss', 'content': 0.01868063397705555, 'timestamp': '2025-10-02 00:22:43.744684', 'step': 6828, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:43.808402', 'step': 6828, 'epoch': 1}
{'type': 'loss', 'content': 0.17335712909698486, 'timestamp': '2025-10-02 00:22:43.811249', 'step': 6829, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:43.879858', 'step': 6829, 'epoch': 1}
{'type': 'loss', 'content': 0.03913472965359688, 'timestamp': '2025-10-02 00:22:43.890315', 'step': 6830, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:43.954171', 'step': 6830, 'epoch': 1}
{'type': 'loss', 'content': 0.20187093317508698, 'timestamp': '2025-10-02 00:22:43.961014', 'step': 6831, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:44.030427', 'step': 6831, 'epoch': 1}
{'type': 'loss', 'content': 0.05336076021194458, 'timestamp': '2025-10-02 00:22:44.041027', 'step': 6832, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:44.110034', 'step': 6832, 'epoch': 1}
{'type': 'loss', 'content': 0.10594666749238968, 'timestamp': '2025-10-02 00:22:44.115198', 'step': 6833, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:22:44.180050', 'step': 6833, 'epoch': 1}
{'type': 'loss', 'content': 0.09092967957258224, 'timestamp': '2025-10-02 00:22:44.184695', 'step': 6834, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:44.242796', 'step': 6834, 'epoch': 1}
{'type': 'loss', 'content': 0.13221396505832672, 'timestamp': '2025-10-02 00:22:44.247293', 'step': 6835, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:44.307303', 'step': 6835, 'epoch': 1}
{'type': 'loss', 'content': 0.13173526525497437, 'timestamp': '2025-10-02 00:22:44.317408', 'step': 6836, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:44.378784', 'step': 6836, 'epoch': 1}
{'type': 'loss', 'content': 0.11442703753709793, 'timestamp': '2025-10-02 00:22:44.382518', 'step': 6837, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:44.440480', 'step': 6837, 'epoch': 1}
{'type': 'loss', 'content': 0.1252356320619583, 'timestamp': '2025-10-02 00:22:44.443387', 'step': 6838, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:44.507074', 'step': 6838, 'epoch': 1}
{'type': 'loss', 'content': 0.08779320865869522, 'timestamp': '2025-10-02 00:22:44.510633', 'step': 6839, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:44.568431', 'step': 6839, 'epoch': 1}
{'type': 'loss', 'content': 0.056937262415885925, 'timestamp': '2025-10-02 00:22:44.579726', 'step': 6840, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:44.647477', 'step': 6840, 'epoch': 1}
{'type': 'loss', 'content': 0.13227660953998566, 'timestamp': '2025-10-02 00:22:44.651979', 'step': 6841, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:44.713168', 'step': 6841, 'epoch': 1}
{'type': 'loss', 'content': 0.08469309657812119, 'timestamp': '2025-10-02 00:22:44.716955', 'step': 6842, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:44.778978', 'step': 6842, 'epoch': 1}
{'type': 'loss', 'content': 0.07491925358772278, 'timestamp': '2025-10-02 00:22:44.781427', 'step': 6843, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:44.847652', 'step': 6843, 'epoch': 1}
{'type': 'loss', 'content': 0.01641533523797989, 'timestamp': '2025-10-02 00:22:44.856219', 'step': 6844, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:44.918586', 'step': 6844, 'epoch': 1}
{'type': 'loss', 'content': 0.140583336353302, 'timestamp': '2025-10-02 00:22:44.922050', 'step': 6845, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:44.980515', 'step': 6845, 'epoch': 1}
{'type': 'loss', 'content': 0.0372394323348999, 'timestamp': '2025-10-02 00:22:44.988271', 'step': 6846, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:45.047593', 'step': 6846, 'epoch': 1}
{'type': 'loss', 'content': 0.0667065903544426, 'timestamp': '2025-10-02 00:22:45.053695', 'step': 6847, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:45.112399', 'step': 6847, 'epoch': 1}
{'type': 'loss', 'content': 0.03031056933104992, 'timestamp': '2025-10-02 00:22:45.131052', 'step': 6848, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:45.205059', 'step': 6848, 'epoch': 1}
{'type': 'loss', 'content': 0.061749935150146484, 'timestamp': '2025-10-02 00:22:45.209378', 'step': 6849, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:45.284302', 'step': 6849, 'epoch': 1}
{'type': 'loss', 'content': 0.08657131344079971, 'timestamp': '2025-10-02 00:22:45.293892', 'step': 6850, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:45.353951', 'step': 6850, 'epoch': 1}
{'type': 'loss', 'content': 0.05435456708073616, 'timestamp': '2025-10-02 00:22:45.363590', 'step': 6851, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:45.457857', 'step': 6851, 'epoch': 1}
{'type': 'loss', 'content': 0.1116725504398346, 'timestamp': '2025-10-02 00:22:45.464109', 'step': 6852, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:22:45.544957', 'step': 6852, 'epoch': 1}
{'type': 'loss', 'content': 0.013577400706708431, 'timestamp': '2025-10-02 00:22:45.559975', 'step': 6853, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:45.637359', 'step': 6853, 'epoch': 1}
{'type': 'loss', 'content': 0.04931781068444252, 'timestamp': '2025-10-02 00:22:45.647628', 'step': 6854, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:45.725087', 'step': 6854, 'epoch': 1}
{'type': 'loss', 'content': 0.1095123291015625, 'timestamp': '2025-10-02 00:22:45.733976', 'step': 6855, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:22:45.797693', 'step': 6855, 'epoch': 1}
{'type': 'loss', 'content': 0.06523584574460983, 'timestamp': '2025-10-02 00:22:45.805657', 'step': 6856, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:45.877003', 'step': 6856, 'epoch': 1}
{'type': 'loss', 'content': 0.03548703342676163, 'timestamp': '2025-10-02 00:22:45.882920', 'step': 6857, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:45.960142', 'step': 6857, 'epoch': 1}
{'type': 'loss', 'content': 0.04996572807431221, 'timestamp': '2025-10-02 00:22:45.970632', 'step': 6858, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:46.052401', 'step': 6858, 'epoch': 1}
{'type': 'loss', 'content': 0.05991995707154274, 'timestamp': '2025-10-02 00:22:46.055287', 'step': 6859, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:22:46.112411', 'step': 6859, 'epoch': 1}
{'type': 'loss', 'content': 0.11278848350048065, 'timestamp': '2025-10-02 00:22:46.120184', 'step': 6860, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:46.186525', 'step': 6860, 'epoch': 1}
{'type': 'loss', 'content': 0.05464507266879082, 'timestamp': '2025-10-02 00:22:46.190055', 'step': 6861, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:22:46.252259', 'step': 6861, 'epoch': 1}
{'type': 'loss', 'content': 0.14570333063602448, 'timestamp': '2025-10-02 00:22:46.254934', 'step': 6862, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:22:46.320085', 'step': 6862, 'epoch': 1}
{'type': 'loss', 'content': 0.0721336156129837, 'timestamp': '2025-10-02 00:22:46.323491', 'step': 6863, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:46.379744', 'step': 6863, 'epoch': 1}
{'type': 'loss', 'content': 0.13771840929985046, 'timestamp': '2025-10-02 00:22:46.390049', 'step': 6864, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:22:46.466861', 'step': 6864, 'epoch': 1}
{'type': 'loss', 'content': 0.059226226061582565, 'timestamp': '2025-10-02 00:22:46.481488', 'step': 6865, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:46.556279', 'step': 6865, 'epoch': 1}
{'type': 'loss', 'content': 0.11668579280376434, 'timestamp': '2025-10-02 00:22:46.564039', 'step': 6866, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:46.633442', 'step': 6866, 'epoch': 1}
{'type': 'loss', 'content': 0.12851747870445251, 'timestamp': '2025-10-02 00:22:46.637235', 'step': 6867, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:46.701781', 'step': 6867, 'epoch': 1}
{'type': 'loss', 'content': 0.07062862813472748, 'timestamp': '2025-10-02 00:22:46.708555', 'step': 6868, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:46.779982', 'step': 6868, 'epoch': 1}
{'type': 'loss', 'content': 0.1457771360874176, 'timestamp': '2025-10-02 00:22:46.789545', 'step': 6869, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:46.855774', 'step': 6869, 'epoch': 1}
{'type': 'loss', 'content': 0.13584060966968536, 'timestamp': '2025-10-02 00:22:46.863037', 'step': 6870, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:46.925993', 'step': 6870, 'epoch': 1}
{'type': 'loss', 'content': 0.12273086607456207, 'timestamp': '2025-10-02 00:22:46.935540', 'step': 6871, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:47.005713', 'step': 6871, 'epoch': 1}
{'type': 'loss', 'content': 0.10814034193754196, 'timestamp': '2025-10-02 00:22:47.018160', 'step': 6872, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:47.087612', 'step': 6872, 'epoch': 1}
{'type': 'loss', 'content': 0.048238568007946014, 'timestamp': '2025-10-02 00:22:47.095057', 'step': 6873, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:22:47.177366', 'step': 6873, 'epoch': 1}
{'type': 'loss', 'content': 0.012469608336687088, 'timestamp': '2025-10-02 00:22:47.188425', 'step': 6874, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:47.248449', 'step': 6874, 'epoch': 1}
{'type': 'loss', 'content': 0.045455001294612885, 'timestamp': '2025-10-02 00:22:47.254398', 'step': 6875, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:22:47.332400', 'step': 6875, 'epoch': 1}
{'type': 'loss', 'content': 0.03919568657875061, 'timestamp': '2025-10-02 00:22:47.347889', 'step': 6876, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:22:47.417898', 'step': 6876, 'epoch': 1}
{'type': 'loss', 'content': 0.06730900704860687, 'timestamp': '2025-10-02 00:22:47.422016', 'step': 6877, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:47.499885', 'step': 6877, 'epoch': 1}
{'type': 'loss', 'content': 0.06605016440153122, 'timestamp': '2025-10-02 00:22:47.502949', 'step': 6878, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:47.570430', 'step': 6878, 'epoch': 1}
{'type': 'loss', 'content': 0.12064281851053238, 'timestamp': '2025-10-02 00:22:47.576668', 'step': 6879, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:47.655998', 'step': 6879, 'epoch': 1}
{'type': 'loss', 'content': 0.033950600773096085, 'timestamp': '2025-10-02 00:22:47.667336', 'step': 6880, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:47.736114', 'step': 6880, 'epoch': 1}
{'type': 'loss', 'content': 0.09488654881715775, 'timestamp': '2025-10-02 00:22:47.743406', 'step': 6881, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:47.814506', 'step': 6881, 'epoch': 1}
{'type': 'loss', 'content': 0.05234132707118988, 'timestamp': '2025-10-02 00:22:47.824004', 'step': 6882, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:47.896453', 'step': 6882, 'epoch': 1}
{'type': 'loss', 'content': 0.08532193303108215, 'timestamp': '2025-10-02 00:22:47.902941', 'step': 6883, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:47.975103', 'step': 6883, 'epoch': 1}
{'type': 'loss', 'content': 0.03314176946878433, 'timestamp': '2025-10-02 00:22:47.981902', 'step': 6884, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:22:48.060955', 'step': 6884, 'epoch': 1}
{'type': 'loss', 'content': 0.24520820379257202, 'timestamp': '2025-10-02 00:22:48.064554', 'step': 6885, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:48.126103', 'step': 6885, 'epoch': 1}
{'type': 'loss', 'content': 0.08451229333877563, 'timestamp': '2025-10-02 00:22:48.132210', 'step': 6886, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:48.197455', 'step': 6886, 'epoch': 1}
{'type': 'loss', 'content': 0.17373496294021606, 'timestamp': '2025-10-02 00:22:48.200534', 'step': 6887, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:48.267020', 'step': 6887, 'epoch': 1}
{'type': 'loss', 'content': 0.11823175102472305, 'timestamp': '2025-10-02 00:22:48.277418', 'step': 6888, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:48.339348', 'step': 6888, 'epoch': 1}
{'type': 'loss', 'content': 0.07336268573999405, 'timestamp': '2025-10-02 00:22:48.344953', 'step': 6889, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:48.412403', 'step': 6889, 'epoch': 1}
{'type': 'loss', 'content': 0.0342867411673069, 'timestamp': '2025-10-02 00:22:48.420194', 'step': 6890, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:48.484742', 'step': 6890, 'epoch': 1}
{'type': 'loss', 'content': 0.13176409900188446, 'timestamp': '2025-10-02 00:22:48.490915', 'step': 6891, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:48.551256', 'step': 6891, 'epoch': 1}
{'type': 'loss', 'content': 0.09688498824834824, 'timestamp': '2025-10-02 00:22:48.565084', 'step': 6892, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:48.629095', 'step': 6892, 'epoch': 1}
{'type': 'loss', 'content': 0.10796814411878586, 'timestamp': '2025-10-02 00:22:48.634603', 'step': 6893, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:48.706123', 'step': 6893, 'epoch': 1}
{'type': 'loss', 'content': 0.02622307650744915, 'timestamp': '2025-10-02 00:22:48.715929', 'step': 6894, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:48.789622', 'step': 6894, 'epoch': 1}
{'type': 'loss', 'content': 0.19201774895191193, 'timestamp': '2025-10-02 00:22:48.793126', 'step': 6895, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:48.858502', 'step': 6895, 'epoch': 1}
{'type': 'loss', 'content': 0.0742892175912857, 'timestamp': '2025-10-02 00:22:48.871691', 'step': 6896, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:48.948957', 'step': 6896, 'epoch': 1}
{'type': 'loss', 'content': 0.12926925718784332, 'timestamp': '2025-10-02 00:22:48.952475', 'step': 6897, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:49.021532', 'step': 6897, 'epoch': 1}
{'type': 'loss', 'content': 0.037692662328481674, 'timestamp': '2025-10-02 00:22:49.027585', 'step': 6898, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:49.086561', 'step': 6898, 'epoch': 1}
{'type': 'loss', 'content': 0.08091920614242554, 'timestamp': '2025-10-02 00:22:49.091227', 'step': 6899, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:22:49.172846', 'step': 6899, 'epoch': 1}
{'type': 'loss', 'content': 0.0965074747800827, 'timestamp': '2025-10-02 00:22:49.188328', 'step': 6900, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:49.252341', 'step': 6900, 'epoch': 1}
{'type': 'loss', 'content': 0.13895748555660248, 'timestamp': '2025-10-02 00:22:49.262971', 'step': 6901, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:22:49.342364', 'step': 6901, 'epoch': 1}
{'type': 'loss', 'content': 0.04914410412311554, 'timestamp': '2025-10-02 00:22:49.353020', 'step': 6902, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:22:49.435497', 'step': 6902, 'epoch': 1}
{'type': 'loss', 'content': 0.007953852415084839, 'timestamp': '2025-10-02 00:22:49.449236', 'step': 6903, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:49.512374', 'step': 6903, 'epoch': 1}
{'type': 'loss', 'content': 0.06786814332008362, 'timestamp': '2025-10-02 00:22:49.520840', 'step': 6904, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:49.584482', 'step': 6904, 'epoch': 1}
{'type': 'loss', 'content': 0.12066465616226196, 'timestamp': '2025-10-02 00:22:49.590704', 'step': 6905, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:49.647317', 'step': 6905, 'epoch': 1}
{'type': 'loss', 'content': 0.0583568811416626, 'timestamp': '2025-10-02 00:22:49.655938', 'step': 6906, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:49.727069', 'step': 6906, 'epoch': 1}
{'type': 'loss', 'content': 0.06952507048845291, 'timestamp': '2025-10-02 00:22:49.736681', 'step': 6907, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:49.797349', 'step': 6907, 'epoch': 1}
{'type': 'loss', 'content': 0.05397947132587433, 'timestamp': '2025-10-02 00:22:49.803539', 'step': 6908, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:49.872681', 'step': 6908, 'epoch': 1}
{'type': 'loss', 'content': 0.03898556903004646, 'timestamp': '2025-10-02 00:22:49.883199', 'step': 6909, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:49.963922', 'step': 6909, 'epoch': 1}
{'type': 'loss', 'content': 0.048646751791238785, 'timestamp': '2025-10-02 00:22:49.967058', 'step': 6910, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:50.035412', 'step': 6910, 'epoch': 1}
{'type': 'loss', 'content': 0.19319483637809753, 'timestamp': '2025-10-02 00:22:50.038753', 'step': 6911, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:50.101576', 'step': 6911, 'epoch': 1}
{'type': 'loss', 'content': 0.057801034301519394, 'timestamp': '2025-10-02 00:22:50.111964', 'step': 6912, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:50.173895', 'step': 6912, 'epoch': 1}
{'type': 'loss', 'content': 0.07079537957906723, 'timestamp': '2025-10-02 00:22:50.181806', 'step': 6913, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:50.252653', 'step': 6913, 'epoch': 1}
{'type': 'loss', 'content': 0.14319957792758942, 'timestamp': '2025-10-02 00:22:50.256998', 'step': 6914, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:50.322183', 'step': 6914, 'epoch': 1}
{'type': 'loss', 'content': 0.06592455506324768, 'timestamp': '2025-10-02 00:22:50.330003', 'step': 6915, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:50.401869', 'step': 6915, 'epoch': 1}
{'type': 'loss', 'content': 0.1179070919752121, 'timestamp': '2025-10-02 00:22:50.412634', 'step': 6916, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:50.472119', 'step': 6916, 'epoch': 1}
{'type': 'loss', 'content': 0.1943449229001999, 'timestamp': '2025-10-02 00:22:50.479883', 'step': 6917, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:50.555414', 'step': 6917, 'epoch': 1}
{'type': 'loss', 'content': 0.03571723401546478, 'timestamp': '2025-10-02 00:22:50.564995', 'step': 6918, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:50.629365', 'step': 6918, 'epoch': 1}
{'type': 'loss', 'content': 0.06558595597743988, 'timestamp': '2025-10-02 00:22:50.635373', 'step': 6919, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:50.694411', 'step': 6919, 'epoch': 1}
{'type': 'loss', 'content': 0.019213136285543442, 'timestamp': '2025-10-02 00:22:50.706473', 'step': 6920, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:50.773407', 'step': 6920, 'epoch': 1}
{'type': 'loss', 'content': 0.07255344837903976, 'timestamp': '2025-10-02 00:22:50.781760', 'step': 6921, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:50.854655', 'step': 6921, 'epoch': 1}
{'type': 'loss', 'content': 0.04526350647211075, 'timestamp': '2025-10-02 00:22:50.860890', 'step': 6922, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:22:50.930834', 'step': 6922, 'epoch': 1}
{'type': 'loss', 'content': 0.19787472486495972, 'timestamp': '2025-10-02 00:22:50.934563', 'step': 6923, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:50.994910', 'step': 6923, 'epoch': 1}
{'type': 'loss', 'content': 0.0711815282702446, 'timestamp': '2025-10-02 00:22:51.003586', 'step': 6924, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:51.064422', 'step': 6924, 'epoch': 1}
{'type': 'loss', 'content': 0.11628693342208862, 'timestamp': '2025-10-02 00:22:51.067539', 'step': 6925, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:51.133108', 'step': 6925, 'epoch': 1}
{'type': 'loss', 'content': 0.024178842082619667, 'timestamp': '2025-10-02 00:22:51.136117', 'step': 6926, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:51.199918', 'step': 6926, 'epoch': 1}
{'type': 'loss', 'content': 0.0979093536734581, 'timestamp': '2025-10-02 00:22:51.202840', 'step': 6927, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:51.257555', 'step': 6927, 'epoch': 1}
{'type': 'loss', 'content': 0.08715485036373138, 'timestamp': '2025-10-02 00:22:51.263941', 'step': 6928, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:22:51.322710', 'step': 6928, 'epoch': 1}
{'type': 'loss', 'content': 0.11028368771076202, 'timestamp': '2025-10-02 00:22:51.329373', 'step': 6929, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:51.384910', 'step': 6929, 'epoch': 1}
{'type': 'loss', 'content': 0.07811450213193893, 'timestamp': '2025-10-02 00:22:51.394517', 'step': 6930, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:51.464243', 'step': 6930, 'epoch': 1}
{'type': 'loss', 'content': 0.03846772760152817, 'timestamp': '2025-10-02 00:22:51.473770', 'step': 6931, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:51.538357', 'step': 6931, 'epoch': 1}
{'type': 'loss', 'content': 0.05923111364245415, 'timestamp': '2025-10-02 00:22:51.549566', 'step': 6932, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:51.615887', 'step': 6932, 'epoch': 1}
{'type': 'loss', 'content': 0.10766157507896423, 'timestamp': '2025-10-02 00:22:51.628635', 'step': 6933, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:51.702416', 'step': 6933, 'epoch': 1}
{'type': 'loss', 'content': 0.07452422380447388, 'timestamp': '2025-10-02 00:22:51.709940', 'step': 6934, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:51.782363', 'step': 6934, 'epoch': 1}
{'type': 'loss', 'content': 0.1108093410730362, 'timestamp': '2025-10-02 00:22:51.785375', 'step': 6935, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:51.867768', 'step': 6935, 'epoch': 1}
{'type': 'loss', 'content': 0.09354013949632645, 'timestamp': '2025-10-02 00:22:51.880749', 'step': 6936, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:51.944548', 'step': 6936, 'epoch': 1}
{'type': 'loss', 'content': 0.21713252365589142, 'timestamp': '2025-10-02 00:22:51.953701', 'step': 6937, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:52.018374', 'step': 6937, 'epoch': 1}
{'type': 'loss', 'content': 0.08794484287500381, 'timestamp': '2025-10-02 00:22:52.021296', 'step': 6938, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:22:52.096942', 'step': 6938, 'epoch': 1}
{'type': 'loss', 'content': 0.11968827992677689, 'timestamp': '2025-10-02 00:22:52.101051', 'step': 6939, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:52.190405', 'step': 6939, 'epoch': 1}
{'type': 'loss', 'content': 0.06624480336904526, 'timestamp': '2025-10-02 00:22:52.201976', 'step': 6940, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:22:52.263768', 'step': 6940, 'epoch': 1}
{'type': 'loss', 'content': 0.061392247676849365, 'timestamp': '2025-10-02 00:22:52.267295', 'step': 6941, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:22:52.327601', 'step': 6941, 'epoch': 1}
{'type': 'loss', 'content': 0.03198637068271637, 'timestamp': '2025-10-02 00:22:52.336406', 'step': 6942, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:52.409399', 'step': 6942, 'epoch': 1}
{'type': 'loss', 'content': 0.06516171991825104, 'timestamp': '2025-10-02 00:22:52.413632', 'step': 6943, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:22:52.488486', 'step': 6943, 'epoch': 1}
{'type': 'loss', 'content': 0.07588982582092285, 'timestamp': '2025-10-02 00:22:52.496515', 'step': 6944, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:22:52.563795', 'step': 6944, 'epoch': 1}
{'type': 'loss', 'content': 0.11289612948894501, 'timestamp': '2025-10-02 00:22:52.567392', 'step': 6945, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:52.630668', 'step': 6945, 'epoch': 1}
{'type': 'loss', 'content': 0.1537592113018036, 'timestamp': '2025-10-02 00:22:52.637151', 'step': 6946, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:22:52.702997', 'step': 6946, 'epoch': 1}
{'type': 'loss', 'content': 0.04708950221538544, 'timestamp': '2025-10-02 00:22:52.708887', 'step': 6947, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:52.769357', 'step': 6947, 'epoch': 1}
{'type': 'loss', 'content': 0.13731110095977783, 'timestamp': '2025-10-02 00:22:52.776270', 'step': 6948, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:52.836182', 'step': 6948, 'epoch': 1}
{'type': 'loss', 'content': 0.10086218267679214, 'timestamp': '2025-10-02 00:22:52.841674', 'step': 6949, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:52.905670', 'step': 6949, 'epoch': 1}
{'type': 'loss', 'content': 0.1341714709997177, 'timestamp': '2025-10-02 00:22:52.908453', 'step': 6950, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:22:52.971048', 'step': 6950, 'epoch': 1}
{'type': 'loss', 'content': 0.03691316395998001, 'timestamp': '2025-10-02 00:22:52.974605', 'step': 6951, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:53.032003', 'step': 6951, 'epoch': 1}
{'type': 'loss', 'content': 0.17171241343021393, 'timestamp': '2025-10-02 00:22:53.038582', 'step': 6952, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:22:53.101887', 'step': 6952, 'epoch': 1}
{'type': 'loss', 'content': 0.16293367743492126, 'timestamp': '2025-10-02 00:22:53.105639', 'step': 6953, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:22:53.181659', 'step': 6953, 'epoch': 1}
{'type': 'loss', 'content': 0.02997690439224243, 'timestamp': '2025-10-02 00:22:53.191574', 'step': 6954, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:22:53.280835', 'step': 6954, 'epoch': 1}
{'type': 'loss', 'content': 0.0896558091044426, 'timestamp': '2025-10-02 00:22:53.291301', 'step': 6955, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:22:53.366308', 'step': 6955, 'epoch': 1}
{'type': 'loss', 'content': 0.04039781168103218, 'timestamp': '2025-10-02 00:22:53.376674', 'step': 6956, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:22:53.443707', 'step': 6956, 'epoch': 1}
{'type': 'loss', 'content': 0.03770504891872406, 'timestamp': '2025-10-02 00:22:53.453105', 'step': 6957, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:22:53.524794', 'step': 6957, 'epoch': 1}
{'type': 'loss', 'content': 0.1176876500248909, 'timestamp': '2025-10-02 00:22:53.527074', 'step': 6958, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:22:53.588396', 'step': 6958, 'epoch': 1}
{'type': 'loss', 'content': 0.11775068193674088, 'timestamp': '2025-10-02 00:22:53.596457', 'step': 6959, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:22:53.669676', 'step': 6959, 'epoch': 1}
{'type': 'loss', 'content': 0.09390082210302353, 'timestamp': '2025-10-02 00:22:53.676851', 'step': 6960, 'epoch': 1}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:23:22.483272', 'step': 6960, 'epoch': 1}
{'type': 'pplx', 'content': 87.39919072501566, 'timestamp': '2025-10-02 00:23:22.494366', 'step': 6960, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:23:22.558494', 'step': 6960, 'epoch': 1}
{'type': 'loss', 'content': 0.15292945504188538, 'timestamp': '2025-10-02 00:23:22.565054', 'step': 6961, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:22.636325', 'step': 6961, 'epoch': 1}
{'type': 'loss', 'content': 0.06431664526462555, 'timestamp': '2025-10-02 00:23:22.642959', 'step': 6962, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:22.709934', 'step': 6962, 'epoch': 1}
{'type': 'loss', 'content': 0.14195753633975983, 'timestamp': '2025-10-02 00:23:22.711957', 'step': 6963, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:23:22.766222', 'step': 6963, 'epoch': 1}
{'type': 'loss', 'content': 0.13514924049377441, 'timestamp': '2025-10-02 00:23:22.772221', 'step': 6964, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:22.826122', 'step': 6964, 'epoch': 1}
{'type': 'loss', 'content': 0.12433180212974548, 'timestamp': '2025-10-02 00:23:22.828385', 'step': 6965, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:22.882576', 'step': 6965, 'epoch': 1}
{'type': 'loss', 'content': 0.22367827594280243, 'timestamp': '2025-10-02 00:23:22.884596', 'step': 6966, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:22.938614', 'step': 6966, 'epoch': 1}
{'type': 'loss', 'content': 0.026474962010979652, 'timestamp': '2025-10-02 00:23:22.944405', 'step': 6967, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:22.998298', 'step': 6967, 'epoch': 1}
{'type': 'loss', 'content': 0.09763862937688828, 'timestamp': '2025-10-02 00:23:23.004947', 'step': 6968, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:23.058769', 'step': 6968, 'epoch': 1}
{'type': 'loss', 'content': 0.037004292011260986, 'timestamp': '2025-10-02 00:23:23.064688', 'step': 6969, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:23.119250', 'step': 6969, 'epoch': 1}
{'type': 'loss', 'content': 0.06536160409450531, 'timestamp': '2025-10-02 00:23:23.126779', 'step': 6970, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:23.181301', 'step': 6970, 'epoch': 1}
{'type': 'loss', 'content': 0.14684388041496277, 'timestamp': '2025-10-02 00:23:23.183332', 'step': 6971, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:23.236920', 'step': 6971, 'epoch': 1}
{'type': 'loss', 'content': 0.05308714136481285, 'timestamp': '2025-10-02 00:23:23.242592', 'step': 6972, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:23.296182', 'step': 6972, 'epoch': 1}
{'type': 'loss', 'content': 0.014726361259818077, 'timestamp': '2025-10-02 00:23:23.304105', 'step': 6973, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:23.358777', 'step': 6973, 'epoch': 1}
{'type': 'loss', 'content': 0.04770566523075104, 'timestamp': '2025-10-02 00:23:23.364798', 'step': 6974, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:23.421135', 'step': 6974, 'epoch': 1}
{'type': 'loss', 'content': 0.029979918152093887, 'timestamp': '2025-10-02 00:23:23.426832', 'step': 6975, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:23.482487', 'step': 6975, 'epoch': 1}
{'type': 'loss', 'content': 0.04130706563591957, 'timestamp': '2025-10-02 00:23:23.492834', 'step': 6976, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:23.550231', 'step': 6976, 'epoch': 1}
{'type': 'loss', 'content': 0.04774804040789604, 'timestamp': '2025-10-02 00:23:23.559804', 'step': 6977, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:23.615668', 'step': 6977, 'epoch': 1}
{'type': 'loss', 'content': 0.02092185989022255, 'timestamp': '2025-10-02 00:23:23.625085', 'step': 6978, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:23:23.688489', 'step': 6978, 'epoch': 1}
{'type': 'loss', 'content': 0.0076446449384093285, 'timestamp': '2025-10-02 00:23:23.699146', 'step': 6979, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:23.753921', 'step': 6979, 'epoch': 1}
{'type': 'loss', 'content': 0.11419398337602615, 'timestamp': '2025-10-02 00:23:23.759439', 'step': 6980, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:23.812292', 'step': 6980, 'epoch': 1}
{'type': 'loss', 'content': 0.06629098951816559, 'timestamp': '2025-10-02 00:23:23.814670', 'step': 6981, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:23.868405', 'step': 6981, 'epoch': 1}
{'type': 'loss', 'content': 0.029601342976093292, 'timestamp': '2025-10-02 00:23:23.875983', 'step': 6982, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:23.931015', 'step': 6982, 'epoch': 1}
{'type': 'loss', 'content': 0.2396172136068344, 'timestamp': '2025-10-02 00:23:23.933468', 'step': 6983, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:23.986839', 'step': 6983, 'epoch': 1}
{'type': 'loss', 'content': 0.0850970596075058, 'timestamp': '2025-10-02 00:23:23.995176', 'step': 6984, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:24.048205', 'step': 6984, 'epoch': 1}
{'type': 'loss', 'content': 0.08287461847066879, 'timestamp': '2025-10-02 00:23:24.050554', 'step': 6985, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:24.106103', 'step': 6985, 'epoch': 1}
{'type': 'loss', 'content': 0.020447546616196632, 'timestamp': '2025-10-02 00:23:24.115622', 'step': 6986, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:24.169008', 'step': 6986, 'epoch': 1}
{'type': 'loss', 'content': 0.21320416033267975, 'timestamp': '2025-10-02 00:23:24.171018', 'step': 6987, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:24.224395', 'step': 6987, 'epoch': 1}
{'type': 'loss', 'content': 0.09784526377916336, 'timestamp': '2025-10-02 00:23:24.231106', 'step': 6988, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:24.283511', 'step': 6988, 'epoch': 1}
{'type': 'loss', 'content': 0.06486047059297562, 'timestamp': '2025-10-02 00:23:24.285515', 'step': 6989, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:24.339339', 'step': 6989, 'epoch': 1}
{'type': 'loss', 'content': 0.10684232413768768, 'timestamp': '2025-10-02 00:23:24.341706', 'step': 6990, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:24.395592', 'step': 6990, 'epoch': 1}
{'type': 'loss', 'content': 0.09512794762849808, 'timestamp': '2025-10-02 00:23:24.402981', 'step': 6991, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:24.457344', 'step': 6991, 'epoch': 1}
{'type': 'loss', 'content': 0.04579826071858406, 'timestamp': '2025-10-02 00:23:24.467676', 'step': 6992, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:24.521638', 'step': 6992, 'epoch': 1}
{'type': 'loss', 'content': 0.09122780710458755, 'timestamp': '2025-10-02 00:23:24.524027', 'step': 6993, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:24.579213', 'step': 6993, 'epoch': 1}
{'type': 'loss', 'content': 0.07865304499864578, 'timestamp': '2025-10-02 00:23:24.582167', 'step': 6994, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:24.638812', 'step': 6994, 'epoch': 1}
{'type': 'loss', 'content': 0.03219889476895332, 'timestamp': '2025-10-02 00:23:24.648361', 'step': 6995, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:24.703036', 'step': 6995, 'epoch': 1}
{'type': 'loss', 'content': 0.025148840621113777, 'timestamp': '2025-10-02 00:23:24.711977', 'step': 6996, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:24.766650', 'step': 6996, 'epoch': 1}
{'type': 'loss', 'content': 0.1026628166437149, 'timestamp': '2025-10-02 00:23:24.769002', 'step': 6997, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:24.822927', 'step': 6997, 'epoch': 1}
{'type': 'loss', 'content': 0.05639781430363655, 'timestamp': '2025-10-02 00:23:24.825348', 'step': 6998, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:24.879945', 'step': 6998, 'epoch': 1}
{'type': 'loss', 'content': 0.11165643483400345, 'timestamp': '2025-10-02 00:23:24.885561', 'step': 6999, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:23:24.939617', 'step': 6999, 'epoch': 1}
{'type': 'loss', 'content': 0.16226910054683685, 'timestamp': '2025-10-02 00:23:24.945158', 'step': 7000, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 7000', 'timestamp': '2025-10-02 00:23:25.365161', 'step': 7000, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:23:25.431112', 'step': 7000, 'epoch': 1}
{'type': 'loss', 'content': 0.06674710661172867, 'timestamp': '2025-10-02 00:23:25.444019', 'step': 7001, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:25.499200', 'step': 7001, 'epoch': 1}
{'type': 'loss', 'content': 0.04297367110848427, 'timestamp': '2025-10-02 00:23:25.508291', 'step': 7002, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:25.563921', 'step': 7002, 'epoch': 1}
{'type': 'loss', 'content': 0.037757858633995056, 'timestamp': '2025-10-02 00:23:25.566758', 'step': 7003, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:25.621528', 'step': 7003, 'epoch': 1}
{'type': 'loss', 'content': 0.15795211493968964, 'timestamp': '2025-10-02 00:23:25.629367', 'step': 7004, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:23:25.683892', 'step': 7004, 'epoch': 1}
{'type': 'loss', 'content': 0.11163686960935593, 'timestamp': '2025-10-02 00:23:25.686027', 'step': 7005, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:25.740849', 'step': 7005, 'epoch': 1}
{'type': 'loss', 'content': 0.0584096759557724, 'timestamp': '2025-10-02 00:23:25.750096', 'step': 7006, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:23:25.805509', 'step': 7006, 'epoch': 1}
{'type': 'loss', 'content': 0.13543128967285156, 'timestamp': '2025-10-02 00:23:25.808023', 'step': 7007, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:25.862527', 'step': 7007, 'epoch': 1}
{'type': 'loss', 'content': 0.07502785325050354, 'timestamp': '2025-10-02 00:23:25.870624', 'step': 7008, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:23:25.924474', 'step': 7008, 'epoch': 1}
{'type': 'loss', 'content': 0.10514898598194122, 'timestamp': '2025-10-02 00:23:25.926629', 'step': 7009, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:25.980497', 'step': 7009, 'epoch': 1}
{'type': 'loss', 'content': 0.13917911052703857, 'timestamp': '2025-10-02 00:23:25.986284', 'step': 7010, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:26.040448', 'step': 7010, 'epoch': 1}
{'type': 'loss', 'content': 0.05896667018532753, 'timestamp': '2025-10-02 00:23:26.049633', 'step': 7011, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:26.104910', 'step': 7011, 'epoch': 1}
{'type': 'loss', 'content': 0.04375404864549637, 'timestamp': '2025-10-02 00:23:26.111203', 'step': 7012, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:26.164811', 'step': 7012, 'epoch': 1}
{'type': 'loss', 'content': 0.11776679009199142, 'timestamp': '2025-10-02 00:23:26.170760', 'step': 7013, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:23:26.224759', 'step': 7013, 'epoch': 1}
{'type': 'loss', 'content': 0.14054444432258606, 'timestamp': '2025-10-02 00:23:26.226852', 'step': 7014, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:26.281036', 'step': 7014, 'epoch': 1}
{'type': 'loss', 'content': 0.01915089599788189, 'timestamp': '2025-10-02 00:23:26.283238', 'step': 7015, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:26.336654', 'step': 7015, 'epoch': 1}
{'type': 'loss', 'content': 0.14876316487789154, 'timestamp': '2025-10-02 00:23:26.342515', 'step': 7016, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:23:26.395472', 'step': 7016, 'epoch': 1}
{'type': 'loss', 'content': 0.1108701080083847, 'timestamp': '2025-10-02 00:23:26.397631', 'step': 7017, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:26.451277', 'step': 7017, 'epoch': 1}
{'type': 'loss', 'content': 0.04473806172609329, 'timestamp': '2025-10-02 00:23:26.458629', 'step': 7018, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:26.512657', 'step': 7018, 'epoch': 1}
{'type': 'loss', 'content': 0.10279654711484909, 'timestamp': '2025-10-02 00:23:26.514792', 'step': 7019, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:26.569889', 'step': 7019, 'epoch': 1}
{'type': 'loss', 'content': 0.04753255099058151, 'timestamp': '2025-10-02 00:23:26.580208', 'step': 7020, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:26.634893', 'step': 7020, 'epoch': 1}
{'type': 'loss', 'content': 0.15683656930923462, 'timestamp': '2025-10-02 00:23:26.637379', 'step': 7021, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:26.692197', 'step': 7021, 'epoch': 1}
{'type': 'loss', 'content': 0.1273537129163742, 'timestamp': '2025-10-02 00:23:26.698091', 'step': 7022, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:26.754093', 'step': 7022, 'epoch': 1}
{'type': 'loss', 'content': 0.0774761289358139, 'timestamp': '2025-10-02 00:23:26.763646', 'step': 7023, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:26.817522', 'step': 7023, 'epoch': 1}
{'type': 'loss', 'content': 0.11895725876092911, 'timestamp': '2025-10-02 00:23:26.822926', 'step': 7024, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:26.876569', 'step': 7024, 'epoch': 1}
{'type': 'loss', 'content': 0.06570003926753998, 'timestamp': '2025-10-02 00:23:26.883854', 'step': 7025, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:26.940046', 'step': 7025, 'epoch': 1}
{'type': 'loss', 'content': 0.07676396518945694, 'timestamp': '2025-10-02 00:23:26.942814', 'step': 7026, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:26.998139', 'step': 7026, 'epoch': 1}
{'type': 'loss', 'content': 0.11108144372701645, 'timestamp': '2025-10-02 00:23:27.000738', 'step': 7027, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:23:27.054755', 'step': 7027, 'epoch': 1}
{'type': 'loss', 'content': 0.1297697126865387, 'timestamp': '2025-10-02 00:23:27.061081', 'step': 7028, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:27.116352', 'step': 7028, 'epoch': 1}
{'type': 'loss', 'content': 0.04609309881925583, 'timestamp': '2025-10-02 00:23:27.121045', 'step': 7029, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:27.187304', 'step': 7029, 'epoch': 1}
{'type': 'loss', 'content': 0.09596432000398636, 'timestamp': '2025-10-02 00:23:27.189528', 'step': 7030, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:27.255962', 'step': 7030, 'epoch': 1}
{'type': 'loss', 'content': 0.026525752618908882, 'timestamp': '2025-10-02 00:23:27.261521', 'step': 7031, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:27.317670', 'step': 7031, 'epoch': 1}
{'type': 'loss', 'content': 0.088541179895401, 'timestamp': '2025-10-02 00:23:27.324271', 'step': 7032, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:27.379145', 'step': 7032, 'epoch': 1}
{'type': 'loss', 'content': 0.05945080891251564, 'timestamp': '2025-10-02 00:23:27.381868', 'step': 7033, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:27.439381', 'step': 7033, 'epoch': 1}
{'type': 'loss', 'content': 0.02990959770977497, 'timestamp': '2025-10-02 00:23:27.448571', 'step': 7034, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:27.506185', 'step': 7034, 'epoch': 1}
{'type': 'loss', 'content': 0.0404914915561676, 'timestamp': '2025-10-02 00:23:27.509445', 'step': 7035, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:23:27.585607', 'step': 7035, 'epoch': 1}
{'type': 'loss', 'content': 0.05607571825385094, 'timestamp': '2025-10-02 00:23:27.599762', 'step': 7036, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:27.655150', 'step': 7036, 'epoch': 1}
{'type': 'loss', 'content': 0.18651995062828064, 'timestamp': '2025-10-02 00:23:27.657644', 'step': 7037, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:27.715750', 'step': 7037, 'epoch': 1}
{'type': 'loss', 'content': 0.07213771343231201, 'timestamp': '2025-10-02 00:23:27.718101', 'step': 7038, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:27.773596', 'step': 7038, 'epoch': 1}
{'type': 'loss', 'content': 0.20323419570922852, 'timestamp': '2025-10-02 00:23:27.775670', 'step': 7039, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:27.831598', 'step': 7039, 'epoch': 1}
{'type': 'loss', 'content': 0.03361094743013382, 'timestamp': '2025-10-02 00:23:27.837962', 'step': 7040, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:27.892853', 'step': 7040, 'epoch': 1}
{'type': 'loss', 'content': 0.07090461999177933, 'timestamp': '2025-10-02 00:23:27.898756', 'step': 7041, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:27.956072', 'step': 7041, 'epoch': 1}
{'type': 'loss', 'content': 0.21911250054836273, 'timestamp': '2025-10-02 00:23:27.958953', 'step': 7042, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:23:28.014662', 'step': 7042, 'epoch': 1}
{'type': 'loss', 'content': 0.160726860165596, 'timestamp': '2025-10-02 00:23:28.017222', 'step': 7043, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:28.073123', 'step': 7043, 'epoch': 1}
{'type': 'loss', 'content': 0.10950155556201935, 'timestamp': '2025-10-02 00:23:28.079026', 'step': 7044, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:23:28.133290', 'step': 7044, 'epoch': 1}
{'type': 'loss', 'content': 0.13855616748332977, 'timestamp': '2025-10-02 00:23:28.135993', 'step': 7045, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:28.190681', 'step': 7045, 'epoch': 1}
{'type': 'loss', 'content': 0.08125561475753784, 'timestamp': '2025-10-02 00:23:28.193517', 'step': 7046, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:28.248951', 'step': 7046, 'epoch': 1}
{'type': 'loss', 'content': 0.05083658546209335, 'timestamp': '2025-10-02 00:23:28.251540', 'step': 7047, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:28.306604', 'step': 7047, 'epoch': 1}
{'type': 'loss', 'content': 0.03873960301280022, 'timestamp': '2025-10-02 00:23:28.312382', 'step': 7048, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:28.367118', 'step': 7048, 'epoch': 1}
{'type': 'loss', 'content': 0.13835479319095612, 'timestamp': '2025-10-02 00:23:28.369551', 'step': 7049, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:28.425131', 'step': 7049, 'epoch': 1}
{'type': 'loss', 'content': 0.016455236822366714, 'timestamp': '2025-10-02 00:23:28.432760', 'step': 7050, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:28.491542', 'step': 7050, 'epoch': 1}
{'type': 'loss', 'content': 0.07665861397981644, 'timestamp': '2025-10-02 00:23:28.494118', 'step': 7051, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:28.548413', 'step': 7051, 'epoch': 1}
{'type': 'loss', 'content': 0.17079921066761017, 'timestamp': '2025-10-02 00:23:28.555279', 'step': 7052, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:23:28.622822', 'step': 7052, 'epoch': 1}
{'type': 'loss', 'content': 0.02767440490424633, 'timestamp': '2025-10-02 00:23:28.635816', 'step': 7053, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:28.693119', 'step': 7053, 'epoch': 1}
{'type': 'loss', 'content': 0.07220182567834854, 'timestamp': '2025-10-02 00:23:28.698928', 'step': 7054, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:28.755112', 'step': 7054, 'epoch': 1}
{'type': 'loss', 'content': 0.05950102210044861, 'timestamp': '2025-10-02 00:23:28.757608', 'step': 7055, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:28.814481', 'step': 7055, 'epoch': 1}
{'type': 'loss', 'content': 0.01799236424267292, 'timestamp': '2025-10-02 00:23:28.821098', 'step': 7056, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:28.876168', 'step': 7056, 'epoch': 1}
{'type': 'loss', 'content': 0.10119561105966568, 'timestamp': '2025-10-02 00:23:28.881914', 'step': 7057, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:28.936073', 'step': 7057, 'epoch': 1}
{'type': 'loss', 'content': 0.1653406322002411, 'timestamp': '2025-10-02 00:23:28.938749', 'step': 7058, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:28.995107', 'step': 7058, 'epoch': 1}
{'type': 'loss', 'content': 0.08971661329269409, 'timestamp': '2025-10-02 00:23:29.000490', 'step': 7059, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:29.055607', 'step': 7059, 'epoch': 1}
{'type': 'loss', 'content': 0.04374248906970024, 'timestamp': '2025-10-02 00:23:29.061484', 'step': 7060, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:29.115443', 'step': 7060, 'epoch': 1}
{'type': 'loss', 'content': 0.06809515506029129, 'timestamp': '2025-10-02 00:23:29.122838', 'step': 7061, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:29.176528', 'step': 7061, 'epoch': 1}
{'type': 'loss', 'content': 0.1245654821395874, 'timestamp': '2025-10-02 00:23:29.178856', 'step': 7062, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:29.237600', 'step': 7062, 'epoch': 1}
{'type': 'loss', 'content': 0.05940741300582886, 'timestamp': '2025-10-02 00:23:29.239800', 'step': 7063, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:29.295124', 'step': 7063, 'epoch': 1}
{'type': 'loss', 'content': 0.0889865830540657, 'timestamp': '2025-10-02 00:23:29.305440', 'step': 7064, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:29.358201', 'step': 7064, 'epoch': 1}
{'type': 'loss', 'content': 0.18514184653759003, 'timestamp': '2025-10-02 00:23:29.361335', 'step': 7065, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:29.420366', 'step': 7065, 'epoch': 1}
{'type': 'loss', 'content': 0.04925369843840599, 'timestamp': '2025-10-02 00:23:29.422581', 'step': 7066, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:29.478470', 'step': 7066, 'epoch': 1}
{'type': 'loss', 'content': 0.09522660076618195, 'timestamp': '2025-10-02 00:23:29.480850', 'step': 7067, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:29.540130', 'step': 7067, 'epoch': 1}
{'type': 'loss', 'content': 0.08000556379556656, 'timestamp': '2025-10-02 00:23:29.546697', 'step': 7068, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:29.601183', 'step': 7068, 'epoch': 1}
{'type': 'loss', 'content': 0.013140769675374031, 'timestamp': '2025-10-02 00:23:29.610535', 'step': 7069, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:23:29.682852', 'step': 7069, 'epoch': 1}
{'type': 'loss', 'content': 0.0362534373998642, 'timestamp': '2025-10-02 00:23:29.695559', 'step': 7070, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:29.749797', 'step': 7070, 'epoch': 1}
{'type': 'loss', 'content': 0.13678722083568573, 'timestamp': '2025-10-02 00:23:29.752035', 'step': 7071, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:29.806787', 'step': 7071, 'epoch': 1}
{'type': 'loss', 'content': 0.11031000316143036, 'timestamp': '2025-10-02 00:23:29.812378', 'step': 7072, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:23:29.871177', 'step': 7072, 'epoch': 1}
{'type': 'loss', 'content': 0.18019917607307434, 'timestamp': '2025-10-02 00:23:29.873460', 'step': 7073, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:29.927585', 'step': 7073, 'epoch': 1}
{'type': 'loss', 'content': 0.1311594694852829, 'timestamp': '2025-10-02 00:23:29.929898', 'step': 7074, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:29.987831', 'step': 7074, 'epoch': 1}
{'type': 'loss', 'content': 0.04768994823098183, 'timestamp': '2025-10-02 00:23:29.990433', 'step': 7075, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:30.046837', 'step': 7075, 'epoch': 1}
{'type': 'loss', 'content': 0.06159941107034683, 'timestamp': '2025-10-02 00:23:30.052943', 'step': 7076, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:30.118321', 'step': 7076, 'epoch': 1}
{'type': 'loss', 'content': 0.12918134033679962, 'timestamp': '2025-10-02 00:23:30.133309', 'step': 7077, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:30.209267', 'step': 7077, 'epoch': 1}
{'type': 'loss', 'content': 0.04628203809261322, 'timestamp': '2025-10-02 00:23:30.213707', 'step': 7078, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:30.284605', 'step': 7078, 'epoch': 1}
{'type': 'loss', 'content': 0.0690259039402008, 'timestamp': '2025-10-02 00:23:30.289141', 'step': 7079, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:30.359953', 'step': 7079, 'epoch': 1}
{'type': 'loss', 'content': 0.04766768962144852, 'timestamp': '2025-10-02 00:23:30.369427', 'step': 7080, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:30.428831', 'step': 7080, 'epoch': 1}
{'type': 'loss', 'content': 0.11997910588979721, 'timestamp': '2025-10-02 00:23:30.432351', 'step': 7081, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:30.528436', 'step': 7081, 'epoch': 1}
{'type': 'loss', 'content': 0.07103367149829865, 'timestamp': '2025-10-02 00:23:30.533484', 'step': 7082, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:30.595115', 'step': 7082, 'epoch': 1}
{'type': 'loss', 'content': 0.1005343347787857, 'timestamp': '2025-10-02 00:23:30.598842', 'step': 7083, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:30.654486', 'step': 7083, 'epoch': 1}
{'type': 'loss', 'content': 0.07083386927843094, 'timestamp': '2025-10-02 00:23:30.664613', 'step': 7084, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:30.721359', 'step': 7084, 'epoch': 1}
{'type': 'loss', 'content': 0.13423700630664825, 'timestamp': '2025-10-02 00:23:30.728034', 'step': 7085, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:30.787033', 'step': 7085, 'epoch': 1}
{'type': 'loss', 'content': 0.025312768295407295, 'timestamp': '2025-10-02 00:23:30.792974', 'step': 7086, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:30.855879', 'step': 7086, 'epoch': 1}
{'type': 'loss', 'content': 0.028104782104492188, 'timestamp': '2025-10-02 00:23:30.861712', 'step': 7087, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:30.920936', 'step': 7087, 'epoch': 1}
{'type': 'loss', 'content': 0.04002334550023079, 'timestamp': '2025-10-02 00:23:30.927899', 'step': 7088, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:30.987868', 'step': 7088, 'epoch': 1}
{'type': 'loss', 'content': 0.02934136427938938, 'timestamp': '2025-10-02 00:23:30.997328', 'step': 7089, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:31.055742', 'step': 7089, 'epoch': 1}
{'type': 'loss', 'content': 0.0744951069355011, 'timestamp': '2025-10-02 00:23:31.064899', 'step': 7090, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:31.126099', 'step': 7090, 'epoch': 1}
{'type': 'loss', 'content': 0.06795521080493927, 'timestamp': '2025-10-02 00:23:31.133500', 'step': 7091, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:31.189245', 'step': 7091, 'epoch': 1}
{'type': 'loss', 'content': 0.04145044460892677, 'timestamp': '2025-10-02 00:23:31.195584', 'step': 7092, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:31.253944', 'step': 7092, 'epoch': 1}
{'type': 'loss', 'content': 0.04917708784341812, 'timestamp': '2025-10-02 00:23:31.261403', 'step': 7093, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:31.317409', 'step': 7093, 'epoch': 1}
{'type': 'loss', 'content': 0.18921999633312225, 'timestamp': '2025-10-02 00:23:31.321052', 'step': 7094, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:31.377865', 'step': 7094, 'epoch': 1}
{'type': 'loss', 'content': 0.023830777034163475, 'timestamp': '2025-10-02 00:23:31.380998', 'step': 7095, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:31.437496', 'step': 7095, 'epoch': 1}
{'type': 'loss', 'content': 0.10522785037755966, 'timestamp': '2025-10-02 00:23:31.443797', 'step': 7096, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:31.503401', 'step': 7096, 'epoch': 1}
{'type': 'loss', 'content': 0.014546004123985767, 'timestamp': '2025-10-02 00:23:31.513695', 'step': 7097, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:31.570677', 'step': 7097, 'epoch': 1}
{'type': 'loss', 'content': 0.05887417867779732, 'timestamp': '2025-10-02 00:23:31.574035', 'step': 7098, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:23:31.642811', 'step': 7098, 'epoch': 1}
{'type': 'loss', 'content': 0.06247733160853386, 'timestamp': '2025-10-02 00:23:31.654794', 'step': 7099, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:31.715391', 'step': 7099, 'epoch': 1}
{'type': 'loss', 'content': 0.17027394473552704, 'timestamp': '2025-10-02 00:23:31.722201', 'step': 7100, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:23:31.780579', 'step': 7100, 'epoch': 1}
{'type': 'loss', 'content': 0.2126869559288025, 'timestamp': '2025-10-02 00:23:31.783642', 'step': 7101, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:31.847795', 'step': 7101, 'epoch': 1}
{'type': 'loss', 'content': 0.11323884129524231, 'timestamp': '2025-10-02 00:23:31.853622', 'step': 7102, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:31.921588', 'step': 7102, 'epoch': 1}
{'type': 'loss', 'content': 0.02189248986542225, 'timestamp': '2025-10-02 00:23:31.924357', 'step': 7103, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:31.979998', 'step': 7103, 'epoch': 1}
{'type': 'loss', 'content': 0.04550192132592201, 'timestamp': '2025-10-02 00:23:31.988211', 'step': 7104, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:32.045315', 'step': 7104, 'epoch': 1}
{'type': 'loss', 'content': 0.08004267513751984, 'timestamp': '2025-10-02 00:23:32.051107', 'step': 7105, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:23:32.115014', 'step': 7105, 'epoch': 1}
{'type': 'loss', 'content': 0.031243057921528816, 'timestamp': '2025-10-02 00:23:32.125474', 'step': 7106, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:32.180985', 'step': 7106, 'epoch': 1}
{'type': 'loss', 'content': 0.02387455850839615, 'timestamp': '2025-10-02 00:23:32.188252', 'step': 7107, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:32.244434', 'step': 7107, 'epoch': 1}
{'type': 'loss', 'content': 0.20489400625228882, 'timestamp': '2025-10-02 00:23:32.251104', 'step': 7108, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:32.312491', 'step': 7108, 'epoch': 1}
{'type': 'loss', 'content': 0.17628292739391327, 'timestamp': '2025-10-02 00:23:32.315280', 'step': 7109, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:32.371122', 'step': 7109, 'epoch': 1}
{'type': 'loss', 'content': 0.16063016653060913, 'timestamp': '2025-10-02 00:23:32.374626', 'step': 7110, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:32.433106', 'step': 7110, 'epoch': 1}
{'type': 'loss', 'content': 0.04410536214709282, 'timestamp': '2025-10-02 00:23:32.440236', 'step': 7111, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:32.496672', 'step': 7111, 'epoch': 1}
{'type': 'loss', 'content': 0.025875519961118698, 'timestamp': '2025-10-02 00:23:32.503435', 'step': 7112, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:32.559250', 'step': 7112, 'epoch': 1}
{'type': 'loss', 'content': 0.07142166793346405, 'timestamp': '2025-10-02 00:23:32.562212', 'step': 7113, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:32.623234', 'step': 7113, 'epoch': 1}
{'type': 'loss', 'content': 0.06517965346574783, 'timestamp': '2025-10-02 00:23:32.627100', 'step': 7114, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:32.682912', 'step': 7114, 'epoch': 1}
{'type': 'loss', 'content': 0.03118756413459778, 'timestamp': '2025-10-02 00:23:32.686451', 'step': 7115, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:32.743976', 'step': 7115, 'epoch': 1}
{'type': 'loss', 'content': 0.0797167420387268, 'timestamp': '2025-10-02 00:23:32.749873', 'step': 7116, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:23:32.814740', 'step': 7116, 'epoch': 1}
{'type': 'loss', 'content': 0.031914617866277695, 'timestamp': '2025-10-02 00:23:32.825668', 'step': 7117, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:32.883766', 'step': 7117, 'epoch': 1}
{'type': 'loss', 'content': 0.058657366782426834, 'timestamp': '2025-10-02 00:23:32.887098', 'step': 7118, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:32.943221', 'step': 7118, 'epoch': 1}
{'type': 'loss', 'content': 0.06457672268152237, 'timestamp': '2025-10-02 00:23:32.950704', 'step': 7119, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:33.006906', 'step': 7119, 'epoch': 1}
{'type': 'loss', 'content': 0.07626795023679733, 'timestamp': '2025-10-02 00:23:33.017494', 'step': 7120, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:23:33.075759', 'step': 7120, 'epoch': 1}
{'type': 'loss', 'content': 0.1340322047472, 'timestamp': '2025-10-02 00:23:33.086665', 'step': 7121, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:33.142415', 'step': 7121, 'epoch': 1}
{'type': 'loss', 'content': 0.06613414734601974, 'timestamp': '2025-10-02 00:23:33.146055', 'step': 7122, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:33.203745', 'step': 7122, 'epoch': 1}
{'type': 'loss', 'content': 0.11302509158849716, 'timestamp': '2025-10-02 00:23:33.205961', 'step': 7123, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:33.262218', 'step': 7123, 'epoch': 1}
{'type': 'loss', 'content': 0.031157290562987328, 'timestamp': '2025-10-02 00:23:33.270524', 'step': 7124, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:23:33.335580', 'step': 7124, 'epoch': 1}
{'type': 'loss', 'content': 0.05723483860492706, 'timestamp': '2025-10-02 00:23:33.346877', 'step': 7125, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:33.403667', 'step': 7125, 'epoch': 1}
{'type': 'loss', 'content': 0.09028533846139908, 'timestamp': '2025-10-02 00:23:33.405999', 'step': 7126, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:23:33.473086', 'step': 7126, 'epoch': 1}
{'type': 'loss', 'content': 0.025683319196105003, 'timestamp': '2025-10-02 00:23:33.483244', 'step': 7127, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:33.543339', 'step': 7127, 'epoch': 1}
{'type': 'loss', 'content': 0.08596251904964447, 'timestamp': '2025-10-02 00:23:33.551829', 'step': 7128, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:33.612410', 'step': 7128, 'epoch': 1}
{'type': 'loss', 'content': 0.05467972904443741, 'timestamp': '2025-10-02 00:23:33.622692', 'step': 7129, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:33.685115', 'step': 7129, 'epoch': 1}
{'type': 'loss', 'content': 0.020649613812565804, 'timestamp': '2025-10-02 00:23:33.689854', 'step': 7130, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:33.753856', 'step': 7130, 'epoch': 1}
{'type': 'loss', 'content': 0.14233528077602386, 'timestamp': '2025-10-02 00:23:33.756987', 'step': 7131, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:33.812667', 'step': 7131, 'epoch': 1}
{'type': 'loss', 'content': 0.09003783017396927, 'timestamp': '2025-10-02 00:23:33.823546', 'step': 7132, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:33.880422', 'step': 7132, 'epoch': 1}
{'type': 'loss', 'content': 0.09507019817829132, 'timestamp': '2025-10-02 00:23:33.882875', 'step': 7133, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:33.940181', 'step': 7133, 'epoch': 1}
{'type': 'loss', 'content': 0.115596242249012, 'timestamp': '2025-10-02 00:23:33.943012', 'step': 7134, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:33.998803', 'step': 7134, 'epoch': 1}
{'type': 'loss', 'content': 0.028943564742803574, 'timestamp': '2025-10-02 00:23:34.008164', 'step': 7135, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:34.063702', 'step': 7135, 'epoch': 1}
{'type': 'loss', 'content': 0.174651101231575, 'timestamp': '2025-10-02 00:23:34.069572', 'step': 7136, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:23:34.130450', 'step': 7136, 'epoch': 1}
{'type': 'loss', 'content': 0.21032696962356567, 'timestamp': '2025-10-02 00:23:34.133169', 'step': 7137, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:23:34.195483', 'step': 7137, 'epoch': 1}
{'type': 'loss', 'content': 0.08165436238050461, 'timestamp': '2025-10-02 00:23:34.198227', 'step': 7138, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:23:34.260285', 'step': 7138, 'epoch': 1}
{'type': 'loss', 'content': 0.03480689600110054, 'timestamp': '2025-10-02 00:23:34.270780', 'step': 7139, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:34.332033', 'step': 7139, 'epoch': 1}
{'type': 'loss', 'content': 0.03696452081203461, 'timestamp': '2025-10-02 00:23:34.340189', 'step': 7140, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:34.397840', 'step': 7140, 'epoch': 1}
{'type': 'loss', 'content': 0.0241001695394516, 'timestamp': '2025-10-02 00:23:34.408096', 'step': 7141, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:23:34.470012', 'step': 7141, 'epoch': 1}
{'type': 'loss', 'content': 0.20408153533935547, 'timestamp': '2025-10-02 00:23:34.480238', 'step': 7142, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:34.546764', 'step': 7142, 'epoch': 1}
{'type': 'loss', 'content': 0.14593292772769928, 'timestamp': '2025-10-02 00:23:34.551232', 'step': 7143, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:34.609559', 'step': 7143, 'epoch': 1}
{'type': 'loss', 'content': 0.07896050810813904, 'timestamp': '2025-10-02 00:23:34.619690', 'step': 7144, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:34.675482', 'step': 7144, 'epoch': 1}
{'type': 'loss', 'content': 0.13566258549690247, 'timestamp': '2025-10-02 00:23:34.678576', 'step': 7145, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:34.737176', 'step': 7145, 'epoch': 1}
{'type': 'loss', 'content': 0.013150161132216454, 'timestamp': '2025-10-02 00:23:34.746738', 'step': 7146, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:34.804644', 'step': 7146, 'epoch': 1}
{'type': 'loss', 'content': 0.2071942389011383, 'timestamp': '2025-10-02 00:23:34.808094', 'step': 7147, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:34.863601', 'step': 7147, 'epoch': 1}
{'type': 'loss', 'content': 0.06766833364963531, 'timestamp': '2025-10-02 00:23:34.871858', 'step': 7148, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:34.929482', 'step': 7148, 'epoch': 1}
{'type': 'loss', 'content': 0.06096649542450905, 'timestamp': '2025-10-02 00:23:34.937054', 'step': 7149, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:34.992619', 'step': 7149, 'epoch': 1}
{'type': 'loss', 'content': 0.0795569196343422, 'timestamp': '2025-10-02 00:23:34.995774', 'step': 7150, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:35.051757', 'step': 7150, 'epoch': 1}
{'type': 'loss', 'content': 0.07862270623445511, 'timestamp': '2025-10-02 00:23:35.054620', 'step': 7151, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:35.111407', 'step': 7151, 'epoch': 1}
{'type': 'loss', 'content': 0.058390289545059204, 'timestamp': '2025-10-02 00:23:35.118178', 'step': 7152, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:35.173011', 'step': 7152, 'epoch': 1}
{'type': 'loss', 'content': 0.015491382218897343, 'timestamp': '2025-10-02 00:23:35.175481', 'step': 7153, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:23:35.239562', 'step': 7153, 'epoch': 1}
{'type': 'loss', 'content': 0.07197674363851547, 'timestamp': '2025-10-02 00:23:35.250363', 'step': 7154, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:35.310839', 'step': 7154, 'epoch': 1}
{'type': 'loss', 'content': 0.18039639294147491, 'timestamp': '2025-10-02 00:23:35.313409', 'step': 7155, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:35.368885', 'step': 7155, 'epoch': 1}
{'type': 'loss', 'content': 0.04720768705010414, 'timestamp': '2025-10-02 00:23:35.377139', 'step': 7156, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:23:35.432844', 'step': 7156, 'epoch': 1}
{'type': 'loss', 'content': 0.05136657878756523, 'timestamp': '2025-10-02 00:23:35.436332', 'step': 7157, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:35.493654', 'step': 7157, 'epoch': 1}
{'type': 'loss', 'content': 0.0425347238779068, 'timestamp': '2025-10-02 00:23:35.496223', 'step': 7158, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:35.553888', 'step': 7158, 'epoch': 1}
{'type': 'loss', 'content': 0.036713942885398865, 'timestamp': '2025-10-02 00:23:35.556369', 'step': 7159, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:35.611893', 'step': 7159, 'epoch': 1}
{'type': 'loss', 'content': 0.03448254615068436, 'timestamp': '2025-10-02 00:23:35.619358', 'step': 7160, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:35.685760', 'step': 7160, 'epoch': 1}
{'type': 'loss', 'content': 0.034182023257017136, 'timestamp': '2025-10-02 00:23:35.688252', 'step': 7161, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:23:35.752409', 'step': 7161, 'epoch': 1}
{'type': 'loss', 'content': 0.03804837912321091, 'timestamp': '2025-10-02 00:23:35.763269', 'step': 7162, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:23:35.823284', 'step': 7162, 'epoch': 1}
{'type': 'loss', 'content': 0.3112359046936035, 'timestamp': '2025-10-02 00:23:35.825862', 'step': 7163, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:23:35.884959', 'step': 7163, 'epoch': 1}
{'type': 'loss', 'content': 0.14810305833816528, 'timestamp': '2025-10-02 00:23:35.895526', 'step': 7164, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:35.950903', 'step': 7164, 'epoch': 1}
{'type': 'loss', 'content': 0.16901378333568573, 'timestamp': '2025-10-02 00:23:35.953505', 'step': 7165, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:36.009645', 'step': 7165, 'epoch': 1}
{'type': 'loss', 'content': 0.06794722378253937, 'timestamp': '2025-10-02 00:23:36.016900', 'step': 7166, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:36.074767', 'step': 7166, 'epoch': 1}
{'type': 'loss', 'content': 0.044224873185157776, 'timestamp': '2025-10-02 00:23:36.084005', 'step': 7167, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:36.145156', 'step': 7167, 'epoch': 1}
{'type': 'loss', 'content': 0.07350058853626251, 'timestamp': '2025-10-02 00:23:36.151602', 'step': 7168, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:36.209613', 'step': 7168, 'epoch': 1}
{'type': 'loss', 'content': 0.20522046089172363, 'timestamp': '2025-10-02 00:23:36.212674', 'step': 7169, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:36.274070', 'step': 7169, 'epoch': 1}
{'type': 'loss', 'content': 0.05118235573172569, 'timestamp': '2025-10-02 00:23:36.281460', 'step': 7170, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:36.338385', 'step': 7170, 'epoch': 1}
{'type': 'loss', 'content': 0.1390170007944107, 'timestamp': '2025-10-02 00:23:36.345734', 'step': 7171, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:36.402849', 'step': 7171, 'epoch': 1}
{'type': 'loss', 'content': 0.07482092827558517, 'timestamp': '2025-10-02 00:23:36.409723', 'step': 7172, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:36.465775', 'step': 7172, 'epoch': 1}
{'type': 'loss', 'content': 0.1878661960363388, 'timestamp': '2025-10-02 00:23:36.468553', 'step': 7173, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:36.525734', 'step': 7173, 'epoch': 1}
{'type': 'loss', 'content': 0.05599907413125038, 'timestamp': '2025-10-02 00:23:36.532730', 'step': 7174, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:36.592162', 'step': 7174, 'epoch': 1}
{'type': 'loss', 'content': 0.03674246370792389, 'timestamp': '2025-10-02 00:23:36.600556', 'step': 7175, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:36.663023', 'step': 7175, 'epoch': 1}
{'type': 'loss', 'content': 0.07645532488822937, 'timestamp': '2025-10-02 00:23:36.670165', 'step': 7176, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:36.726157', 'step': 7176, 'epoch': 1}
{'type': 'loss', 'content': 0.09583340585231781, 'timestamp': '2025-10-02 00:23:36.729121', 'step': 7177, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:36.790361', 'step': 7177, 'epoch': 1}
{'type': 'loss', 'content': 0.09428893774747849, 'timestamp': '2025-10-02 00:23:36.794218', 'step': 7178, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:36.850827', 'step': 7178, 'epoch': 1}
{'type': 'loss', 'content': 0.05344739928841591, 'timestamp': '2025-10-02 00:23:36.856746', 'step': 7179, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:36.914513', 'step': 7179, 'epoch': 1}
{'type': 'loss', 'content': 0.04824334755539894, 'timestamp': '2025-10-02 00:23:36.921285', 'step': 7180, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:36.982044', 'step': 7180, 'epoch': 1}
{'type': 'loss', 'content': 0.06557144969701767, 'timestamp': '2025-10-02 00:23:36.989560', 'step': 7181, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:37.049577', 'step': 7181, 'epoch': 1}
{'type': 'loss', 'content': 0.04643583670258522, 'timestamp': '2025-10-02 00:23:37.052583', 'step': 7182, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:37.113883', 'step': 7182, 'epoch': 1}
{'type': 'loss', 'content': 0.10023408383131027, 'timestamp': '2025-10-02 00:23:37.123225', 'step': 7183, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:37.180228', 'step': 7183, 'epoch': 1}
{'type': 'loss', 'content': 0.04741346463561058, 'timestamp': '2025-10-02 00:23:37.186489', 'step': 7184, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:37.255190', 'step': 7184, 'epoch': 1}
{'type': 'loss', 'content': 0.09482771903276443, 'timestamp': '2025-10-02 00:23:37.258592', 'step': 7185, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:37.320900', 'step': 7185, 'epoch': 1}
{'type': 'loss', 'content': 0.020877361297607422, 'timestamp': '2025-10-02 00:23:37.330252', 'step': 7186, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:23:37.393290', 'step': 7186, 'epoch': 1}
{'type': 'loss', 'content': 0.181693434715271, 'timestamp': '2025-10-02 00:23:37.397918', 'step': 7187, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:37.461849', 'step': 7187, 'epoch': 1}
{'type': 'loss', 'content': 0.21575981378555298, 'timestamp': '2025-10-02 00:23:37.469583', 'step': 7188, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:23:37.543704', 'step': 7188, 'epoch': 1}
{'type': 'loss', 'content': 0.03349905461072922, 'timestamp': '2025-10-02 00:23:37.557074', 'step': 7189, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:37.614581', 'step': 7189, 'epoch': 1}
{'type': 'loss', 'content': 0.1453395038843155, 'timestamp': '2025-10-02 00:23:37.618700', 'step': 7190, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:23:37.683163', 'step': 7190, 'epoch': 1}
{'type': 'loss', 'content': 0.025826536118984222, 'timestamp': '2025-10-02 00:23:37.693590', 'step': 7191, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:23:37.753602', 'step': 7191, 'epoch': 1}
{'type': 'loss', 'content': 0.15542396903038025, 'timestamp': '2025-10-02 00:23:37.761304', 'step': 7192, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:37.817827', 'step': 7192, 'epoch': 1}
{'type': 'loss', 'content': 0.0808953195810318, 'timestamp': '2025-10-02 00:23:37.820510', 'step': 7193, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:37.876686', 'step': 7193, 'epoch': 1}
{'type': 'loss', 'content': 0.1372918039560318, 'timestamp': '2025-10-02 00:23:37.879554', 'step': 7194, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:37.947954', 'step': 7194, 'epoch': 1}
{'type': 'loss', 'content': 0.04661606624722481, 'timestamp': '2025-10-02 00:23:37.957304', 'step': 7195, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:38.049352', 'step': 7195, 'epoch': 1}
{'type': 'loss', 'content': 0.03794684633612633, 'timestamp': '2025-10-02 00:23:38.056285', 'step': 7196, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:38.117062', 'step': 7196, 'epoch': 1}
{'type': 'loss', 'content': 0.04489704966545105, 'timestamp': '2025-10-02 00:23:38.123231', 'step': 7197, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:38.195217', 'step': 7197, 'epoch': 1}
{'type': 'loss', 'content': 0.059661369770765305, 'timestamp': '2025-10-02 00:23:38.198454', 'step': 7198, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:38.257228', 'step': 7198, 'epoch': 1}
{'type': 'loss', 'content': 0.04206673428416252, 'timestamp': '2025-10-02 00:23:38.262998', 'step': 7199, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:38.322200', 'step': 7199, 'epoch': 1}
{'type': 'loss', 'content': 0.19057142734527588, 'timestamp': '2025-10-02 00:23:38.335932', 'step': 7200, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:38.394734', 'step': 7200, 'epoch': 1}
{'type': 'loss', 'content': 0.17792759835720062, 'timestamp': '2025-10-02 00:23:38.398306', 'step': 7201, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:38.461882', 'step': 7201, 'epoch': 1}
{'type': 'loss', 'content': 0.039136048406362534, 'timestamp': '2025-10-02 00:23:38.470662', 'step': 7202, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:38.527942', 'step': 7202, 'epoch': 1}
{'type': 'loss', 'content': 0.14521268010139465, 'timestamp': '2025-10-02 00:23:38.531252', 'step': 7203, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:38.589597', 'step': 7203, 'epoch': 1}
{'type': 'loss', 'content': 0.11619090288877487, 'timestamp': '2025-10-02 00:23:38.595864', 'step': 7204, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:38.651148', 'step': 7204, 'epoch': 1}
{'type': 'loss', 'content': 0.1577080935239792, 'timestamp': '2025-10-02 00:23:38.653751', 'step': 7205, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:38.711144', 'step': 7205, 'epoch': 1}
{'type': 'loss', 'content': 0.029274869710206985, 'timestamp': '2025-10-02 00:23:38.718609', 'step': 7206, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:38.781227', 'step': 7206, 'epoch': 1}
{'type': 'loss', 'content': 0.15728849172592163, 'timestamp': '2025-10-02 00:23:38.783851', 'step': 7207, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:38.840929', 'step': 7207, 'epoch': 1}
{'type': 'loss', 'content': 0.1496322900056839, 'timestamp': '2025-10-02 00:23:38.849149', 'step': 7208, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:38.913440', 'step': 7208, 'epoch': 1}
{'type': 'loss', 'content': 0.06285898387432098, 'timestamp': '2025-10-02 00:23:38.916487', 'step': 7209, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:38.974666', 'step': 7209, 'epoch': 1}
{'type': 'loss', 'content': 0.01214065682142973, 'timestamp': '2025-10-02 00:23:38.983979', 'step': 7210, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:39.042520', 'step': 7210, 'epoch': 1}
{'type': 'loss', 'content': 0.02293027937412262, 'timestamp': '2025-10-02 00:23:39.054340', 'step': 7211, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:39.111358', 'step': 7211, 'epoch': 1}
{'type': 'loss', 'content': 0.042627159506082535, 'timestamp': '2025-10-02 00:23:39.118102', 'step': 7212, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:39.175062', 'step': 7212, 'epoch': 1}
{'type': 'loss', 'content': 0.2569974660873413, 'timestamp': '2025-10-02 00:23:39.178003', 'step': 7213, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:23:39.246115', 'step': 7213, 'epoch': 1}
{'type': 'loss', 'content': 0.02504585310816765, 'timestamp': '2025-10-02 00:23:39.256254', 'step': 7214, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:39.318588', 'step': 7214, 'epoch': 1}
{'type': 'loss', 'content': 0.03026372566819191, 'timestamp': '2025-10-02 00:23:39.325407', 'step': 7215, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:39.386620', 'step': 7215, 'epoch': 1}
{'type': 'loss', 'content': 0.04065113142132759, 'timestamp': '2025-10-02 00:23:39.394809', 'step': 7216, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:39.457649', 'step': 7216, 'epoch': 1}
{'type': 'loss', 'content': 0.06105424091219902, 'timestamp': '2025-10-02 00:23:39.465242', 'step': 7217, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:39.544388', 'step': 7217, 'epoch': 1}
{'type': 'loss', 'content': 0.036461006850004196, 'timestamp': '2025-10-02 00:23:39.550408', 'step': 7218, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:39.634082', 'step': 7218, 'epoch': 1}
{'type': 'loss', 'content': 0.052643220871686935, 'timestamp': '2025-10-02 00:23:39.637670', 'step': 7219, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:39.698394', 'step': 7219, 'epoch': 1}
{'type': 'loss', 'content': 0.08062158524990082, 'timestamp': '2025-10-02 00:23:39.705102', 'step': 7220, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:39.761922', 'step': 7220, 'epoch': 1}
{'type': 'loss', 'content': 0.12387732416391373, 'timestamp': '2025-10-02 00:23:39.765012', 'step': 7221, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:39.823136', 'step': 7221, 'epoch': 1}
{'type': 'loss', 'content': 0.02263474650681019, 'timestamp': '2025-10-02 00:23:39.829093', 'step': 7222, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:39.886810', 'step': 7222, 'epoch': 1}
{'type': 'loss', 'content': 0.10726005584001541, 'timestamp': '2025-10-02 00:23:39.889885', 'step': 7223, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:39.946849', 'step': 7223, 'epoch': 1}
{'type': 'loss', 'content': 0.11729294061660767, 'timestamp': '2025-10-02 00:23:39.956898', 'step': 7224, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:40.012624', 'step': 7224, 'epoch': 1}
{'type': 'loss', 'content': 0.07892690598964691, 'timestamp': '2025-10-02 00:23:40.019958', 'step': 7225, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:23:40.092023', 'step': 7225, 'epoch': 1}
{'type': 'loss', 'content': 0.010178967379033566, 'timestamp': '2025-10-02 00:23:40.102907', 'step': 7226, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:40.160528', 'step': 7226, 'epoch': 1}
{'type': 'loss', 'content': 0.05213586986064911, 'timestamp': '2025-10-02 00:23:40.163052', 'step': 7227, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:40.221151', 'step': 7227, 'epoch': 1}
{'type': 'loss', 'content': 0.05453475937247276, 'timestamp': '2025-10-02 00:23:40.228270', 'step': 7228, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:40.285356', 'step': 7228, 'epoch': 1}
{'type': 'loss', 'content': 0.04428340494632721, 'timestamp': '2025-10-02 00:23:40.288408', 'step': 7229, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:23:40.349479', 'step': 7229, 'epoch': 1}
{'type': 'loss', 'content': 0.026531441137194633, 'timestamp': '2025-10-02 00:23:40.359643', 'step': 7230, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:40.420225', 'step': 7230, 'epoch': 1}
{'type': 'loss', 'content': 0.02736230567097664, 'timestamp': '2025-10-02 00:23:40.423116', 'step': 7231, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:40.480074', 'step': 7231, 'epoch': 1}
{'type': 'loss', 'content': 0.08052688091993332, 'timestamp': '2025-10-02 00:23:40.486532', 'step': 7232, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:23:40.544163', 'step': 7232, 'epoch': 1}
{'type': 'loss', 'content': 0.07108491659164429, 'timestamp': '2025-10-02 00:23:40.547325', 'step': 7233, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:40.606894', 'step': 7233, 'epoch': 1}
{'type': 'loss', 'content': 0.021883362904191017, 'timestamp': '2025-10-02 00:23:40.616462', 'step': 7234, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:40.673205', 'step': 7234, 'epoch': 1}
{'type': 'loss', 'content': 0.04963499307632446, 'timestamp': '2025-10-02 00:23:40.675841', 'step': 7235, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:40.733412', 'step': 7235, 'epoch': 1}
{'type': 'loss', 'content': 0.07668692618608475, 'timestamp': '2025-10-02 00:23:40.740963', 'step': 7236, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:40.799452', 'step': 7236, 'epoch': 1}
{'type': 'loss', 'content': 0.05493227764964104, 'timestamp': '2025-10-02 00:23:40.802418', 'step': 7237, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:40.865359', 'step': 7237, 'epoch': 1}
{'type': 'loss', 'content': 0.1344524323940277, 'timestamp': '2025-10-02 00:23:40.868605', 'step': 7238, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:40.944440', 'step': 7238, 'epoch': 1}
{'type': 'loss', 'content': 0.14465537667274475, 'timestamp': '2025-10-02 00:23:40.947852', 'step': 7239, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:41.003316', 'step': 7239, 'epoch': 1}
{'type': 'loss', 'content': 0.15567778050899506, 'timestamp': '2025-10-02 00:23:41.009624', 'step': 7240, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:41.070252', 'step': 7240, 'epoch': 1}
{'type': 'loss', 'content': 0.07636944949626923, 'timestamp': '2025-10-02 00:23:41.076108', 'step': 7241, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:41.133476', 'step': 7241, 'epoch': 1}
{'type': 'loss', 'content': 0.03849002718925476, 'timestamp': '2025-10-02 00:23:41.141720', 'step': 7242, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:41.213166', 'step': 7242, 'epoch': 1}
{'type': 'loss', 'content': 0.09859591722488403, 'timestamp': '2025-10-02 00:23:41.217780', 'step': 7243, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:41.275640', 'step': 7243, 'epoch': 1}
{'type': 'loss', 'content': 0.05741162225604057, 'timestamp': '2025-10-02 00:23:41.283150', 'step': 7244, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:41.339075', 'step': 7244, 'epoch': 1}
{'type': 'loss', 'content': 0.07324089854955673, 'timestamp': '2025-10-02 00:23:41.342449', 'step': 7245, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:23:41.404648', 'step': 7245, 'epoch': 1}
{'type': 'loss', 'content': 0.0378524586558342, 'timestamp': '2025-10-02 00:23:41.414867', 'step': 7246, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:41.472848', 'step': 7246, 'epoch': 1}
{'type': 'loss', 'content': 0.2188260555267334, 'timestamp': '2025-10-02 00:23:41.475049', 'step': 7247, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:41.538264', 'step': 7247, 'epoch': 1}
{'type': 'loss', 'content': 0.20547328889369965, 'timestamp': '2025-10-02 00:23:41.545017', 'step': 7248, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:23:41.600090', 'step': 7248, 'epoch': 1}
{'type': 'loss', 'content': 0.1891886293888092, 'timestamp': '2025-10-02 00:23:41.602996', 'step': 7249, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:23:41.664235', 'step': 7249, 'epoch': 1}
{'type': 'loss', 'content': 0.022466624155640602, 'timestamp': '2025-10-02 00:23:41.674432', 'step': 7250, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:23:41.738723', 'step': 7250, 'epoch': 1}
{'type': 'loss', 'content': 0.01624463126063347, 'timestamp': '2025-10-02 00:23:41.749397', 'step': 7251, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:41.815454', 'step': 7251, 'epoch': 1}
{'type': 'loss', 'content': 0.05041741579771042, 'timestamp': '2025-10-02 00:23:41.821502', 'step': 7252, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:41.879978', 'step': 7252, 'epoch': 1}
{'type': 'loss', 'content': 0.14734743535518646, 'timestamp': '2025-10-02 00:23:41.882381', 'step': 7253, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:23:41.939146', 'step': 7253, 'epoch': 1}
{'type': 'loss', 'content': 0.2097596824169159, 'timestamp': '2025-10-02 00:23:41.942343', 'step': 7254, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:42.005078', 'step': 7254, 'epoch': 1}
{'type': 'loss', 'content': 0.04229655861854553, 'timestamp': '2025-10-02 00:23:42.014631', 'step': 7255, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:42.074063', 'step': 7255, 'epoch': 1}
{'type': 'loss', 'content': 0.032294344156980515, 'timestamp': '2025-10-02 00:23:42.084406', 'step': 7256, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:42.143819', 'step': 7256, 'epoch': 1}
{'type': 'loss', 'content': 0.04336981847882271, 'timestamp': '2025-10-02 00:23:42.151413', 'step': 7257, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:42.208297', 'step': 7257, 'epoch': 1}
{'type': 'loss', 'content': 0.1396828293800354, 'timestamp': '2025-10-02 00:23:42.211082', 'step': 7258, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:42.271231', 'step': 7258, 'epoch': 1}
{'type': 'loss', 'content': 0.05597924813628197, 'timestamp': '2025-10-02 00:23:42.274644', 'step': 7259, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:23:42.340746', 'step': 7259, 'epoch': 1}
{'type': 'loss', 'content': 0.0845753625035286, 'timestamp': '2025-10-02 00:23:42.352181', 'step': 7260, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:42.414292', 'step': 7260, 'epoch': 1}
{'type': 'loss', 'content': 0.1841977834701538, 'timestamp': '2025-10-02 00:23:42.416892', 'step': 7261, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:23:42.473434', 'step': 7261, 'epoch': 1}
{'type': 'loss', 'content': 0.14454089105129242, 'timestamp': '2025-10-02 00:23:42.476922', 'step': 7262, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:42.540437', 'step': 7262, 'epoch': 1}
{'type': 'loss', 'content': 0.117290198802948, 'timestamp': '2025-10-02 00:23:42.544067', 'step': 7263, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:42.600024', 'step': 7263, 'epoch': 1}
{'type': 'loss', 'content': 0.14652419090270996, 'timestamp': '2025-10-02 00:23:42.607259', 'step': 7264, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:42.663138', 'step': 7264, 'epoch': 1}
{'type': 'loss', 'content': 0.11483067274093628, 'timestamp': '2025-10-02 00:23:42.666736', 'step': 7265, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:23:42.738798', 'step': 7265, 'epoch': 1}
{'type': 'loss', 'content': 0.027695614844560623, 'timestamp': '2025-10-02 00:23:42.749471', 'step': 7266, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:42.807075', 'step': 7266, 'epoch': 1}
{'type': 'loss', 'content': 0.1543683260679245, 'timestamp': '2025-10-02 00:23:42.810327', 'step': 7267, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:23:42.873714', 'step': 7267, 'epoch': 1}
{'type': 'loss', 'content': 0.1226956769824028, 'timestamp': '2025-10-02 00:23:42.880959', 'step': 7268, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:23:42.937916', 'step': 7268, 'epoch': 1}
{'type': 'loss', 'content': 0.13849277794361115, 'timestamp': '2025-10-02 00:23:42.940662', 'step': 7269, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:43.010557', 'step': 7269, 'epoch': 1}
{'type': 'loss', 'content': 0.10036720335483551, 'timestamp': '2025-10-02 00:23:43.013803', 'step': 7270, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:43.075699', 'step': 7270, 'epoch': 1}
{'type': 'loss', 'content': 0.05143008381128311, 'timestamp': '2025-10-02 00:23:43.081122', 'step': 7271, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:43.147115', 'step': 7271, 'epoch': 1}
{'type': 'loss', 'content': 0.1470010131597519, 'timestamp': '2025-10-02 00:23:43.157514', 'step': 7272, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:43.226821', 'step': 7272, 'epoch': 1}
{'type': 'loss', 'content': 0.22814065217971802, 'timestamp': '2025-10-02 00:23:43.230822', 'step': 7273, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:43.293641', 'step': 7273, 'epoch': 1}
{'type': 'loss', 'content': 0.02509474754333496, 'timestamp': '2025-10-02 00:23:43.303173', 'step': 7274, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:43.360359', 'step': 7274, 'epoch': 1}
{'type': 'loss', 'content': 0.04397759586572647, 'timestamp': '2025-10-02 00:23:43.369434', 'step': 7275, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:43.425167', 'step': 7275, 'epoch': 1}
{'type': 'loss', 'content': 0.14318804442882538, 'timestamp': '2025-10-02 00:23:43.432368', 'step': 7276, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:23:43.499386', 'step': 7276, 'epoch': 1}
{'type': 'loss', 'content': 0.03870588541030884, 'timestamp': '2025-10-02 00:23:43.511150', 'step': 7277, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:23:43.575503', 'step': 7277, 'epoch': 1}
{'type': 'loss', 'content': 0.10236421972513199, 'timestamp': '2025-10-02 00:23:43.586002', 'step': 7278, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:43.645995', 'step': 7278, 'epoch': 1}
{'type': 'loss', 'content': 0.16539840400218964, 'timestamp': '2025-10-02 00:23:43.648933', 'step': 7279, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:23:43.706491', 'step': 7279, 'epoch': 1}
{'type': 'loss', 'content': 0.1169559434056282, 'timestamp': '2025-10-02 00:23:43.713029', 'step': 7280, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:43.770017', 'step': 7280, 'epoch': 1}
{'type': 'loss', 'content': 0.1376693993806839, 'timestamp': '2025-10-02 00:23:43.773621', 'step': 7281, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:43.831074', 'step': 7281, 'epoch': 1}
{'type': 'loss', 'content': 0.05625653266906738, 'timestamp': '2025-10-02 00:23:43.834105', 'step': 7282, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:43.900440', 'step': 7282, 'epoch': 1}
{'type': 'loss', 'content': 0.10577382147312164, 'timestamp': '2025-10-02 00:23:43.902800', 'step': 7283, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:43.959516', 'step': 7283, 'epoch': 1}
{'type': 'loss', 'content': 0.09526241570711136, 'timestamp': '2025-10-02 00:23:43.966424', 'step': 7284, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:44.028109', 'step': 7284, 'epoch': 1}
{'type': 'loss', 'content': 0.05366133525967598, 'timestamp': '2025-10-02 00:23:44.030987', 'step': 7285, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:23:44.091794', 'step': 7285, 'epoch': 1}
{'type': 'loss', 'content': 0.09886915981769562, 'timestamp': '2025-10-02 00:23:44.094948', 'step': 7286, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:44.156777', 'step': 7286, 'epoch': 1}
{'type': 'loss', 'content': 0.06734903901815414, 'timestamp': '2025-10-02 00:23:44.166123', 'step': 7287, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:23:44.237715', 'step': 7287, 'epoch': 1}
{'type': 'loss', 'content': 0.05635960400104523, 'timestamp': '2025-10-02 00:23:44.249092', 'step': 7288, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:23:44.315184', 'step': 7288, 'epoch': 1}
{'type': 'loss', 'content': 0.0418110266327858, 'timestamp': '2025-10-02 00:23:44.326490', 'step': 7289, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:44.383754', 'step': 7289, 'epoch': 1}
{'type': 'loss', 'content': 0.11045139282941818, 'timestamp': '2025-10-02 00:23:44.387332', 'step': 7290, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:44.452071', 'step': 7290, 'epoch': 1}
{'type': 'loss', 'content': 0.02311580628156662, 'timestamp': '2025-10-02 00:23:44.455799', 'step': 7291, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:44.511959', 'step': 7291, 'epoch': 1}
{'type': 'loss', 'content': 0.053745336830616, 'timestamp': '2025-10-02 00:23:44.520131', 'step': 7292, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:44.575176', 'step': 7292, 'epoch': 1}
{'type': 'loss', 'content': 0.11497075110673904, 'timestamp': '2025-10-02 00:23:44.578075', 'step': 7293, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:44.634796', 'step': 7293, 'epoch': 1}
{'type': 'loss', 'content': 0.05543263629078865, 'timestamp': '2025-10-02 00:23:44.644279', 'step': 7294, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:44.708486', 'step': 7294, 'epoch': 1}
{'type': 'loss', 'content': 0.055605195462703705, 'timestamp': '2025-10-02 00:23:44.718042', 'step': 7295, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:23:44.797028', 'step': 7295, 'epoch': 1}
{'type': 'loss', 'content': 0.025101888924837112, 'timestamp': '2025-10-02 00:23:44.809809', 'step': 7296, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:44.865473', 'step': 7296, 'epoch': 1}
{'type': 'loss', 'content': 0.038597479462623596, 'timestamp': '2025-10-02 00:23:44.867938', 'step': 7297, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:44.922627', 'step': 7297, 'epoch': 1}
{'type': 'loss', 'content': 0.052600689232349396, 'timestamp': '2025-10-02 00:23:44.925048', 'step': 7298, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:44.979151', 'step': 7298, 'epoch': 1}
{'type': 'loss', 'content': 0.060145068913698196, 'timestamp': '2025-10-02 00:23:44.981531', 'step': 7299, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:45.035826', 'step': 7299, 'epoch': 1}
{'type': 'loss', 'content': 0.04989944025874138, 'timestamp': '2025-10-02 00:23:45.041932', 'step': 7300, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:23:45.096965', 'step': 7300, 'epoch': 1}
{'type': 'loss', 'content': 0.09040877968072891, 'timestamp': '2025-10-02 00:23:45.099508', 'step': 7301, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:45.154861', 'step': 7301, 'epoch': 1}
{'type': 'loss', 'content': 0.06618878245353699, 'timestamp': '2025-10-02 00:23:45.157476', 'step': 7302, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:45.211781', 'step': 7302, 'epoch': 1}
{'type': 'loss', 'content': 0.05547476187348366, 'timestamp': '2025-10-02 00:23:45.217634', 'step': 7303, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:23:45.272010', 'step': 7303, 'epoch': 1}
{'type': 'loss', 'content': 0.1285538226366043, 'timestamp': '2025-10-02 00:23:45.277955', 'step': 7304, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:23:45.336001', 'step': 7304, 'epoch': 1}
{'type': 'loss', 'content': 0.044085994362831116, 'timestamp': '2025-10-02 00:23:45.347009', 'step': 7305, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:45.400921', 'step': 7305, 'epoch': 1}
{'type': 'loss', 'content': 0.1475263237953186, 'timestamp': '2025-10-02 00:23:45.406780', 'step': 7306, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:45.461747', 'step': 7306, 'epoch': 1}
{'type': 'loss', 'content': 0.2509199380874634, 'timestamp': '2025-10-02 00:23:45.463890', 'step': 7307, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:45.519313', 'step': 7307, 'epoch': 1}
{'type': 'loss', 'content': 0.09871344268321991, 'timestamp': '2025-10-02 00:23:45.525578', 'step': 7308, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:45.579629', 'step': 7308, 'epoch': 1}
{'type': 'loss', 'content': 0.060590483248233795, 'timestamp': '2025-10-02 00:23:45.585478', 'step': 7309, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:45.642216', 'step': 7309, 'epoch': 1}
{'type': 'loss', 'content': 0.02034694328904152, 'timestamp': '2025-10-02 00:23:45.649734', 'step': 7310, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:45.704373', 'step': 7310, 'epoch': 1}
{'type': 'loss', 'content': 0.06785660982131958, 'timestamp': '2025-10-02 00:23:45.707505', 'step': 7311, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:45.763684', 'step': 7311, 'epoch': 1}
{'type': 'loss', 'content': 0.055796101689338684, 'timestamp': '2025-10-02 00:23:45.771917', 'step': 7312, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:45.826843', 'step': 7312, 'epoch': 1}
{'type': 'loss', 'content': 0.04749646782875061, 'timestamp': '2025-10-02 00:23:45.829388', 'step': 7313, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:45.883480', 'step': 7313, 'epoch': 1}
{'type': 'loss', 'content': 0.19533899426460266, 'timestamp': '2025-10-02 00:23:45.885826', 'step': 7314, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:45.942673', 'step': 7314, 'epoch': 1}
{'type': 'loss', 'content': 0.08194888383150101, 'timestamp': '2025-10-02 00:23:45.945540', 'step': 7315, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:46.000593', 'step': 7315, 'epoch': 1}
{'type': 'loss', 'content': 0.03264647722244263, 'timestamp': '2025-10-02 00:23:46.007212', 'step': 7316, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:46.061201', 'step': 7316, 'epoch': 1}
{'type': 'loss', 'content': 0.04030020162463188, 'timestamp': '2025-10-02 00:23:46.063456', 'step': 7317, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:46.117820', 'step': 7317, 'epoch': 1}
{'type': 'loss', 'content': 0.1568419486284256, 'timestamp': '2025-10-02 00:23:46.120399', 'step': 7318, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:46.174807', 'step': 7318, 'epoch': 1}
{'type': 'loss', 'content': 0.08059604465961456, 'timestamp': '2025-10-02 00:23:46.177978', 'step': 7319, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:23:46.232045', 'step': 7319, 'epoch': 1}
{'type': 'loss', 'content': 0.20741307735443115, 'timestamp': '2025-10-02 00:23:46.238086', 'step': 7320, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:46.292458', 'step': 7320, 'epoch': 1}
{'type': 'loss', 'content': 0.04334736615419388, 'timestamp': '2025-10-02 00:23:46.295324', 'step': 7321, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:23:46.362472', 'step': 7321, 'epoch': 1}
{'type': 'loss', 'content': 0.07417543977499008, 'timestamp': '2025-10-02 00:23:46.374473', 'step': 7322, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:46.428832', 'step': 7322, 'epoch': 1}
{'type': 'loss', 'content': 0.10914105921983719, 'timestamp': '2025-10-02 00:23:46.438257', 'step': 7323, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:46.494094', 'step': 7323, 'epoch': 1}
{'type': 'loss', 'content': 0.07459335029125214, 'timestamp': '2025-10-02 00:23:46.504463', 'step': 7324, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:23:46.564422', 'step': 7324, 'epoch': 1}
{'type': 'loss', 'content': 0.047613706439733505, 'timestamp': '2025-10-02 00:23:46.575800', 'step': 7325, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:46.632334', 'step': 7325, 'epoch': 1}
{'type': 'loss', 'content': 0.17377398908138275, 'timestamp': '2025-10-02 00:23:46.634831', 'step': 7326, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:46.689610', 'step': 7326, 'epoch': 1}
{'type': 'loss', 'content': 0.10427221655845642, 'timestamp': '2025-10-02 00:23:46.691781', 'step': 7327, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:23:46.746000', 'step': 7327, 'epoch': 1}
{'type': 'loss', 'content': 0.07915262877941132, 'timestamp': '2025-10-02 00:23:46.752215', 'step': 7328, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:23:46.805203', 'step': 7328, 'epoch': 1}
{'type': 'loss', 'content': 0.1379767805337906, 'timestamp': '2025-10-02 00:23:46.808373', 'step': 7329, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:46.862727', 'step': 7329, 'epoch': 1}
{'type': 'loss', 'content': 0.158524289727211, 'timestamp': '2025-10-02 00:23:46.865789', 'step': 7330, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:46.922037', 'step': 7330, 'epoch': 1}
{'type': 'loss', 'content': 0.04405038803815842, 'timestamp': '2025-10-02 00:23:46.931355', 'step': 7331, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:46.986094', 'step': 7331, 'epoch': 1}
{'type': 'loss', 'content': 0.11215110123157501, 'timestamp': '2025-10-02 00:23:46.994181', 'step': 7332, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:47.048213', 'step': 7332, 'epoch': 1}
{'type': 'loss', 'content': 0.1223897710442543, 'timestamp': '2025-10-02 00:23:47.051293', 'step': 7333, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:47.105311', 'step': 7333, 'epoch': 1}
{'type': 'loss', 'content': 0.027305249124765396, 'timestamp': '2025-10-02 00:23:47.107705', 'step': 7334, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:47.162965', 'step': 7334, 'epoch': 1}
{'type': 'loss', 'content': 0.1399935781955719, 'timestamp': '2025-10-02 00:23:47.165807', 'step': 7335, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:47.221641', 'step': 7335, 'epoch': 1}
{'type': 'loss', 'content': 0.09291676431894302, 'timestamp': '2025-10-02 00:23:47.227616', 'step': 7336, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:23:47.288344', 'step': 7336, 'epoch': 1}
{'type': 'loss', 'content': 0.10247749090194702, 'timestamp': '2025-10-02 00:23:47.299872', 'step': 7337, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-10-02 00:23:47.377077', 'step': 7337, 'epoch': 1}
{'type': 'loss', 'content': 0.03122045285999775, 'timestamp': '2025-10-02 00:23:47.390736', 'step': 7338, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:47.445690', 'step': 7338, 'epoch': 1}
{'type': 'loss', 'content': 0.03796596825122833, 'timestamp': '2025-10-02 00:23:47.448413', 'step': 7339, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:47.503199', 'step': 7339, 'epoch': 1}
{'type': 'loss', 'content': 0.03782768175005913, 'timestamp': '2025-10-02 00:23:47.509053', 'step': 7340, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:47.563857', 'step': 7340, 'epoch': 1}
{'type': 'loss', 'content': 0.1784411072731018, 'timestamp': '2025-10-02 00:23:47.565909', 'step': 7341, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:23:47.628282', 'step': 7341, 'epoch': 1}
{'type': 'loss', 'content': 0.023841343820095062, 'timestamp': '2025-10-02 00:23:47.639000', 'step': 7342, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:47.697295', 'step': 7342, 'epoch': 1}
{'type': 'loss', 'content': 0.040556129068136215, 'timestamp': '2025-10-02 00:23:47.700136', 'step': 7343, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:23:47.755742', 'step': 7343, 'epoch': 1}
{'type': 'loss', 'content': 0.14670614898204803, 'timestamp': '2025-10-02 00:23:47.762532', 'step': 7344, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:23:47.819624', 'step': 7344, 'epoch': 1}
{'type': 'loss', 'content': 0.08541415631771088, 'timestamp': '2025-10-02 00:23:47.822123', 'step': 7345, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:47.876706', 'step': 7345, 'epoch': 1}
{'type': 'loss', 'content': 0.1523280143737793, 'timestamp': '2025-10-02 00:23:47.879078', 'step': 7346, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:47.933788', 'step': 7346, 'epoch': 1}
{'type': 'loss', 'content': 0.041760627180337906, 'timestamp': '2025-10-02 00:23:47.939578', 'step': 7347, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:47.993877', 'step': 7347, 'epoch': 1}
{'type': 'loss', 'content': 0.04926614835858345, 'timestamp': '2025-10-02 00:23:47.999740', 'step': 7348, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:48.053747', 'step': 7348, 'epoch': 1}
{'type': 'loss', 'content': 0.12041237950325012, 'timestamp': '2025-10-02 00:23:48.056003', 'step': 7349, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:23:48.114927', 'step': 7349, 'epoch': 1}
{'type': 'loss', 'content': 0.039453983306884766, 'timestamp': '2025-10-02 00:23:48.125150', 'step': 7350, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:23:48.194268', 'step': 7350, 'epoch': 1}
{'type': 'loss', 'content': 0.05297994986176491, 'timestamp': '2025-10-02 00:23:48.206547', 'step': 7351, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:48.260743', 'step': 7351, 'epoch': 1}
{'type': 'loss', 'content': 0.12344600260257721, 'timestamp': '2025-10-02 00:23:48.266803', 'step': 7352, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:48.333637', 'step': 7352, 'epoch': 1}
{'type': 'loss', 'content': 0.07580319792032242, 'timestamp': '2025-10-02 00:23:48.335936', 'step': 7353, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:48.391366', 'step': 7353, 'epoch': 1}
{'type': 'loss', 'content': 0.018032336607575417, 'timestamp': '2025-10-02 00:23:48.393878', 'step': 7354, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:48.448821', 'step': 7354, 'epoch': 1}
{'type': 'loss', 'content': 0.07289402931928635, 'timestamp': '2025-10-02 00:23:48.451425', 'step': 7355, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:23:48.506852', 'step': 7355, 'epoch': 1}
{'type': 'loss', 'content': 0.14853085577487946, 'timestamp': '2025-10-02 00:23:48.513523', 'step': 7356, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:48.570635', 'step': 7356, 'epoch': 1}
{'type': 'loss', 'content': 0.04921441152691841, 'timestamp': '2025-10-02 00:23:48.573350', 'step': 7357, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:48.630266', 'step': 7357, 'epoch': 1}
{'type': 'loss', 'content': 0.17610420286655426, 'timestamp': '2025-10-02 00:23:48.633157', 'step': 7358, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:48.688379', 'step': 7358, 'epoch': 1}
{'type': 'loss', 'content': 0.030258942395448685, 'timestamp': '2025-10-02 00:23:48.693945', 'step': 7359, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:48.751042', 'step': 7359, 'epoch': 1}
{'type': 'loss', 'content': 0.05993608757853508, 'timestamp': '2025-10-02 00:23:48.761359', 'step': 7360, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:48.816987', 'step': 7360, 'epoch': 1}
{'type': 'loss', 'content': 0.053677670657634735, 'timestamp': '2025-10-02 00:23:48.819470', 'step': 7361, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:23:48.882704', 'step': 7361, 'epoch': 1}
{'type': 'loss', 'content': 0.055286794900894165, 'timestamp': '2025-10-02 00:23:48.893381', 'step': 7362, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:48.948979', 'step': 7362, 'epoch': 1}
{'type': 'loss', 'content': 0.1237095296382904, 'timestamp': '2025-10-02 00:23:48.951943', 'step': 7363, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:49.006649', 'step': 7363, 'epoch': 1}
{'type': 'loss', 'content': 0.14698387682437897, 'timestamp': '2025-10-02 00:23:49.012621', 'step': 7364, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:23:49.071112', 'step': 7364, 'epoch': 1}
{'type': 'loss', 'content': 0.030545519664883614, 'timestamp': '2025-10-02 00:23:49.082071', 'step': 7365, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:23:49.137625', 'step': 7365, 'epoch': 1}
{'type': 'loss', 'content': 0.06430632621049881, 'timestamp': '2025-10-02 00:23:49.140548', 'step': 7366, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:23:49.200193', 'step': 7366, 'epoch': 1}
{'type': 'loss', 'content': 0.03639419376850128, 'timestamp': '2025-10-02 00:23:49.210357', 'step': 7367, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:23:49.273893', 'step': 7367, 'epoch': 1}
{'type': 'loss', 'content': 0.04227470979094505, 'timestamp': '2025-10-02 00:23:49.285063', 'step': 7368, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:49.339708', 'step': 7368, 'epoch': 1}
{'type': 'loss', 'content': 0.047302745282649994, 'timestamp': '2025-10-02 00:23:49.348865', 'step': 7369, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:49.405123', 'step': 7369, 'epoch': 1}
{'type': 'loss', 'content': 0.059693142771720886, 'timestamp': '2025-10-02 00:23:49.407493', 'step': 7370, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:49.462781', 'step': 7370, 'epoch': 1}
{'type': 'loss', 'content': 0.0402991883456707, 'timestamp': '2025-10-02 00:23:49.465115', 'step': 7371, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:49.519550', 'step': 7371, 'epoch': 1}
{'type': 'loss', 'content': 0.1068257987499237, 'timestamp': '2025-10-02 00:23:49.525645', 'step': 7372, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:49.586646', 'step': 7372, 'epoch': 1}
{'type': 'loss', 'content': 0.0329873152077198, 'timestamp': '2025-10-02 00:23:49.593993', 'step': 7373, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:49.650085', 'step': 7373, 'epoch': 1}
{'type': 'loss', 'content': 0.05073818936944008, 'timestamp': '2025-10-02 00:23:49.659383', 'step': 7374, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:49.714192', 'step': 7374, 'epoch': 1}
{'type': 'loss', 'content': 0.059895794838666916, 'timestamp': '2025-10-02 00:23:49.723506', 'step': 7375, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:23:49.784864', 'step': 7375, 'epoch': 1}
{'type': 'loss', 'content': 0.024388574063777924, 'timestamp': '2025-10-02 00:23:49.795895', 'step': 7376, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:49.852850', 'step': 7376, 'epoch': 1}
{'type': 'loss', 'content': 0.1293933391571045, 'timestamp': '2025-10-02 00:23:49.863154', 'step': 7377, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:49.918639', 'step': 7377, 'epoch': 1}
{'type': 'loss', 'content': 0.2146454155445099, 'timestamp': '2025-10-02 00:23:49.920702', 'step': 7378, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:23:49.983267', 'step': 7378, 'epoch': 1}
{'type': 'loss', 'content': 0.02718966081738472, 'timestamp': '2025-10-02 00:23:49.993731', 'step': 7379, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:50.050984', 'step': 7379, 'epoch': 1}
{'type': 'loss', 'content': 0.11115357279777527, 'timestamp': '2025-10-02 00:23:50.057403', 'step': 7380, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:50.114407', 'step': 7380, 'epoch': 1}
{'type': 'loss', 'content': 0.05107209086418152, 'timestamp': '2025-10-02 00:23:50.117371', 'step': 7381, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:50.173717', 'step': 7381, 'epoch': 1}
{'type': 'loss', 'content': 0.03636683523654938, 'timestamp': '2025-10-02 00:23:50.177113', 'step': 7382, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:50.233146', 'step': 7382, 'epoch': 1}
{'type': 'loss', 'content': 0.047351572662591934, 'timestamp': '2025-10-02 00:23:50.236829', 'step': 7383, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:50.295834', 'step': 7383, 'epoch': 1}
{'type': 'loss', 'content': 0.06728260964155197, 'timestamp': '2025-10-02 00:23:50.302540', 'step': 7384, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:50.356066', 'step': 7384, 'epoch': 1}
{'type': 'loss', 'content': 0.21600471436977386, 'timestamp': '2025-10-02 00:23:50.358520', 'step': 7385, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:50.413135', 'step': 7385, 'epoch': 1}
{'type': 'loss', 'content': 0.060438208281993866, 'timestamp': '2025-10-02 00:23:50.420703', 'step': 7386, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:50.480400', 'step': 7386, 'epoch': 1}
{'type': 'loss', 'content': 0.0501311793923378, 'timestamp': '2025-10-02 00:23:50.489753', 'step': 7387, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:50.547824', 'step': 7387, 'epoch': 1}
{'type': 'loss', 'content': 0.09092243760824203, 'timestamp': '2025-10-02 00:23:50.555039', 'step': 7388, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:23:50.618961', 'step': 7388, 'epoch': 1}
{'type': 'loss', 'content': 0.21100512146949768, 'timestamp': '2025-10-02 00:23:50.630292', 'step': 7389, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:50.687105', 'step': 7389, 'epoch': 1}
{'type': 'loss', 'content': 0.1066344678401947, 'timestamp': '2025-10-02 00:23:50.689481', 'step': 7390, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:23:50.751363', 'step': 7390, 'epoch': 1}
{'type': 'loss', 'content': 0.059457048773765564, 'timestamp': '2025-10-02 00:23:50.761588', 'step': 7391, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:50.817805', 'step': 7391, 'epoch': 1}
{'type': 'loss', 'content': 0.08928487449884415, 'timestamp': '2025-10-02 00:23:50.824895', 'step': 7392, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:50.880124', 'step': 7392, 'epoch': 1}
{'type': 'loss', 'content': 0.06171527877449989, 'timestamp': '2025-10-02 00:23:50.887719', 'step': 7393, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:50.946023', 'step': 7393, 'epoch': 1}
{'type': 'loss', 'content': 0.0817476212978363, 'timestamp': '2025-10-02 00:23:50.949120', 'step': 7394, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:51.006299', 'step': 7394, 'epoch': 1}
{'type': 'loss', 'content': 0.07626161724328995, 'timestamp': '2025-10-02 00:23:51.008777', 'step': 7395, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:23:51.065886', 'step': 7395, 'epoch': 1}
{'type': 'loss', 'content': 0.09872069954872131, 'timestamp': '2025-10-02 00:23:51.072063', 'step': 7396, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:51.127911', 'step': 7396, 'epoch': 1}
{'type': 'loss', 'content': 0.13768370449543, 'timestamp': '2025-10-02 00:23:51.130742', 'step': 7397, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:23:51.188448', 'step': 7397, 'epoch': 1}
{'type': 'loss', 'content': 0.19944395124912262, 'timestamp': '2025-10-02 00:23:51.191522', 'step': 7398, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:51.251137', 'step': 7398, 'epoch': 1}
{'type': 'loss', 'content': 0.1853579729795456, 'timestamp': '2025-10-02 00:23:51.254196', 'step': 7399, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:51.310047', 'step': 7399, 'epoch': 1}
{'type': 'loss', 'content': 0.04405071213841438, 'timestamp': '2025-10-02 00:23:51.320166', 'step': 7400, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:51.376224', 'step': 7400, 'epoch': 1}
{'type': 'loss', 'content': 0.12457095831632614, 'timestamp': '2025-10-02 00:23:51.379532', 'step': 7401, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:23:51.445261', 'step': 7401, 'epoch': 1}
{'type': 'loss', 'content': 0.05261537805199623, 'timestamp': '2025-10-02 00:23:51.455913', 'step': 7402, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:51.511790', 'step': 7402, 'epoch': 1}
{'type': 'loss', 'content': 0.07082948088645935, 'timestamp': '2025-10-02 00:23:51.514478', 'step': 7403, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:51.568848', 'step': 7403, 'epoch': 1}
{'type': 'loss', 'content': 0.04137614741921425, 'timestamp': '2025-10-02 00:23:51.574974', 'step': 7404, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:51.628259', 'step': 7404, 'epoch': 1}
{'type': 'loss', 'content': 0.2117389738559723, 'timestamp': '2025-10-02 00:23:51.630586', 'step': 7405, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:51.686059', 'step': 7405, 'epoch': 1}
{'type': 'loss', 'content': 0.05966603383421898, 'timestamp': '2025-10-02 00:23:51.688605', 'step': 7406, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:51.743324', 'step': 7406, 'epoch': 1}
{'type': 'loss', 'content': 0.15257541835308075, 'timestamp': '2025-10-02 00:23:51.745422', 'step': 7407, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:23:51.799628', 'step': 7407, 'epoch': 1}
{'type': 'loss', 'content': 0.06952914595603943, 'timestamp': '2025-10-02 00:23:51.805479', 'step': 7408, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:51.859278', 'step': 7408, 'epoch': 1}
{'type': 'loss', 'content': 0.10209040343761444, 'timestamp': '2025-10-02 00:23:51.866661', 'step': 7409, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:51.920701', 'step': 7409, 'epoch': 1}
{'type': 'loss', 'content': 0.08525142818689346, 'timestamp': '2025-10-02 00:23:51.922861', 'step': 7410, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:23:51.982556', 'step': 7410, 'epoch': 1}
{'type': 'loss', 'content': 0.01436428353190422, 'timestamp': '2025-10-02 00:23:51.992734', 'step': 7411, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:52.047806', 'step': 7411, 'epoch': 1}
{'type': 'loss', 'content': 0.031704992055892944, 'timestamp': '2025-10-02 00:23:52.053577', 'step': 7412, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:23:52.107043', 'step': 7412, 'epoch': 1}
{'type': 'loss', 'content': 0.06447018682956696, 'timestamp': '2025-10-02 00:23:52.109476', 'step': 7413, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:52.164890', 'step': 7413, 'epoch': 1}
{'type': 'loss', 'content': 0.03670499101281166, 'timestamp': '2025-10-02 00:23:52.174440', 'step': 7414, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:23:52.228836', 'step': 7414, 'epoch': 1}
{'type': 'loss', 'content': 0.12208875268697739, 'timestamp': '2025-10-02 00:23:52.231163', 'step': 7415, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:23:52.286204', 'step': 7415, 'epoch': 1}
{'type': 'loss', 'content': 0.1349303424358368, 'timestamp': '2025-10-02 00:23:52.292258', 'step': 7416, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:23:52.346073', 'step': 7416, 'epoch': 1}
{'type': 'loss', 'content': 0.09818869084119797, 'timestamp': '2025-10-02 00:23:52.348600', 'step': 7417, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:52.403806', 'step': 7417, 'epoch': 1}
{'type': 'loss', 'content': 0.03403259813785553, 'timestamp': '2025-10-02 00:23:52.413153', 'step': 7418, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:52.468924', 'step': 7418, 'epoch': 1}
{'type': 'loss', 'content': 0.1592753529548645, 'timestamp': '2025-10-02 00:23:52.471357', 'step': 7419, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:52.525338', 'step': 7419, 'epoch': 1}
{'type': 'loss', 'content': 0.21263869106769562, 'timestamp': '2025-10-02 00:23:52.531948', 'step': 7420, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:52.586626', 'step': 7420, 'epoch': 1}
{'type': 'loss', 'content': 0.06263834983110428, 'timestamp': '2025-10-02 00:23:52.589274', 'step': 7421, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:23:52.643821', 'step': 7421, 'epoch': 1}
{'type': 'loss', 'content': 0.08941350132226944, 'timestamp': '2025-10-02 00:23:52.646608', 'step': 7422, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:52.701052', 'step': 7422, 'epoch': 1}
{'type': 'loss', 'content': 0.12934887409210205, 'timestamp': '2025-10-02 00:23:52.703589', 'step': 7423, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:52.759649', 'step': 7423, 'epoch': 1}
{'type': 'loss', 'content': 0.02574056386947632, 'timestamp': '2025-10-02 00:23:52.765492', 'step': 7424, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:52.820136', 'step': 7424, 'epoch': 1}
{'type': 'loss', 'content': 0.11484235525131226, 'timestamp': '2025-10-02 00:23:52.822512', 'step': 7425, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:52.876906', 'step': 7425, 'epoch': 1}
{'type': 'loss', 'content': 0.1582675278186798, 'timestamp': '2025-10-02 00:23:52.886235', 'step': 7426, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:23:52.946006', 'step': 7426, 'epoch': 1}
{'type': 'loss', 'content': 0.01475664135068655, 'timestamp': '2025-10-02 00:23:52.956190', 'step': 7427, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:53.010507', 'step': 7427, 'epoch': 1}
{'type': 'loss', 'content': 0.1390349417924881, 'timestamp': '2025-10-02 00:23:53.016444', 'step': 7428, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:53.071501', 'step': 7428, 'epoch': 1}
{'type': 'loss', 'content': 0.10910714417695999, 'timestamp': '2025-10-02 00:23:53.073755', 'step': 7429, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:53.128907', 'step': 7429, 'epoch': 1}
{'type': 'loss', 'content': 0.013893886469304562, 'timestamp': '2025-10-02 00:23:53.134947', 'step': 7430, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:53.190656', 'step': 7430, 'epoch': 1}
{'type': 'loss', 'content': 0.04416117072105408, 'timestamp': '2025-10-02 00:23:53.196488', 'step': 7431, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:53.251820', 'step': 7431, 'epoch': 1}
{'type': 'loss', 'content': 0.09366602450609207, 'timestamp': '2025-10-02 00:23:53.258414', 'step': 7432, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:23:53.316341', 'step': 7432, 'epoch': 1}
{'type': 'loss', 'content': 0.019075898453593254, 'timestamp': '2025-10-02 00:23:53.327295', 'step': 7433, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:53.382787', 'step': 7433, 'epoch': 1}
{'type': 'loss', 'content': 0.046874549239873886, 'timestamp': '2025-10-02 00:23:53.392359', 'step': 7434, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:53.448034', 'step': 7434, 'epoch': 1}
{'type': 'loss', 'content': 0.15890488028526306, 'timestamp': '2025-10-02 00:23:53.451009', 'step': 7435, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:53.505707', 'step': 7435, 'epoch': 1}
{'type': 'loss', 'content': 0.09590041637420654, 'timestamp': '2025-10-02 00:23:53.511565', 'step': 7436, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:23:53.565879', 'step': 7436, 'epoch': 1}
{'type': 'loss', 'content': 0.197482630610466, 'timestamp': '2025-10-02 00:23:53.568225', 'step': 7437, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:53.624814', 'step': 7437, 'epoch': 1}
{'type': 'loss', 'content': 0.05768998712301254, 'timestamp': '2025-10-02 00:23:53.634379', 'step': 7438, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:53.689764', 'step': 7438, 'epoch': 1}
{'type': 'loss', 'content': 0.15629936754703522, 'timestamp': '2025-10-02 00:23:53.693121', 'step': 7439, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:53.747611', 'step': 7439, 'epoch': 1}
{'type': 'loss', 'content': 0.08908799290657043, 'timestamp': '2025-10-02 00:23:53.753488', 'step': 7440, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:53.807065', 'step': 7440, 'epoch': 1}
{'type': 'loss', 'content': 0.18020901083946228, 'timestamp': '2025-10-02 00:23:53.809636', 'step': 7441, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:53.864033', 'step': 7441, 'epoch': 1}
{'type': 'loss', 'content': 0.1102321669459343, 'timestamp': '2025-10-02 00:23:53.869901', 'step': 7442, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:53.925640', 'step': 7442, 'epoch': 1}
{'type': 'loss', 'content': 0.12198398262262344, 'timestamp': '2025-10-02 00:23:53.927856', 'step': 7443, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:23:53.990949', 'step': 7443, 'epoch': 1}
{'type': 'loss', 'content': 0.021080227568745613, 'timestamp': '2025-10-02 00:23:54.002562', 'step': 7444, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:54.056650', 'step': 7444, 'epoch': 1}
{'type': 'loss', 'content': 0.12087738513946533, 'timestamp': '2025-10-02 00:23:54.058843', 'step': 7445, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:23:54.112723', 'step': 7445, 'epoch': 1}
{'type': 'loss', 'content': 0.16542741656303406, 'timestamp': '2025-10-02 00:23:54.115512', 'step': 7446, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:54.170598', 'step': 7446, 'epoch': 1}
{'type': 'loss', 'content': 0.06264077126979828, 'timestamp': '2025-10-02 00:23:54.172853', 'step': 7447, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:54.227501', 'step': 7447, 'epoch': 1}
{'type': 'loss', 'content': 0.12250982224941254, 'timestamp': '2025-10-02 00:23:54.233671', 'step': 7448, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:54.289007', 'step': 7448, 'epoch': 1}
{'type': 'loss', 'content': 0.03345001861453056, 'timestamp': '2025-10-02 00:23:54.291334', 'step': 7449, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:54.345835', 'step': 7449, 'epoch': 1}
{'type': 'loss', 'content': 0.014818856492638588, 'timestamp': '2025-10-02 00:23:54.353469', 'step': 7450, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:54.409711', 'step': 7450, 'epoch': 1}
{'type': 'loss', 'content': 0.01292911171913147, 'timestamp': '2025-10-02 00:23:54.419251', 'step': 7451, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:54.481099', 'step': 7451, 'epoch': 1}
{'type': 'loss', 'content': 0.0861818939447403, 'timestamp': '2025-10-02 00:23:54.486680', 'step': 7452, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:23:54.542583', 'step': 7452, 'epoch': 1}
{'type': 'loss', 'content': 0.08442823588848114, 'timestamp': '2025-10-02 00:23:54.544926', 'step': 7453, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:54.599401', 'step': 7453, 'epoch': 1}
{'type': 'loss', 'content': 0.0531463623046875, 'timestamp': '2025-10-02 00:23:54.602236', 'step': 7454, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:54.660478', 'step': 7454, 'epoch': 1}
{'type': 'loss', 'content': 0.15156368911266327, 'timestamp': '2025-10-02 00:23:54.662963', 'step': 7455, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:54.718149', 'step': 7455, 'epoch': 1}
{'type': 'loss', 'content': 0.08952785283327103, 'timestamp': '2025-10-02 00:23:54.724071', 'step': 7456, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:54.777428', 'step': 7456, 'epoch': 1}
{'type': 'loss', 'content': 0.05971907079219818, 'timestamp': '2025-10-02 00:23:54.785006', 'step': 7457, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:23:54.839621', 'step': 7457, 'epoch': 1}
{'type': 'loss', 'content': 0.08317435532808304, 'timestamp': '2025-10-02 00:23:54.841843', 'step': 7458, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:54.896583', 'step': 7458, 'epoch': 1}
{'type': 'loss', 'content': 0.03130464255809784, 'timestamp': '2025-10-02 00:23:54.905936', 'step': 7459, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:54.959810', 'step': 7459, 'epoch': 1}
{'type': 'loss', 'content': 0.1590002328157425, 'timestamp': '2025-10-02 00:23:54.965736', 'step': 7460, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:55.019862', 'step': 7460, 'epoch': 1}
{'type': 'loss', 'content': 0.07682020962238312, 'timestamp': '2025-10-02 00:23:55.024315', 'step': 7461, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:23:55.086142', 'step': 7461, 'epoch': 1}
{'type': 'loss', 'content': 0.10101287066936493, 'timestamp': '2025-10-02 00:23:55.096788', 'step': 7462, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:23:55.151678', 'step': 7462, 'epoch': 1}
{'type': 'loss', 'content': 0.15836665034294128, 'timestamp': '2025-10-02 00:23:55.154103', 'step': 7463, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:55.208062', 'step': 7463, 'epoch': 1}
{'type': 'loss', 'content': 0.14022786915302277, 'timestamp': '2025-10-02 00:23:55.214546', 'step': 7464, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:23:55.274469', 'step': 7464, 'epoch': 1}
{'type': 'loss', 'content': 0.038769014179706573, 'timestamp': '2025-10-02 00:23:55.285457', 'step': 7465, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:55.340728', 'step': 7465, 'epoch': 1}
{'type': 'loss', 'content': 0.028278842568397522, 'timestamp': '2025-10-02 00:23:55.343408', 'step': 7466, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:23:55.416222', 'step': 7466, 'epoch': 1}
{'type': 'loss', 'content': 0.026940152049064636, 'timestamp': '2025-10-02 00:23:55.428845', 'step': 7467, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:55.483224', 'step': 7467, 'epoch': 1}
{'type': 'loss', 'content': 0.12992386519908905, 'timestamp': '2025-10-02 00:23:55.491388', 'step': 7468, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:55.552685', 'step': 7468, 'epoch': 1}
{'type': 'loss', 'content': 0.088840551674366, 'timestamp': '2025-10-02 00:23:55.555023', 'step': 7469, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:23:55.614091', 'step': 7469, 'epoch': 1}
{'type': 'loss', 'content': 0.02900201827287674, 'timestamp': '2025-10-02 00:23:55.624254', 'step': 7470, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:23:55.687640', 'step': 7470, 'epoch': 1}
{'type': 'loss', 'content': 0.06797318160533905, 'timestamp': '2025-10-02 00:23:55.698475', 'step': 7471, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:55.753492', 'step': 7471, 'epoch': 1}
{'type': 'loss', 'content': 0.059093233197927475, 'timestamp': '2025-10-02 00:23:55.759634', 'step': 7472, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:55.813707', 'step': 7472, 'epoch': 1}
{'type': 'loss', 'content': 0.024131273850798607, 'timestamp': '2025-10-02 00:23:55.821187', 'step': 7473, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:55.875668', 'step': 7473, 'epoch': 1}
{'type': 'loss', 'content': 0.08567953109741211, 'timestamp': '2025-10-02 00:23:55.878767', 'step': 7474, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:55.934566', 'step': 7474, 'epoch': 1}
{'type': 'loss', 'content': 0.037286534905433655, 'timestamp': '2025-10-02 00:23:55.941962', 'step': 7475, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:23:56.015882', 'step': 7475, 'epoch': 1}
{'type': 'loss', 'content': 0.041155774146318436, 'timestamp': '2025-10-02 00:23:56.029892', 'step': 7476, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:56.084137', 'step': 7476, 'epoch': 1}
{'type': 'loss', 'content': 0.09369553625583649, 'timestamp': '2025-10-02 00:23:56.086785', 'step': 7477, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:56.141907', 'step': 7477, 'epoch': 1}
{'type': 'loss', 'content': 0.050874244421720505, 'timestamp': '2025-10-02 00:23:56.144275', 'step': 7478, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:56.198817', 'step': 7478, 'epoch': 1}
{'type': 'loss', 'content': 0.0700259655714035, 'timestamp': '2025-10-02 00:23:56.200981', 'step': 7479, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:56.257163', 'step': 7479, 'epoch': 1}
{'type': 'loss', 'content': 0.09906967729330063, 'timestamp': '2025-10-02 00:23:56.267502', 'step': 7480, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:56.321577', 'step': 7480, 'epoch': 1}
{'type': 'loss', 'content': 0.18094448745250702, 'timestamp': '2025-10-02 00:23:56.323744', 'step': 7481, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:23:56.377222', 'step': 7481, 'epoch': 1}
{'type': 'loss', 'content': 0.2063473016023636, 'timestamp': '2025-10-02 00:23:56.379414', 'step': 7482, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:56.434241', 'step': 7482, 'epoch': 1}
{'type': 'loss', 'content': 0.0397789292037487, 'timestamp': '2025-10-02 00:23:56.439850', 'step': 7483, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:23:56.494585', 'step': 7483, 'epoch': 1}
{'type': 'loss', 'content': 0.1314094364643097, 'timestamp': '2025-10-02 00:23:56.500651', 'step': 7484, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:23:56.555457', 'step': 7484, 'epoch': 1}
{'type': 'loss', 'content': 0.047433990985155106, 'timestamp': '2025-10-02 00:23:56.564949', 'step': 7485, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:23:56.619780', 'step': 7485, 'epoch': 1}
{'type': 'loss', 'content': 0.079086534678936, 'timestamp': '2025-10-02 00:23:56.622433', 'step': 7486, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:56.677065', 'step': 7486, 'epoch': 1}
{'type': 'loss', 'content': 0.03898142650723457, 'timestamp': '2025-10-02 00:23:56.682706', 'step': 7487, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:56.738942', 'step': 7487, 'epoch': 1}
{'type': 'loss', 'content': 0.10369428247213364, 'timestamp': '2025-10-02 00:23:56.749234', 'step': 7488, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:23:56.821086', 'step': 7488, 'epoch': 1}
{'type': 'loss', 'content': 0.0217167679220438, 'timestamp': '2025-10-02 00:23:56.835501', 'step': 7489, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:23:56.904406', 'step': 7489, 'epoch': 1}
{'type': 'loss', 'content': 0.011341114528477192, 'timestamp': '2025-10-02 00:23:56.916406', 'step': 7490, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:23:56.972728', 'step': 7490, 'epoch': 1}
{'type': 'loss', 'content': 0.10202381014823914, 'timestamp': '2025-10-02 00:23:56.982295', 'step': 7491, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:57.037150', 'step': 7491, 'epoch': 1}
{'type': 'loss', 'content': 0.2002975195646286, 'timestamp': '2025-10-02 00:23:57.043054', 'step': 7492, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:57.097513', 'step': 7492, 'epoch': 1}
{'type': 'loss', 'content': 0.08073826134204865, 'timestamp': '2025-10-02 00:23:57.099865', 'step': 7493, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:57.155093', 'step': 7493, 'epoch': 1}
{'type': 'loss', 'content': 0.2024163156747818, 'timestamp': '2025-10-02 00:23:57.157649', 'step': 7494, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:57.211902', 'step': 7494, 'epoch': 1}
{'type': 'loss', 'content': 0.11743422597646713, 'timestamp': '2025-10-02 00:23:57.213828', 'step': 7495, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:57.268036', 'step': 7495, 'epoch': 1}
{'type': 'loss', 'content': 0.05836543068289757, 'timestamp': '2025-10-02 00:23:57.274388', 'step': 7496, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:57.327452', 'step': 7496, 'epoch': 1}
{'type': 'loss', 'content': 0.07329761981964111, 'timestamp': '2025-10-02 00:23:57.334916', 'step': 7497, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:57.388594', 'step': 7497, 'epoch': 1}
{'type': 'loss', 'content': 0.09036636352539062, 'timestamp': '2025-10-02 00:23:57.390896', 'step': 7498, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:23:57.445474', 'step': 7498, 'epoch': 1}
{'type': 'loss', 'content': 0.23791275918483734, 'timestamp': '2025-10-02 00:23:57.447898', 'step': 7499, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:57.502997', 'step': 7499, 'epoch': 1}
{'type': 'loss', 'content': 0.06293017417192459, 'timestamp': '2025-10-02 00:23:57.508806', 'step': 7500, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 7500', 'timestamp': '2025-10-02 00:23:58.061391', 'step': 7500, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:58.119962', 'step': 7500, 'epoch': 1}
{'type': 'loss', 'content': 0.0766311064362526, 'timestamp': '2025-10-02 00:23:58.122561', 'step': 7501, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:58.177639', 'step': 7501, 'epoch': 1}
{'type': 'loss', 'content': 0.1352979838848114, 'timestamp': '2025-10-02 00:23:58.180180', 'step': 7502, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:58.234654', 'step': 7502, 'epoch': 1}
{'type': 'loss', 'content': 0.10474110394716263, 'timestamp': '2025-10-02 00:23:58.237239', 'step': 7503, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:58.292809', 'step': 7503, 'epoch': 1}
{'type': 'loss', 'content': 0.1490211933851242, 'timestamp': '2025-10-02 00:23:58.298732', 'step': 7504, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:58.353824', 'step': 7504, 'epoch': 1}
{'type': 'loss', 'content': 0.03034079819917679, 'timestamp': '2025-10-02 00:23:58.356396', 'step': 7505, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:58.410782', 'step': 7505, 'epoch': 1}
{'type': 'loss', 'content': 0.10813362896442413, 'timestamp': '2025-10-02 00:23:58.418195', 'step': 7506, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:58.473348', 'step': 7506, 'epoch': 1}
{'type': 'loss', 'content': 0.12129632383584976, 'timestamp': '2025-10-02 00:23:58.475688', 'step': 7507, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:58.530113', 'step': 7507, 'epoch': 1}
{'type': 'loss', 'content': 0.10348635911941528, 'timestamp': '2025-10-02 00:23:58.536245', 'step': 7508, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:23:58.590771', 'step': 7508, 'epoch': 1}
{'type': 'loss', 'content': 0.05026965215802193, 'timestamp': '2025-10-02 00:23:58.593346', 'step': 7509, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:58.649034', 'step': 7509, 'epoch': 1}
{'type': 'loss', 'content': 0.025527089834213257, 'timestamp': '2025-10-02 00:23:58.654864', 'step': 7510, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:23:58.709191', 'step': 7510, 'epoch': 1}
{'type': 'loss', 'content': 0.054855845868587494, 'timestamp': '2025-10-02 00:23:58.714832', 'step': 7511, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:58.770479', 'step': 7511, 'epoch': 1}
{'type': 'loss', 'content': 0.06074662134051323, 'timestamp': '2025-10-02 00:23:58.776322', 'step': 7512, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:23:58.848600', 'step': 7512, 'epoch': 1}
{'type': 'loss', 'content': 0.039774492383003235, 'timestamp': '2025-10-02 00:23:58.863307', 'step': 7513, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:58.917909', 'step': 7513, 'epoch': 1}
{'type': 'loss', 'content': 0.06728394329547882, 'timestamp': '2025-10-02 00:23:58.921140', 'step': 7514, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:58.978139', 'step': 7514, 'epoch': 1}
{'type': 'loss', 'content': 0.03933665528893471, 'timestamp': '2025-10-02 00:23:58.981223', 'step': 7515, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:59.037426', 'step': 7515, 'epoch': 1}
{'type': 'loss', 'content': 0.3681497275829315, 'timestamp': '2025-10-02 00:23:59.043455', 'step': 7516, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:59.099471', 'step': 7516, 'epoch': 1}
{'type': 'loss', 'content': 0.08333395421504974, 'timestamp': '2025-10-02 00:23:59.102548', 'step': 7517, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:23:59.160488', 'step': 7517, 'epoch': 1}
{'type': 'loss', 'content': 0.05248801410198212, 'timestamp': '2025-10-02 00:23:59.165356', 'step': 7518, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:23:59.237090', 'step': 7518, 'epoch': 1}
{'type': 'loss', 'content': 0.006744981277734041, 'timestamp': '2025-10-02 00:23:59.249341', 'step': 7519, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:23:59.305715', 'step': 7519, 'epoch': 1}
{'type': 'loss', 'content': 0.06924287229776382, 'timestamp': '2025-10-02 00:23:59.311557', 'step': 7520, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:59.368471', 'step': 7520, 'epoch': 1}
{'type': 'loss', 'content': 0.1148313358426094, 'timestamp': '2025-10-02 00:23:59.370726', 'step': 7521, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:23:59.425080', 'step': 7521, 'epoch': 1}
{'type': 'loss', 'content': 0.2045443058013916, 'timestamp': '2025-10-02 00:23:59.429227', 'step': 7522, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:23:59.493858', 'step': 7522, 'epoch': 1}
{'type': 'loss', 'content': 0.012435605749487877, 'timestamp': '2025-10-02 00:23:59.504519', 'step': 7523, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:59.561608', 'step': 7523, 'epoch': 1}
{'type': 'loss', 'content': 0.039139505475759506, 'timestamp': '2025-10-02 00:23:59.568263', 'step': 7524, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:23:59.624134', 'step': 7524, 'epoch': 1}
{'type': 'loss', 'content': 0.0641414225101471, 'timestamp': '2025-10-02 00:23:59.626881', 'step': 7525, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:59.684500', 'step': 7525, 'epoch': 1}
{'type': 'loss', 'content': 0.12848049402236938, 'timestamp': '2025-10-02 00:23:59.686893', 'step': 7526, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:23:59.742925', 'step': 7526, 'epoch': 1}
{'type': 'loss', 'content': 0.06986279040575027, 'timestamp': '2025-10-02 00:23:59.750227', 'step': 7527, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:23:59.806493', 'step': 7527, 'epoch': 1}
{'type': 'loss', 'content': 0.2442048043012619, 'timestamp': '2025-10-02 00:23:59.813837', 'step': 7528, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:23:59.869889', 'step': 7528, 'epoch': 1}
{'type': 'loss', 'content': 0.16655369102954865, 'timestamp': '2025-10-02 00:23:59.873385', 'step': 7529, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:23:59.928116', 'step': 7529, 'epoch': 1}
{'type': 'loss', 'content': 0.08735626935958862, 'timestamp': '2025-10-02 00:23:59.931228', 'step': 7530, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:23:59.989366', 'step': 7530, 'epoch': 1}
{'type': 'loss', 'content': 0.08997385203838348, 'timestamp': '2025-10-02 00:23:59.992406', 'step': 7531, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:00.048432', 'step': 7531, 'epoch': 1}
{'type': 'loss', 'content': 0.08145824819803238, 'timestamp': '2025-10-02 00:24:00.055141', 'step': 7532, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:00.109130', 'step': 7532, 'epoch': 1}
{'type': 'loss', 'content': 0.22815895080566406, 'timestamp': '2025-10-02 00:24:00.112652', 'step': 7533, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:24:00.168964', 'step': 7533, 'epoch': 1}
{'type': 'loss', 'content': 0.12412314862012863, 'timestamp': '2025-10-02 00:24:00.172896', 'step': 7534, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:00.230698', 'step': 7534, 'epoch': 1}
{'type': 'loss', 'content': 0.04835643991827965, 'timestamp': '2025-10-02 00:24:00.237893', 'step': 7535, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:00.294209', 'step': 7535, 'epoch': 1}
{'type': 'loss', 'content': 0.08456113934516907, 'timestamp': '2025-10-02 00:24:00.300260', 'step': 7536, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:24:00.354313', 'step': 7536, 'epoch': 1}
{'type': 'loss', 'content': 0.06755136698484421, 'timestamp': '2025-10-02 00:24:00.364576', 'step': 7537, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:24:00.421843', 'step': 7537, 'epoch': 1}
{'type': 'loss', 'content': 0.13323302567005157, 'timestamp': '2025-10-02 00:24:00.424240', 'step': 7538, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:00.479336', 'step': 7538, 'epoch': 1}
{'type': 'loss', 'content': 0.14636676013469696, 'timestamp': '2025-10-02 00:24:00.484884', 'step': 7539, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:00.539248', 'step': 7539, 'epoch': 1}
{'type': 'loss', 'content': 0.061616234481334686, 'timestamp': '2025-10-02 00:24:00.546088', 'step': 7540, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:24:00.606408', 'step': 7540, 'epoch': 1}
{'type': 'loss', 'content': 0.10110926628112793, 'timestamp': '2025-10-02 00:24:00.617759', 'step': 7541, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:24:00.677084', 'step': 7541, 'epoch': 1}
{'type': 'loss', 'content': 0.06103459373116493, 'timestamp': '2025-10-02 00:24:00.687253', 'step': 7542, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:00.742867', 'step': 7542, 'epoch': 1}
{'type': 'loss', 'content': 0.057520344853401184, 'timestamp': '2025-10-02 00:24:00.745553', 'step': 7543, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:00.801803', 'step': 7543, 'epoch': 1}
{'type': 'loss', 'content': 0.08017662167549133, 'timestamp': '2025-10-02 00:24:00.807793', 'step': 7544, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:00.861658', 'step': 7544, 'epoch': 1}
{'type': 'loss', 'content': 0.10509490221738815, 'timestamp': '2025-10-02 00:24:00.864014', 'step': 7545, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:00.918712', 'step': 7545, 'epoch': 1}
{'type': 'loss', 'content': 0.06443988531827927, 'timestamp': '2025-10-02 00:24:00.924651', 'step': 7546, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:24:00.980216', 'step': 7546, 'epoch': 1}
{'type': 'loss', 'content': 0.0349554605782032, 'timestamp': '2025-10-02 00:24:00.989744', 'step': 7547, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:01.046992', 'step': 7547, 'epoch': 1}
{'type': 'loss', 'content': 0.07249156385660172, 'timestamp': '2025-10-02 00:24:01.055139', 'step': 7548, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:01.108889', 'step': 7548, 'epoch': 1}
{'type': 'loss', 'content': 0.18140244483947754, 'timestamp': '2025-10-02 00:24:01.111171', 'step': 7549, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:01.165166', 'step': 7549, 'epoch': 1}
{'type': 'loss', 'content': 0.24636133015155792, 'timestamp': '2025-10-02 00:24:01.167509', 'step': 7550, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:24:01.222486', 'step': 7550, 'epoch': 1}
{'type': 'loss', 'content': 0.05805157870054245, 'timestamp': '2025-10-02 00:24:01.224568', 'step': 7551, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:01.279016', 'step': 7551, 'epoch': 1}
{'type': 'loss', 'content': 0.0404941663146019, 'timestamp': '2025-10-02 00:24:01.287174', 'step': 7552, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:01.340788', 'step': 7552, 'epoch': 1}
{'type': 'loss', 'content': 0.07159089297056198, 'timestamp': '2025-10-02 00:24:01.346861', 'step': 7553, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:01.401603', 'step': 7553, 'epoch': 1}
{'type': 'loss', 'content': 0.02452966384589672, 'timestamp': '2025-10-02 00:24:01.410979', 'step': 7554, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:01.468560', 'step': 7554, 'epoch': 1}
{'type': 'loss', 'content': 0.030378131195902824, 'timestamp': '2025-10-02 00:24:01.470733', 'step': 7555, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:24:01.524806', 'step': 7555, 'epoch': 1}
{'type': 'loss', 'content': 0.08608632534742355, 'timestamp': '2025-10-02 00:24:01.530762', 'step': 7556, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:24:01.584088', 'step': 7556, 'epoch': 1}
{'type': 'loss', 'content': 0.10555821657180786, 'timestamp': '2025-10-02 00:24:01.586723', 'step': 7557, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:01.641042', 'step': 7557, 'epoch': 1}
{'type': 'loss', 'content': 0.21612757444381714, 'timestamp': '2025-10-02 00:24:01.643404', 'step': 7558, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:01.697359', 'step': 7558, 'epoch': 1}
{'type': 'loss', 'content': 0.2539833188056946, 'timestamp': '2025-10-02 00:24:01.702236', 'step': 7559, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:01.758665', 'step': 7559, 'epoch': 1}
{'type': 'loss', 'content': 0.09191352128982544, 'timestamp': '2025-10-02 00:24:01.764523', 'step': 7560, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:01.818392', 'step': 7560, 'epoch': 1}
{'type': 'loss', 'content': 0.06330416351556778, 'timestamp': '2025-10-02 00:24:01.828111', 'step': 7561, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:01.885065', 'step': 7561, 'epoch': 1}
{'type': 'loss', 'content': 0.009602510370314121, 'timestamp': '2025-10-02 00:24:01.887359', 'step': 7562, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:24:01.942107', 'step': 7562, 'epoch': 1}
{'type': 'loss', 'content': 0.0820702612400055, 'timestamp': '2025-10-02 00:24:01.944524', 'step': 7563, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:01.999305', 'step': 7563, 'epoch': 1}
{'type': 'loss', 'content': 0.12802675366401672, 'timestamp': '2025-10-02 00:24:02.005092', 'step': 7564, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:02.059009', 'step': 7564, 'epoch': 1}
{'type': 'loss', 'content': 0.07008102536201477, 'timestamp': '2025-10-02 00:24:02.066360', 'step': 7565, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:24:02.120896', 'step': 7565, 'epoch': 1}
{'type': 'loss', 'content': 0.09910772740840912, 'timestamp': '2025-10-02 00:24:02.123142', 'step': 7566, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:02.177066', 'step': 7566, 'epoch': 1}
{'type': 'loss', 'content': 0.13360480964183807, 'timestamp': '2025-10-02 00:24:02.179393', 'step': 7567, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:24:02.245894', 'step': 7567, 'epoch': 1}
{'type': 'loss', 'content': 0.03396125137805939, 'timestamp': '2025-10-02 00:24:02.256895', 'step': 7568, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:02.310696', 'step': 7568, 'epoch': 1}
{'type': 'loss', 'content': 0.17031769454479218, 'timestamp': '2025-10-02 00:24:02.313370', 'step': 7569, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:02.367229', 'step': 7569, 'epoch': 1}
{'type': 'loss', 'content': 0.11637619137763977, 'timestamp': '2025-10-02 00:24:02.369666', 'step': 7570, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:24:02.423802', 'step': 7570, 'epoch': 1}
{'type': 'loss', 'content': 0.14926007390022278, 'timestamp': '2025-10-02 00:24:02.425980', 'step': 7571, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:24:02.480316', 'step': 7571, 'epoch': 1}
{'type': 'loss', 'content': 0.13947905600070953, 'timestamp': '2025-10-02 00:24:02.485805', 'step': 7572, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:24:02.540262', 'step': 7572, 'epoch': 1}
{'type': 'loss', 'content': 0.05213326960802078, 'timestamp': '2025-10-02 00:24:02.550494', 'step': 7573, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:02.605456', 'step': 7573, 'epoch': 1}
{'type': 'loss', 'content': 0.019442128017544746, 'timestamp': '2025-10-02 00:24:02.614761', 'step': 7574, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:02.668868', 'step': 7574, 'epoch': 1}
{'type': 'loss', 'content': 0.1334839016199112, 'timestamp': '2025-10-02 00:24:02.671298', 'step': 7575, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:02.724920', 'step': 7575, 'epoch': 1}
{'type': 'loss', 'content': 0.11312025785446167, 'timestamp': '2025-10-02 00:24:02.730809', 'step': 7576, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:24:02.788335', 'step': 7576, 'epoch': 1}
{'type': 'loss', 'content': 0.04590824991464615, 'timestamp': '2025-10-02 00:24:02.799298', 'step': 7577, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:24:02.860681', 'step': 7577, 'epoch': 1}
{'type': 'loss', 'content': 0.08418039232492447, 'timestamp': '2025-10-02 00:24:02.871308', 'step': 7578, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:02.926465', 'step': 7578, 'epoch': 1}
{'type': 'loss', 'content': 0.060831572860479355, 'timestamp': '2025-10-02 00:24:02.928687', 'step': 7579, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:02.983676', 'step': 7579, 'epoch': 1}
{'type': 'loss', 'content': 0.049680765718221664, 'timestamp': '2025-10-02 00:24:02.990084', 'step': 7580, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:24:03.051728', 'step': 7580, 'epoch': 1}
{'type': 'loss', 'content': 0.043702319264411926, 'timestamp': '2025-10-02 00:24:03.063260', 'step': 7581, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:03.117624', 'step': 7581, 'epoch': 1}
{'type': 'loss', 'content': 0.04910612106323242, 'timestamp': '2025-10-02 00:24:03.125135', 'step': 7582, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:03.179646', 'step': 7582, 'epoch': 1}
{'type': 'loss', 'content': 0.06614785641431808, 'timestamp': '2025-10-02 00:24:03.181821', 'step': 7583, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:03.236409', 'step': 7583, 'epoch': 1}
{'type': 'loss', 'content': 0.10249000042676926, 'timestamp': '2025-10-02 00:24:03.242905', 'step': 7584, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:03.297875', 'step': 7584, 'epoch': 1}
{'type': 'loss', 'content': 0.11787886917591095, 'timestamp': '2025-10-02 00:24:03.300329', 'step': 7585, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:24:03.385299', 'step': 7585, 'epoch': 1}
{'type': 'loss', 'content': 0.03969268500804901, 'timestamp': '2025-10-02 00:24:03.395767', 'step': 7586, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:24:03.450885', 'step': 7586, 'epoch': 1}
{'type': 'loss', 'content': 0.1265629082918167, 'timestamp': '2025-10-02 00:24:03.453572', 'step': 7587, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:03.522665', 'step': 7587, 'epoch': 1}
{'type': 'loss', 'content': 0.024509340524673462, 'timestamp': '2025-10-02 00:24:03.528857', 'step': 7588, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:03.582806', 'step': 7588, 'epoch': 1}
{'type': 'loss', 'content': 0.06791682541370392, 'timestamp': '2025-10-02 00:24:03.585925', 'step': 7589, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:03.643316', 'step': 7589, 'epoch': 1}
{'type': 'loss', 'content': 0.06770709156990051, 'timestamp': '2025-10-02 00:24:03.645854', 'step': 7590, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:24:03.700879', 'step': 7590, 'epoch': 1}
{'type': 'loss', 'content': 0.18553970754146576, 'timestamp': '2025-10-02 00:24:03.703270', 'step': 7591, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:03.757594', 'step': 7591, 'epoch': 1}
{'type': 'loss', 'content': 0.13727959990501404, 'timestamp': '2025-10-02 00:24:03.763284', 'step': 7592, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:03.817684', 'step': 7592, 'epoch': 1}
{'type': 'loss', 'content': 0.029691219329833984, 'timestamp': '2025-10-02 00:24:03.826553', 'step': 7593, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:03.903303', 'step': 7593, 'epoch': 1}
{'type': 'loss', 'content': 0.09638682752847672, 'timestamp': '2025-10-02 00:24:03.908799', 'step': 7594, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:03.963783', 'step': 7594, 'epoch': 1}
{'type': 'loss', 'content': 0.06815437972545624, 'timestamp': '2025-10-02 00:24:03.966352', 'step': 7595, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:04.022792', 'step': 7595, 'epoch': 1}
{'type': 'loss', 'content': 0.19621799886226654, 'timestamp': '2025-10-02 00:24:04.028632', 'step': 7596, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:24:04.096320', 'step': 7596, 'epoch': 1}
{'type': 'loss', 'content': 0.050690777599811554, 'timestamp': '2025-10-02 00:24:04.107865', 'step': 7597, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:04.164646', 'step': 7597, 'epoch': 1}
{'type': 'loss', 'content': 0.06215357780456543, 'timestamp': '2025-10-02 00:24:04.166915', 'step': 7598, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:04.220557', 'step': 7598, 'epoch': 1}
{'type': 'loss', 'content': 0.1448984593153, 'timestamp': '2025-10-02 00:24:04.224628', 'step': 7599, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:24:04.286699', 'step': 7599, 'epoch': 1}
{'type': 'loss', 'content': 0.12110964208841324, 'timestamp': '2025-10-02 00:24:04.292617', 'step': 7600, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:24:04.350005', 'step': 7600, 'epoch': 1}
{'type': 'loss', 'content': 0.0505690760910511, 'timestamp': '2025-10-02 00:24:04.352330', 'step': 7601, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:04.406638', 'step': 7601, 'epoch': 1}
{'type': 'loss', 'content': 0.05282820016145706, 'timestamp': '2025-10-02 00:24:04.412335', 'step': 7602, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:04.466722', 'step': 7602, 'epoch': 1}
{'type': 'loss', 'content': 0.09020036458969116, 'timestamp': '2025-10-02 00:24:04.472437', 'step': 7603, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:24:04.533351', 'step': 7603, 'epoch': 1}
{'type': 'loss', 'content': 0.045423515141010284, 'timestamp': '2025-10-02 00:24:04.543704', 'step': 7604, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:04.598473', 'step': 7604, 'epoch': 1}
{'type': 'loss', 'content': 0.05869843065738678, 'timestamp': '2025-10-02 00:24:04.600834', 'step': 7605, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:04.656583', 'step': 7605, 'epoch': 1}
{'type': 'loss', 'content': 0.08692017197608948, 'timestamp': '2025-10-02 00:24:04.661395', 'step': 7606, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:04.715822', 'step': 7606, 'epoch': 1}
{'type': 'loss', 'content': 0.10434199124574661, 'timestamp': '2025-10-02 00:24:04.718252', 'step': 7607, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:04.774261', 'step': 7607, 'epoch': 1}
{'type': 'loss', 'content': 0.03336961567401886, 'timestamp': '2025-10-02 00:24:04.779897', 'step': 7608, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:24:04.838956', 'step': 7608, 'epoch': 1}
{'type': 'loss', 'content': 0.08725976198911667, 'timestamp': '2025-10-02 00:24:04.849918', 'step': 7609, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:04.904255', 'step': 7609, 'epoch': 1}
{'type': 'loss', 'content': 0.0312890000641346, 'timestamp': '2025-10-02 00:24:04.906549', 'step': 7610, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:04.960864', 'step': 7610, 'epoch': 1}
{'type': 'loss', 'content': 0.05660462751984596, 'timestamp': '2025-10-02 00:24:04.966590', 'step': 7611, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:05.020306', 'step': 7611, 'epoch': 1}
{'type': 'loss', 'content': 0.07335171103477478, 'timestamp': '2025-10-02 00:24:05.026458', 'step': 7612, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:05.080077', 'step': 7612, 'epoch': 1}
{'type': 'loss', 'content': 0.31344130635261536, 'timestamp': '2025-10-02 00:24:05.082089', 'step': 7613, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:05.135900', 'step': 7613, 'epoch': 1}
{'type': 'loss', 'content': 0.03453487902879715, 'timestamp': '2025-10-02 00:24:05.141800', 'step': 7614, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:24:05.196016', 'step': 7614, 'epoch': 1}
{'type': 'loss', 'content': 0.10627283900976181, 'timestamp': '2025-10-02 00:24:05.198230', 'step': 7615, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:05.253249', 'step': 7615, 'epoch': 1}
{'type': 'loss', 'content': 0.029125388711690903, 'timestamp': '2025-10-02 00:24:05.259636', 'step': 7616, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:05.313808', 'step': 7616, 'epoch': 1}
{'type': 'loss', 'content': 0.08739569038152695, 'timestamp': '2025-10-02 00:24:05.316121', 'step': 7617, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:24:05.370151', 'step': 7617, 'epoch': 1}
{'type': 'loss', 'content': 0.08652746677398682, 'timestamp': '2025-10-02 00:24:05.372771', 'step': 7618, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:05.428412', 'step': 7618, 'epoch': 1}
{'type': 'loss', 'content': 0.031918320804834366, 'timestamp': '2025-10-02 00:24:05.434238', 'step': 7619, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:24:05.489606', 'step': 7619, 'epoch': 1}
{'type': 'loss', 'content': 0.2110661268234253, 'timestamp': '2025-10-02 00:24:05.495404', 'step': 7620, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:05.550290', 'step': 7620, 'epoch': 1}
{'type': 'loss', 'content': 0.12048421800136566, 'timestamp': '2025-10-02 00:24:05.552552', 'step': 7621, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:24:05.606758', 'step': 7621, 'epoch': 1}
{'type': 'loss', 'content': 0.05985007807612419, 'timestamp': '2025-10-02 00:24:05.609114', 'step': 7622, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:05.663866', 'step': 7622, 'epoch': 1}
{'type': 'loss', 'content': 0.022922109812498093, 'timestamp': '2025-10-02 00:24:05.671140', 'step': 7623, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:05.726431', 'step': 7623, 'epoch': 1}
{'type': 'loss', 'content': 0.06649331003427505, 'timestamp': '2025-10-02 00:24:05.732404', 'step': 7624, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:05.786070', 'step': 7624, 'epoch': 1}
{'type': 'loss', 'content': 0.06717473268508911, 'timestamp': '2025-10-02 00:24:05.787917', 'step': 7625, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:05.842524', 'step': 7625, 'epoch': 1}
{'type': 'loss', 'content': 0.056735437363386154, 'timestamp': '2025-10-02 00:24:05.844630', 'step': 7626, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:05.899196', 'step': 7626, 'epoch': 1}
{'type': 'loss', 'content': 0.032502103596925735, 'timestamp': '2025-10-02 00:24:05.904894', 'step': 7627, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:05.958829', 'step': 7627, 'epoch': 1}
{'type': 'loss', 'content': 0.11382772773504257, 'timestamp': '2025-10-02 00:24:05.964622', 'step': 7628, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:24:06.018085', 'step': 7628, 'epoch': 1}
{'type': 'loss', 'content': 0.1439373940229416, 'timestamp': '2025-10-02 00:24:06.022038', 'step': 7629, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:24:06.076784', 'step': 7629, 'epoch': 1}
{'type': 'loss', 'content': 0.11364297568798065, 'timestamp': '2025-10-02 00:24:06.078869', 'step': 7630, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:24:06.133773', 'step': 7630, 'epoch': 1}
{'type': 'loss', 'content': 0.1634482890367508, 'timestamp': '2025-10-02 00:24:06.136227', 'step': 7631, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:06.191367', 'step': 7631, 'epoch': 1}
{'type': 'loss', 'content': 0.014709369279444218, 'timestamp': '2025-10-02 00:24:06.197852', 'step': 7632, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:06.251965', 'step': 7632, 'epoch': 1}
{'type': 'loss', 'content': 0.12832961976528168, 'timestamp': '2025-10-02 00:24:06.254356', 'step': 7633, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:24:06.316356', 'step': 7633, 'epoch': 1}
{'type': 'loss', 'content': 0.03020894154906273, 'timestamp': '2025-10-02 00:24:06.327180', 'step': 7634, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:06.382095', 'step': 7634, 'epoch': 1}
{'type': 'loss', 'content': 0.04165996238589287, 'timestamp': '2025-10-02 00:24:06.384377', 'step': 7635, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:24:06.438663', 'step': 7635, 'epoch': 1}
{'type': 'loss', 'content': 0.12014863640069962, 'timestamp': '2025-10-02 00:24:06.444646', 'step': 7636, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:24:06.498885', 'step': 7636, 'epoch': 1}
{'type': 'loss', 'content': 0.0408593975007534, 'timestamp': '2025-10-02 00:24:06.509165', 'step': 7637, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:06.562993', 'step': 7637, 'epoch': 1}
{'type': 'loss', 'content': 0.1272614747285843, 'timestamp': '2025-10-02 00:24:06.565129', 'step': 7638, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:24:06.625296', 'step': 7638, 'epoch': 1}
{'type': 'loss', 'content': 0.03382010757923126, 'timestamp': '2025-10-02 00:24:06.635482', 'step': 7639, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:06.689496', 'step': 7639, 'epoch': 1}
{'type': 'loss', 'content': 0.16325685381889343, 'timestamp': '2025-10-02 00:24:06.695257', 'step': 7640, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:06.749296', 'step': 7640, 'epoch': 1}
{'type': 'loss', 'content': 0.027625810354948044, 'timestamp': '2025-10-02 00:24:06.755153', 'step': 7641, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:06.811049', 'step': 7641, 'epoch': 1}
{'type': 'loss', 'content': 0.0466749407351017, 'timestamp': '2025-10-02 00:24:06.816812', 'step': 7642, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:06.873741', 'step': 7642, 'epoch': 1}
{'type': 'loss', 'content': 0.012952410615980625, 'timestamp': '2025-10-02 00:24:06.875948', 'step': 7643, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:06.930113', 'step': 7643, 'epoch': 1}
{'type': 'loss', 'content': 0.10027267783880234, 'timestamp': '2025-10-02 00:24:06.935850', 'step': 7644, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:06.989486', 'step': 7644, 'epoch': 1}
{'type': 'loss', 'content': 0.04276508465409279, 'timestamp': '2025-10-02 00:24:06.995423', 'step': 7645, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:07.049167', 'step': 7645, 'epoch': 1}
{'type': 'loss', 'content': 0.1498926877975464, 'timestamp': '2025-10-02 00:24:07.051664', 'step': 7646, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:07.107344', 'step': 7646, 'epoch': 1}
{'type': 'loss', 'content': 0.006744544021785259, 'timestamp': '2025-10-02 00:24:07.114692', 'step': 7647, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:24:07.170498', 'step': 7647, 'epoch': 1}
{'type': 'loss', 'content': 0.08449862897396088, 'timestamp': '2025-10-02 00:24:07.176430', 'step': 7648, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:07.232618', 'step': 7648, 'epoch': 1}
{'type': 'loss', 'content': 0.15861813724040985, 'timestamp': '2025-10-02 00:24:07.235158', 'step': 7649, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:07.289658', 'step': 7649, 'epoch': 1}
{'type': 'loss', 'content': 0.024324623867869377, 'timestamp': '2025-10-02 00:24:07.292139', 'step': 7650, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:24:07.346479', 'step': 7650, 'epoch': 1}
{'type': 'loss', 'content': 0.0688614547252655, 'timestamp': '2025-10-02 00:24:07.348952', 'step': 7651, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:07.402977', 'step': 7651, 'epoch': 1}
{'type': 'loss', 'content': 0.04739759489893913, 'timestamp': '2025-10-02 00:24:07.408814', 'step': 7652, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:07.462091', 'step': 7652, 'epoch': 1}
{'type': 'loss', 'content': 0.14252865314483643, 'timestamp': '2025-10-02 00:24:07.469553', 'step': 7653, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:07.524310', 'step': 7653, 'epoch': 1}
{'type': 'loss', 'content': 0.0457451269030571, 'timestamp': '2025-10-02 00:24:07.530112', 'step': 7654, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:24:07.584993', 'step': 7654, 'epoch': 1}
{'type': 'loss', 'content': 0.06797622889280319, 'timestamp': '2025-10-02 00:24:07.594499', 'step': 7655, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:07.649010', 'step': 7655, 'epoch': 1}
{'type': 'loss', 'content': 0.03950631618499756, 'timestamp': '2025-10-02 00:24:07.654834', 'step': 7656, 'epoch': 1}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:24:35.499844', 'step': 7656, 'epoch': 1}
{'type': 'pplx', 'content': 91.5061385435528, 'timestamp': '2025-10-02 00:24:35.509335', 'step': 7656, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:35.573257', 'step': 7656, 'epoch': 1}
{'type': 'loss', 'content': 0.12418229132890701, 'timestamp': '2025-10-02 00:24:35.576405', 'step': 7657, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:24:35.636283', 'step': 7657, 'epoch': 1}
{'type': 'loss', 'content': 0.03658121079206467, 'timestamp': '2025-10-02 00:24:35.644009', 'step': 7658, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:35.716046', 'step': 7658, 'epoch': 1}
{'type': 'loss', 'content': 0.049828700721263885, 'timestamp': '2025-10-02 00:24:35.721584', 'step': 7659, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:35.797581', 'step': 7659, 'epoch': 1}
{'type': 'loss', 'content': 0.13684436678886414, 'timestamp': '2025-10-02 00:24:35.810167', 'step': 7660, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:35.880584', 'step': 7660, 'epoch': 1}
{'type': 'loss', 'content': 0.09430509060621262, 'timestamp': '2025-10-02 00:24:35.891459', 'step': 7661, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:24:35.967028', 'step': 7661, 'epoch': 1}
{'type': 'loss', 'content': 0.14231541752815247, 'timestamp': '2025-10-02 00:24:35.975675', 'step': 7662, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:36.046743', 'step': 7662, 'epoch': 1}
{'type': 'loss', 'content': 0.08658558130264282, 'timestamp': '2025-10-02 00:24:36.054820', 'step': 7663, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:24:36.125166', 'step': 7663, 'epoch': 1}
{'type': 'loss', 'content': 0.10799544304609299, 'timestamp': '2025-10-02 00:24:36.138674', 'step': 7664, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:36.213875', 'step': 7664, 'epoch': 1}
{'type': 'loss', 'content': 0.08108846843242645, 'timestamp': '2025-10-02 00:24:36.219346', 'step': 7665, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:24:36.288450', 'step': 7665, 'epoch': 1}
{'type': 'loss', 'content': 0.12704893946647644, 'timestamp': '2025-10-02 00:24:36.296682', 'step': 7666, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:24:36.377289', 'step': 7666, 'epoch': 1}
{'type': 'loss', 'content': 0.03863897919654846, 'timestamp': '2025-10-02 00:24:36.387823', 'step': 7667, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:36.466742', 'step': 7667, 'epoch': 1}
{'type': 'loss', 'content': 0.11954309791326523, 'timestamp': '2025-10-02 00:24:36.474083', 'step': 7668, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:36.541241', 'step': 7668, 'epoch': 1}
{'type': 'loss', 'content': 0.07376989722251892, 'timestamp': '2025-10-02 00:24:36.548250', 'step': 7669, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:24:36.649291', 'step': 7669, 'epoch': 1}
{'type': 'loss', 'content': 0.01717112772166729, 'timestamp': '2025-10-02 00:24:36.659997', 'step': 7670, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:36.722694', 'step': 7670, 'epoch': 1}
{'type': 'loss', 'content': 0.10624615103006363, 'timestamp': '2025-10-02 00:24:36.726238', 'step': 7671, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:36.789656', 'step': 7671, 'epoch': 1}
{'type': 'loss', 'content': 0.10928061604499817, 'timestamp': '2025-10-02 00:24:36.798348', 'step': 7672, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:24:36.868895', 'step': 7672, 'epoch': 1}
{'type': 'loss', 'content': 0.026855526491999626, 'timestamp': '2025-10-02 00:24:36.880232', 'step': 7673, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:24:36.943648', 'step': 7673, 'epoch': 1}
{'type': 'loss', 'content': 0.0485890693962574, 'timestamp': '2025-10-02 00:24:36.952371', 'step': 7674, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:37.016311', 'step': 7674, 'epoch': 1}
{'type': 'loss', 'content': 0.15576285123825073, 'timestamp': '2025-10-02 00:24:37.023552', 'step': 7675, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:37.084595', 'step': 7675, 'epoch': 1}
{'type': 'loss', 'content': 0.09609853476285934, 'timestamp': '2025-10-02 00:24:37.092419', 'step': 7676, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:24:37.159346', 'step': 7676, 'epoch': 1}
{'type': 'loss', 'content': 0.12000508606433868, 'timestamp': '2025-10-02 00:24:37.166230', 'step': 7677, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:24:37.234143', 'step': 7677, 'epoch': 1}
{'type': 'loss', 'content': 0.1113688051700592, 'timestamp': '2025-10-02 00:24:37.240761', 'step': 7678, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:37.306939', 'step': 7678, 'epoch': 1}
{'type': 'loss', 'content': 0.14648401737213135, 'timestamp': '2025-10-02 00:24:37.310200', 'step': 7679, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:37.376737', 'step': 7679, 'epoch': 1}
{'type': 'loss', 'content': 0.03221724182367325, 'timestamp': '2025-10-02 00:24:37.383321', 'step': 7680, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:24:37.468461', 'step': 7680, 'epoch': 1}
{'type': 'loss', 'content': 0.0316200889647007, 'timestamp': '2025-10-02 00:24:37.482736', 'step': 7681, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:37.549673', 'step': 7681, 'epoch': 1}
{'type': 'loss', 'content': 0.16461682319641113, 'timestamp': '2025-10-02 00:24:37.556476', 'step': 7682, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:37.623008', 'step': 7682, 'epoch': 1}
{'type': 'loss', 'content': 0.18110068142414093, 'timestamp': '2025-10-02 00:24:37.631522', 'step': 7683, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:37.701048', 'step': 7683, 'epoch': 1}
{'type': 'loss', 'content': 0.05513954535126686, 'timestamp': '2025-10-02 00:24:37.711920', 'step': 7684, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:37.782869', 'step': 7684, 'epoch': 1}
{'type': 'loss', 'content': 0.04245566949248314, 'timestamp': '2025-10-02 00:24:37.788740', 'step': 7685, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:37.849757', 'step': 7685, 'epoch': 1}
{'type': 'loss', 'content': 0.18296192586421967, 'timestamp': '2025-10-02 00:24:37.852955', 'step': 7686, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:24:37.922246', 'step': 7686, 'epoch': 1}
{'type': 'loss', 'content': 0.102960504591465, 'timestamp': '2025-10-02 00:24:37.928345', 'step': 7687, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:37.993889', 'step': 7687, 'epoch': 1}
{'type': 'loss', 'content': 0.033907484263181686, 'timestamp': '2025-10-02 00:24:38.006363', 'step': 7688, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:38.071213', 'step': 7688, 'epoch': 1}
{'type': 'loss', 'content': 0.07248128950595856, 'timestamp': '2025-10-02 00:24:38.080236', 'step': 7689, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:24:38.152488', 'step': 7689, 'epoch': 1}
{'type': 'loss', 'content': 0.03395653888583183, 'timestamp': '2025-10-02 00:24:38.162727', 'step': 7690, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:38.237772', 'step': 7690, 'epoch': 1}
{'type': 'loss', 'content': 0.13238833844661713, 'timestamp': '2025-10-02 00:24:38.244908', 'step': 7691, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:24:38.316640', 'step': 7691, 'epoch': 1}
{'type': 'loss', 'content': 0.02944399043917656, 'timestamp': '2025-10-02 00:24:38.328372', 'step': 7692, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:24:38.396014', 'step': 7692, 'epoch': 1}
{'type': 'loss', 'content': 0.08554218709468842, 'timestamp': '2025-10-02 00:24:38.403526', 'step': 7693, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:38.481375', 'step': 7693, 'epoch': 1}
{'type': 'loss', 'content': 0.1877897083759308, 'timestamp': '2025-10-02 00:24:38.488399', 'step': 7694, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:38.558286', 'step': 7694, 'epoch': 1}
{'type': 'loss', 'content': 0.281220942735672, 'timestamp': '2025-10-02 00:24:38.565577', 'step': 7695, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:38.640986', 'step': 7695, 'epoch': 1}
{'type': 'loss', 'content': 0.07745613902807236, 'timestamp': '2025-10-02 00:24:38.655073', 'step': 7696, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:24:38.732175', 'step': 7696, 'epoch': 1}
{'type': 'loss', 'content': 0.017967848107218742, 'timestamp': '2025-10-02 00:24:38.743479', 'step': 7697, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:38.815033', 'step': 7697, 'epoch': 1}
{'type': 'loss', 'content': 0.16431912779808044, 'timestamp': '2025-10-02 00:24:38.820584', 'step': 7698, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:38.887368', 'step': 7698, 'epoch': 1}
{'type': 'loss', 'content': 0.03881106898188591, 'timestamp': '2025-10-02 00:24:38.898472', 'step': 7699, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:38.972325', 'step': 7699, 'epoch': 1}
{'type': 'loss', 'content': 0.09925920516252518, 'timestamp': '2025-10-02 00:24:38.982310', 'step': 7700, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:39.057569', 'step': 7700, 'epoch': 1}
{'type': 'loss', 'content': 0.09519826620817184, 'timestamp': '2025-10-02 00:24:39.064627', 'step': 7701, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:39.139501', 'step': 7701, 'epoch': 1}
{'type': 'loss', 'content': 0.19171707332134247, 'timestamp': '2025-10-02 00:24:39.146438', 'step': 7702, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:24:39.223834', 'step': 7702, 'epoch': 1}
{'type': 'loss', 'content': 0.07739657908678055, 'timestamp': '2025-10-02 00:24:39.234417', 'step': 7703, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:24:39.302073', 'step': 7703, 'epoch': 1}
{'type': 'loss', 'content': 0.059966202825307846, 'timestamp': '2025-10-02 00:24:39.313587', 'step': 7704, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:39.408729', 'step': 7704, 'epoch': 1}
{'type': 'loss', 'content': 0.053882475942373276, 'timestamp': '2025-10-02 00:24:39.419559', 'step': 7705, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:39.496605', 'step': 7705, 'epoch': 1}
{'type': 'loss', 'content': 0.10018333792686462, 'timestamp': '2025-10-02 00:24:39.510101', 'step': 7706, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:39.582965', 'step': 7706, 'epoch': 1}
{'type': 'loss', 'content': 0.07478790730237961, 'timestamp': '2025-10-02 00:24:39.588180', 'step': 7707, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:39.656902', 'step': 7707, 'epoch': 1}
{'type': 'loss', 'content': 0.12496241182088852, 'timestamp': '2025-10-02 00:24:39.662977', 'step': 7708, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:39.717469', 'step': 7708, 'epoch': 1}
{'type': 'loss', 'content': 0.01924808882176876, 'timestamp': '2025-10-02 00:24:39.727018', 'step': 7709, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:24:39.788092', 'step': 7709, 'epoch': 1}
{'type': 'loss', 'content': 0.07950630784034729, 'timestamp': '2025-10-02 00:24:39.798480', 'step': 7710, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:39.854339', 'step': 7710, 'epoch': 1}
{'type': 'loss', 'content': 0.06223994120955467, 'timestamp': '2025-10-02 00:24:39.857093', 'step': 7711, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:39.914058', 'step': 7711, 'epoch': 1}
{'type': 'loss', 'content': 0.2412572056055069, 'timestamp': '2025-10-02 00:24:39.920825', 'step': 7712, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:39.977154', 'step': 7712, 'epoch': 1}
{'type': 'loss', 'content': 0.08457642048597336, 'timestamp': '2025-10-02 00:24:39.985887', 'step': 7713, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:40.044237', 'step': 7713, 'epoch': 1}
{'type': 'loss', 'content': 0.0326569564640522, 'timestamp': '2025-10-02 00:24:40.047831', 'step': 7714, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:24:40.104916', 'step': 7714, 'epoch': 1}
{'type': 'loss', 'content': 0.1916186511516571, 'timestamp': '2025-10-02 00:24:40.108191', 'step': 7715, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:24:40.170116', 'step': 7715, 'epoch': 1}
{'type': 'loss', 'content': 0.04927612841129303, 'timestamp': '2025-10-02 00:24:40.181068', 'step': 7716, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:40.236125', 'step': 7716, 'epoch': 1}
{'type': 'loss', 'content': 0.10800977051258087, 'timestamp': '2025-10-02 00:24:40.238638', 'step': 7717, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:40.294691', 'step': 7717, 'epoch': 1}
{'type': 'loss', 'content': 0.1264939159154892, 'timestamp': '2025-10-02 00:24:40.297962', 'step': 7718, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:40.354354', 'step': 7718, 'epoch': 1}
{'type': 'loss', 'content': 0.1153770461678505, 'timestamp': '2025-10-02 00:24:40.357144', 'step': 7719, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:40.412703', 'step': 7719, 'epoch': 1}
{'type': 'loss', 'content': 0.19121311604976654, 'timestamp': '2025-10-02 00:24:40.420447', 'step': 7720, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:24:40.475842', 'step': 7720, 'epoch': 1}
{'type': 'loss', 'content': 0.19064933061599731, 'timestamp': '2025-10-02 00:24:40.478189', 'step': 7721, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:24:40.532234', 'step': 7721, 'epoch': 1}
{'type': 'loss', 'content': 0.07775754481554031, 'timestamp': '2025-10-02 00:24:40.535378', 'step': 7722, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:40.592754', 'step': 7722, 'epoch': 1}
{'type': 'loss', 'content': 0.029620729386806488, 'timestamp': '2025-10-02 00:24:40.595777', 'step': 7723, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:40.652385', 'step': 7723, 'epoch': 1}
{'type': 'loss', 'content': 0.08734861761331558, 'timestamp': '2025-10-02 00:24:40.659031', 'step': 7724, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:40.716608', 'step': 7724, 'epoch': 1}
{'type': 'loss', 'content': 0.07371543347835541, 'timestamp': '2025-10-02 00:24:40.720350', 'step': 7725, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:40.777214', 'step': 7725, 'epoch': 1}
{'type': 'loss', 'content': 0.05596964806318283, 'timestamp': '2025-10-02 00:24:40.783819', 'step': 7726, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:40.840263', 'step': 7726, 'epoch': 1}
{'type': 'loss', 'content': 0.1677202731370926, 'timestamp': '2025-10-02 00:24:40.843523', 'step': 7727, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:40.900827', 'step': 7727, 'epoch': 1}
{'type': 'loss', 'content': 0.042099181562662125, 'timestamp': '2025-10-02 00:24:40.907030', 'step': 7728, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:24:40.966907', 'step': 7728, 'epoch': 1}
{'type': 'loss', 'content': 0.01007701363414526, 'timestamp': '2025-10-02 00:24:40.977903', 'step': 7729, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:24:41.035107', 'step': 7729, 'epoch': 1}
{'type': 'loss', 'content': 0.057915788143873215, 'timestamp': '2025-10-02 00:24:41.037690', 'step': 7730, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:41.093260', 'step': 7730, 'epoch': 1}
{'type': 'loss', 'content': 0.16959160566329956, 'timestamp': '2025-10-02 00:24:41.096632', 'step': 7731, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:24:41.157155', 'step': 7731, 'epoch': 1}
{'type': 'loss', 'content': 0.03889774903655052, 'timestamp': '2025-10-02 00:24:41.168140', 'step': 7732, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:41.223857', 'step': 7732, 'epoch': 1}
{'type': 'loss', 'content': 0.14806914329528809, 'timestamp': '2025-10-02 00:24:41.227673', 'step': 7733, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:41.283203', 'step': 7733, 'epoch': 1}
{'type': 'loss', 'content': 0.0583956241607666, 'timestamp': '2025-10-02 00:24:41.292525', 'step': 7734, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:24:41.354133', 'step': 7734, 'epoch': 1}
{'type': 'loss', 'content': 0.011884966865181923, 'timestamp': '2025-10-02 00:24:41.364293', 'step': 7735, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:24:41.428858', 'step': 7735, 'epoch': 1}
{'type': 'loss', 'content': 0.024482466280460358, 'timestamp': '2025-10-02 00:24:41.440314', 'step': 7736, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:41.495770', 'step': 7736, 'epoch': 1}
{'type': 'loss', 'content': 0.10869026929140091, 'timestamp': '2025-10-02 00:24:41.499133', 'step': 7737, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:41.555112', 'step': 7737, 'epoch': 1}
{'type': 'loss', 'content': 0.05845733731985092, 'timestamp': '2025-10-02 00:24:41.561013', 'step': 7738, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:41.618214', 'step': 7738, 'epoch': 1}
{'type': 'loss', 'content': 0.021031323820352554, 'timestamp': '2025-10-02 00:24:41.623974', 'step': 7739, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:41.680211', 'step': 7739, 'epoch': 1}
{'type': 'loss', 'content': 0.04728807136416435, 'timestamp': '2025-10-02 00:24:41.690353', 'step': 7740, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:41.744770', 'step': 7740, 'epoch': 1}
{'type': 'loss', 'content': 0.030643964186310768, 'timestamp': '2025-10-02 00:24:41.747192', 'step': 7741, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:41.800687', 'step': 7741, 'epoch': 1}
{'type': 'loss', 'content': 0.22946766018867493, 'timestamp': '2025-10-02 00:24:41.803371', 'step': 7742, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:41.857845', 'step': 7742, 'epoch': 1}
{'type': 'loss', 'content': 0.028749650344252586, 'timestamp': '2025-10-02 00:24:41.865421', 'step': 7743, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:24:41.919774', 'step': 7743, 'epoch': 1}
{'type': 'loss', 'content': 0.025551946833729744, 'timestamp': '2025-10-02 00:24:41.925595', 'step': 7744, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:24:41.979137', 'step': 7744, 'epoch': 1}
{'type': 'loss', 'content': 0.07680691033601761, 'timestamp': '2025-10-02 00:24:41.982083', 'step': 7745, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:24:42.040557', 'step': 7745, 'epoch': 1}
{'type': 'loss', 'content': 0.10068803280591965, 'timestamp': '2025-10-02 00:24:42.050733', 'step': 7746, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:42.104780', 'step': 7746, 'epoch': 1}
{'type': 'loss', 'content': 0.07337725162506104, 'timestamp': '2025-10-02 00:24:42.110519', 'step': 7747, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:42.164428', 'step': 7747, 'epoch': 1}
{'type': 'loss', 'content': 0.15231944620609283, 'timestamp': '2025-10-02 00:24:42.170659', 'step': 7748, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:42.224765', 'step': 7748, 'epoch': 1}
{'type': 'loss', 'content': 0.06101543828845024, 'timestamp': '2025-10-02 00:24:42.230641', 'step': 7749, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:42.285060', 'step': 7749, 'epoch': 1}
{'type': 'loss', 'content': 0.1541423350572586, 'timestamp': '2025-10-02 00:24:42.287559', 'step': 7750, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:42.342114', 'step': 7750, 'epoch': 1}
{'type': 'loss', 'content': 0.11766978353261948, 'timestamp': '2025-10-02 00:24:42.344505', 'step': 7751, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:42.398308', 'step': 7751, 'epoch': 1}
{'type': 'loss', 'content': 0.06688589602708817, 'timestamp': '2025-10-02 00:24:42.404080', 'step': 7752, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:42.458108', 'step': 7752, 'epoch': 1}
{'type': 'loss', 'content': 0.07834106683731079, 'timestamp': '2025-10-02 00:24:42.460374', 'step': 7753, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:42.514115', 'step': 7753, 'epoch': 1}
{'type': 'loss', 'content': 0.04835225269198418, 'timestamp': '2025-10-02 00:24:42.516295', 'step': 7754, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:42.570400', 'step': 7754, 'epoch': 1}
{'type': 'loss', 'content': 0.10145164281129837, 'timestamp': '2025-10-02 00:24:42.572710', 'step': 7755, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:42.626893', 'step': 7755, 'epoch': 1}
{'type': 'loss', 'content': 0.04773207753896713, 'timestamp': '2025-10-02 00:24:42.633162', 'step': 7756, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:42.686709', 'step': 7756, 'epoch': 1}
{'type': 'loss', 'content': 0.04366576671600342, 'timestamp': '2025-10-02 00:24:42.692658', 'step': 7757, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:42.746661', 'step': 7757, 'epoch': 1}
{'type': 'loss', 'content': 0.047068867832422256, 'timestamp': '2025-10-02 00:24:42.749541', 'step': 7758, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:42.804112', 'step': 7758, 'epoch': 1}
{'type': 'loss', 'content': 0.058312393724918365, 'timestamp': '2025-10-02 00:24:42.811469', 'step': 7759, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:42.866929', 'step': 7759, 'epoch': 1}
{'type': 'loss', 'content': 0.15250684320926666, 'timestamp': '2025-10-02 00:24:42.872794', 'step': 7760, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:24:42.926250', 'step': 7760, 'epoch': 1}
{'type': 'loss', 'content': 0.12399110198020935, 'timestamp': '2025-10-02 00:24:42.929013', 'step': 7761, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:42.984060', 'step': 7761, 'epoch': 1}
{'type': 'loss', 'content': 0.08286971598863602, 'timestamp': '2025-10-02 00:24:42.986416', 'step': 7762, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:43.041648', 'step': 7762, 'epoch': 1}
{'type': 'loss', 'content': 0.04674790799617767, 'timestamp': '2025-10-02 00:24:43.047251', 'step': 7763, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:43.102316', 'step': 7763, 'epoch': 1}
{'type': 'loss', 'content': 0.1607709676027298, 'timestamp': '2025-10-02 00:24:43.108363', 'step': 7764, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:24:43.162257', 'step': 7764, 'epoch': 1}
{'type': 'loss', 'content': 0.0935969352722168, 'timestamp': '2025-10-02 00:24:43.164829', 'step': 7765, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:43.219863', 'step': 7765, 'epoch': 1}
{'type': 'loss', 'content': 0.02919885329902172, 'timestamp': '2025-10-02 00:24:43.222346', 'step': 7766, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:43.276750', 'step': 7766, 'epoch': 1}
{'type': 'loss', 'content': 0.07231457531452179, 'timestamp': '2025-10-02 00:24:43.286067', 'step': 7767, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:43.343361', 'step': 7767, 'epoch': 1}
{'type': 'loss', 'content': 0.0748758539557457, 'timestamp': '2025-10-02 00:24:43.348953', 'step': 7768, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:43.404321', 'step': 7768, 'epoch': 1}
{'type': 'loss', 'content': 0.13140837848186493, 'timestamp': '2025-10-02 00:24:43.406748', 'step': 7769, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:24:43.461588', 'step': 7769, 'epoch': 1}
{'type': 'loss', 'content': 0.20231404900550842, 'timestamp': '2025-10-02 00:24:43.464416', 'step': 7770, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:24:43.520965', 'step': 7770, 'epoch': 1}
{'type': 'loss', 'content': 0.04398361220955849, 'timestamp': '2025-10-02 00:24:43.530502', 'step': 7771, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:43.584733', 'step': 7771, 'epoch': 1}
{'type': 'loss', 'content': 0.0648784339427948, 'timestamp': '2025-10-02 00:24:43.590776', 'step': 7772, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:43.645029', 'step': 7772, 'epoch': 1}
{'type': 'loss', 'content': 0.01445599365979433, 'timestamp': '2025-10-02 00:24:43.652550', 'step': 7773, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:43.706890', 'step': 7773, 'epoch': 1}
{'type': 'loss', 'content': 0.15025289356708527, 'timestamp': '2025-10-02 00:24:43.709511', 'step': 7774, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:43.764133', 'step': 7774, 'epoch': 1}
{'type': 'loss', 'content': 0.1651594489812851, 'timestamp': '2025-10-02 00:24:43.766730', 'step': 7775, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:43.820830', 'step': 7775, 'epoch': 1}
{'type': 'loss', 'content': 0.16502246260643005, 'timestamp': '2025-10-02 00:24:43.826870', 'step': 7776, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:43.880643', 'step': 7776, 'epoch': 1}
{'type': 'loss', 'content': 0.038418564945459366, 'timestamp': '2025-10-02 00:24:43.883146', 'step': 7777, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:43.937300', 'step': 7777, 'epoch': 1}
{'type': 'loss', 'content': 0.03939194604754448, 'timestamp': '2025-10-02 00:24:43.939676', 'step': 7778, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-10-02 00:24:44.020727', 'step': 7778, 'epoch': 1}
{'type': 'loss', 'content': 0.03437560424208641, 'timestamp': '2025-10-02 00:24:44.035511', 'step': 7779, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:44.090701', 'step': 7779, 'epoch': 1}
{'type': 'loss', 'content': 0.013580326922237873, 'timestamp': '2025-10-02 00:24:44.096781', 'step': 7780, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:44.150492', 'step': 7780, 'epoch': 1}
{'type': 'loss', 'content': 0.06116550788283348, 'timestamp': '2025-10-02 00:24:44.157975', 'step': 7781, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:44.213158', 'step': 7781, 'epoch': 1}
{'type': 'loss', 'content': 0.08077162504196167, 'timestamp': '2025-10-02 00:24:44.222504', 'step': 7782, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:44.279116', 'step': 7782, 'epoch': 1}
{'type': 'loss', 'content': 0.0988621860742569, 'timestamp': '2025-10-02 00:24:44.281827', 'step': 7783, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:24:44.345725', 'step': 7783, 'epoch': 1}
{'type': 'loss', 'content': 0.03957168757915497, 'timestamp': '2025-10-02 00:24:44.357119', 'step': 7784, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:44.411392', 'step': 7784, 'epoch': 1}
{'type': 'loss', 'content': 0.042177919298410416, 'timestamp': '2025-10-02 00:24:44.418916', 'step': 7785, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:24:44.473130', 'step': 7785, 'epoch': 1}
{'type': 'loss', 'content': 0.1314399391412735, 'timestamp': '2025-10-02 00:24:44.475688', 'step': 7786, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:44.530384', 'step': 7786, 'epoch': 1}
{'type': 'loss', 'content': 0.07459401339292526, 'timestamp': '2025-10-02 00:24:44.537885', 'step': 7787, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:24:44.607527', 'step': 7787, 'epoch': 1}
{'type': 'loss', 'content': 0.048720188438892365, 'timestamp': '2025-10-02 00:24:44.620729', 'step': 7788, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:44.675175', 'step': 7788, 'epoch': 1}
{'type': 'loss', 'content': 0.024991752579808235, 'timestamp': '2025-10-02 00:24:44.677775', 'step': 7789, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:44.732182', 'step': 7789, 'epoch': 1}
{'type': 'loss', 'content': 0.1193917840719223, 'timestamp': '2025-10-02 00:24:44.737992', 'step': 7790, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:44.793812', 'step': 7790, 'epoch': 1}
{'type': 'loss', 'content': 0.13890354335308075, 'timestamp': '2025-10-02 00:24:44.796394', 'step': 7791, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:24:44.865594', 'step': 7791, 'epoch': 1}
{'type': 'loss', 'content': 0.03520806133747101, 'timestamp': '2025-10-02 00:24:44.878676', 'step': 7792, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:24:44.940470', 'step': 7792, 'epoch': 1}
{'type': 'loss', 'content': 0.08428291976451874, 'timestamp': '2025-10-02 00:24:44.952188', 'step': 7793, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:45.007633', 'step': 7793, 'epoch': 1}
{'type': 'loss', 'content': 0.03881092369556427, 'timestamp': '2025-10-02 00:24:45.010050', 'step': 7794, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:24:45.063778', 'step': 7794, 'epoch': 1}
{'type': 'loss', 'content': 0.1230519637465477, 'timestamp': '2025-10-02 00:24:45.066132', 'step': 7795, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:45.120782', 'step': 7795, 'epoch': 1}
{'type': 'loss', 'content': 0.13597965240478516, 'timestamp': '2025-10-02 00:24:45.126874', 'step': 7796, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:45.179498', 'step': 7796, 'epoch': 1}
{'type': 'loss', 'content': 0.1733008474111557, 'timestamp': '2025-10-02 00:24:45.182418', 'step': 7797, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:24:45.235829', 'step': 7797, 'epoch': 1}
{'type': 'loss', 'content': 0.22134506702423096, 'timestamp': '2025-10-02 00:24:45.238137', 'step': 7798, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:45.291684', 'step': 7798, 'epoch': 1}
{'type': 'loss', 'content': 0.15411104261875153, 'timestamp': '2025-10-02 00:24:45.294296', 'step': 7799, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:45.348031', 'step': 7799, 'epoch': 1}
{'type': 'loss', 'content': 0.11646468192338943, 'timestamp': '2025-10-02 00:24:45.353780', 'step': 7800, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:45.407478', 'step': 7800, 'epoch': 1}
{'type': 'loss', 'content': 0.04897311329841614, 'timestamp': '2025-10-02 00:24:45.413257', 'step': 7801, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:45.466822', 'step': 7801, 'epoch': 1}
{'type': 'loss', 'content': 0.16323649883270264, 'timestamp': '2025-10-02 00:24:45.469383', 'step': 7802, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:45.523445', 'step': 7802, 'epoch': 1}
{'type': 'loss', 'content': 0.11676989495754242, 'timestamp': '2025-10-02 00:24:45.526117', 'step': 7803, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:45.580488', 'step': 7803, 'epoch': 1}
{'type': 'loss', 'content': 0.05292433127760887, 'timestamp': '2025-10-02 00:24:45.588308', 'step': 7804, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:45.641456', 'step': 7804, 'epoch': 1}
{'type': 'loss', 'content': 0.09309301525354385, 'timestamp': '2025-10-02 00:24:45.647197', 'step': 7805, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:45.701447', 'step': 7805, 'epoch': 1}
{'type': 'loss', 'content': 0.06001032143831253, 'timestamp': '2025-10-02 00:24:45.703707', 'step': 7806, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:45.757716', 'step': 7806, 'epoch': 1}
{'type': 'loss', 'content': 0.21278826892375946, 'timestamp': '2025-10-02 00:24:45.760049', 'step': 7807, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:45.814666', 'step': 7807, 'epoch': 1}
{'type': 'loss', 'content': 0.058348365128040314, 'timestamp': '2025-10-02 00:24:45.823003', 'step': 7808, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:45.876759', 'step': 7808, 'epoch': 1}
{'type': 'loss', 'content': 0.10764527320861816, 'timestamp': '2025-10-02 00:24:45.879281', 'step': 7809, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:45.932401', 'step': 7809, 'epoch': 1}
{'type': 'loss', 'content': 0.1974503993988037, 'timestamp': '2025-10-02 00:24:45.934961', 'step': 7810, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:45.989488', 'step': 7810, 'epoch': 1}
{'type': 'loss', 'content': 0.07997681945562363, 'timestamp': '2025-10-02 00:24:45.991924', 'step': 7811, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:46.045507', 'step': 7811, 'epoch': 1}
{'type': 'loss', 'content': 0.05288376286625862, 'timestamp': '2025-10-02 00:24:46.053908', 'step': 7812, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:24:46.107186', 'step': 7812, 'epoch': 1}
{'type': 'loss', 'content': 0.05770343169569969, 'timestamp': '2025-10-02 00:24:46.109742', 'step': 7813, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:46.164192', 'step': 7813, 'epoch': 1}
{'type': 'loss', 'content': 0.02345564402639866, 'timestamp': '2025-10-02 00:24:46.166590', 'step': 7814, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:46.220345', 'step': 7814, 'epoch': 1}
{'type': 'loss', 'content': 0.29391539096832275, 'timestamp': '2025-10-02 00:24:46.222871', 'step': 7815, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:24:46.277777', 'step': 7815, 'epoch': 1}
{'type': 'loss', 'content': 0.07585080713033676, 'timestamp': '2025-10-02 00:24:46.283713', 'step': 7816, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:46.337894', 'step': 7816, 'epoch': 1}
{'type': 'loss', 'content': 0.049702487885951996, 'timestamp': '2025-10-02 00:24:46.343720', 'step': 7817, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:24:46.398599', 'step': 7817, 'epoch': 1}
{'type': 'loss', 'content': 0.10747528076171875, 'timestamp': '2025-10-02 00:24:46.400892', 'step': 7818, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:46.454722', 'step': 7818, 'epoch': 1}
{'type': 'loss', 'content': 0.17376939952373505, 'timestamp': '2025-10-02 00:24:46.457317', 'step': 7819, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:46.511433', 'step': 7819, 'epoch': 1}
{'type': 'loss', 'content': 0.14123979210853577, 'timestamp': '2025-10-02 00:24:46.517287', 'step': 7820, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:24:46.582280', 'step': 7820, 'epoch': 1}
{'type': 'loss', 'content': 0.03598605841398239, 'timestamp': '2025-10-02 00:24:46.592583', 'step': 7821, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:46.648668', 'step': 7821, 'epoch': 1}
{'type': 'loss', 'content': 0.00974479503929615, 'timestamp': '2025-10-02 00:24:46.658050', 'step': 7822, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:46.712774', 'step': 7822, 'epoch': 1}
{'type': 'loss', 'content': 0.06445502489805222, 'timestamp': '2025-10-02 00:24:46.715484', 'step': 7823, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:46.769054', 'step': 7823, 'epoch': 1}
{'type': 'loss', 'content': 0.08956687152385712, 'timestamp': '2025-10-02 00:24:46.774807', 'step': 7824, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:46.828522', 'step': 7824, 'epoch': 1}
{'type': 'loss', 'content': 0.03614310547709465, 'timestamp': '2025-10-02 00:24:46.837806', 'step': 7825, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:46.892307', 'step': 7825, 'epoch': 1}
{'type': 'loss', 'content': 0.03626430034637451, 'timestamp': '2025-10-02 00:24:46.895043', 'step': 7826, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:24:46.950116', 'step': 7826, 'epoch': 1}
{'type': 'loss', 'content': 0.07424821704626083, 'timestamp': '2025-10-02 00:24:46.952492', 'step': 7827, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:47.006107', 'step': 7827, 'epoch': 1}
{'type': 'loss', 'content': 0.11656005680561066, 'timestamp': '2025-10-02 00:24:47.012498', 'step': 7828, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:47.065996', 'step': 7828, 'epoch': 1}
{'type': 'loss', 'content': 0.046516429632902145, 'timestamp': '2025-10-02 00:24:47.068395', 'step': 7829, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:24:47.121747', 'step': 7829, 'epoch': 1}
{'type': 'loss', 'content': 0.13030628859996796, 'timestamp': '2025-10-02 00:24:47.124321', 'step': 7830, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:24:47.179656', 'step': 7830, 'epoch': 1}
{'type': 'loss', 'content': 0.01892870105803013, 'timestamp': '2025-10-02 00:24:47.189219', 'step': 7831, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:47.243813', 'step': 7831, 'epoch': 1}
{'type': 'loss', 'content': 0.10845628380775452, 'timestamp': '2025-10-02 00:24:47.250957', 'step': 7832, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:47.304791', 'step': 7832, 'epoch': 1}
{'type': 'loss', 'content': 0.10156820714473724, 'timestamp': '2025-10-02 00:24:47.307376', 'step': 7833, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:47.361489', 'step': 7833, 'epoch': 1}
{'type': 'loss', 'content': 0.049951352179050446, 'timestamp': '2025-10-02 00:24:47.363878', 'step': 7834, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:47.418830', 'step': 7834, 'epoch': 1}
{'type': 'loss', 'content': 0.1043105274438858, 'timestamp': '2025-10-02 00:24:47.421475', 'step': 7835, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:47.475482', 'step': 7835, 'epoch': 1}
{'type': 'loss', 'content': 0.1265149712562561, 'timestamp': '2025-10-02 00:24:47.481514', 'step': 7836, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:24:47.534812', 'step': 7836, 'epoch': 1}
{'type': 'loss', 'content': 0.09270769357681274, 'timestamp': '2025-10-02 00:24:47.537065', 'step': 7837, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:24:47.593313', 'step': 7837, 'epoch': 1}
{'type': 'loss', 'content': 0.036169782280921936, 'timestamp': '2025-10-02 00:24:47.602873', 'step': 7838, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:24:47.658194', 'step': 7838, 'epoch': 1}
{'type': 'loss', 'content': 0.05318192020058632, 'timestamp': '2025-10-02 00:24:47.667703', 'step': 7839, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:47.722717', 'step': 7839, 'epoch': 1}
{'type': 'loss', 'content': 0.07461975514888763, 'timestamp': '2025-10-02 00:24:47.728610', 'step': 7840, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:47.782136', 'step': 7840, 'epoch': 1}
{'type': 'loss', 'content': 0.12487350404262543, 'timestamp': '2025-10-02 00:24:47.784878', 'step': 7841, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:47.840284', 'step': 7841, 'epoch': 1}
{'type': 'loss', 'content': 0.04733125492930412, 'timestamp': '2025-10-02 00:24:47.849655', 'step': 7842, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:47.903764', 'step': 7842, 'epoch': 1}
{'type': 'loss', 'content': 0.11831267923116684, 'timestamp': '2025-10-02 00:24:47.907351', 'step': 7843, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:47.961723', 'step': 7843, 'epoch': 1}
{'type': 'loss', 'content': 0.10467198491096497, 'timestamp': '2025-10-02 00:24:47.967965', 'step': 7844, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:24:48.022469', 'step': 7844, 'epoch': 1}
{'type': 'loss', 'content': 0.20173147320747375, 'timestamp': '2025-10-02 00:24:48.025111', 'step': 7845, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:48.079382', 'step': 7845, 'epoch': 1}
{'type': 'loss', 'content': 0.04150122404098511, 'timestamp': '2025-10-02 00:24:48.082335', 'step': 7846, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:48.138123', 'step': 7846, 'epoch': 1}
{'type': 'loss', 'content': 0.05909699574112892, 'timestamp': '2025-10-02 00:24:48.143908', 'step': 7847, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:48.198692', 'step': 7847, 'epoch': 1}
{'type': 'loss', 'content': 0.2679196298122406, 'timestamp': '2025-10-02 00:24:48.204987', 'step': 7848, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:24:48.259019', 'step': 7848, 'epoch': 1}
{'type': 'loss', 'content': 0.14906412363052368, 'timestamp': '2025-10-02 00:24:48.261342', 'step': 7849, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:48.314840', 'step': 7849, 'epoch': 1}
{'type': 'loss', 'content': 0.17347024381160736, 'timestamp': '2025-10-02 00:24:48.318301', 'step': 7850, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:24:48.372490', 'step': 7850, 'epoch': 1}
{'type': 'loss', 'content': 0.08770643174648285, 'timestamp': '2025-10-02 00:24:48.375018', 'step': 7851, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:24:48.428879', 'step': 7851, 'epoch': 1}
{'type': 'loss', 'content': 0.08819972723722458, 'timestamp': '2025-10-02 00:24:48.435249', 'step': 7852, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:48.495458', 'step': 7852, 'epoch': 1}
{'type': 'loss', 'content': 0.023100152611732483, 'timestamp': '2025-10-02 00:24:48.497583', 'step': 7853, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:48.551198', 'step': 7853, 'epoch': 1}
{'type': 'loss', 'content': 0.05877809599041939, 'timestamp': '2025-10-02 00:24:48.557731', 'step': 7854, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:24:48.611450', 'step': 7854, 'epoch': 1}
{'type': 'loss', 'content': 0.26531463861465454, 'timestamp': '2025-10-02 00:24:48.614105', 'step': 7855, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:48.669268', 'step': 7855, 'epoch': 1}
{'type': 'loss', 'content': 0.01747167482972145, 'timestamp': '2025-10-02 00:24:48.679395', 'step': 7856, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:48.732898', 'step': 7856, 'epoch': 1}
{'type': 'loss', 'content': 0.10451940447092056, 'timestamp': '2025-10-02 00:24:48.735558', 'step': 7857, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:48.790357', 'step': 7857, 'epoch': 1}
{'type': 'loss', 'content': 0.03688840940594673, 'timestamp': '2025-10-02 00:24:48.792650', 'step': 7858, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:24:48.854640', 'step': 7858, 'epoch': 1}
{'type': 'loss', 'content': 0.032856617122888565, 'timestamp': '2025-10-02 00:24:48.865459', 'step': 7859, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:24:48.919487', 'step': 7859, 'epoch': 1}
{'type': 'loss', 'content': 0.08306974172592163, 'timestamp': '2025-10-02 00:24:48.925383', 'step': 7860, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:48.980881', 'step': 7860, 'epoch': 1}
{'type': 'loss', 'content': 0.017685437574982643, 'timestamp': '2025-10-02 00:24:48.983884', 'step': 7861, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:49.039792', 'step': 7861, 'epoch': 1}
{'type': 'loss', 'content': 0.06769512593746185, 'timestamp': '2025-10-02 00:24:49.043751', 'step': 7862, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:49.099362', 'step': 7862, 'epoch': 1}
{'type': 'loss', 'content': 0.16353720426559448, 'timestamp': '2025-10-02 00:24:49.104838', 'step': 7863, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:49.167051', 'step': 7863, 'epoch': 1}
{'type': 'loss', 'content': 0.07104316353797913, 'timestamp': '2025-10-02 00:24:49.173398', 'step': 7864, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:49.233807', 'step': 7864, 'epoch': 1}
{'type': 'loss', 'content': 0.04462754726409912, 'timestamp': '2025-10-02 00:24:49.240579', 'step': 7865, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:24:49.304838', 'step': 7865, 'epoch': 1}
{'type': 'loss', 'content': 0.13454526662826538, 'timestamp': '2025-10-02 00:24:49.308130', 'step': 7866, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:49.374217', 'step': 7866, 'epoch': 1}
{'type': 'loss', 'content': 0.05581798031926155, 'timestamp': '2025-10-02 00:24:49.381620', 'step': 7867, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:49.463649', 'step': 7867, 'epoch': 1}
{'type': 'loss', 'content': 0.14962126314640045, 'timestamp': '2025-10-02 00:24:49.480348', 'step': 7868, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:24:49.539175', 'step': 7868, 'epoch': 1}
{'type': 'loss', 'content': 0.0708659365773201, 'timestamp': '2025-10-02 00:24:49.543482', 'step': 7869, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:49.599047', 'step': 7869, 'epoch': 1}
{'type': 'loss', 'content': 0.05686106160283089, 'timestamp': '2025-10-02 00:24:49.610111', 'step': 7870, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:24:49.674510', 'step': 7870, 'epoch': 1}
{'type': 'loss', 'content': 0.09401928633451462, 'timestamp': '2025-10-02 00:24:49.690098', 'step': 7871, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:49.748367', 'step': 7871, 'epoch': 1}
{'type': 'loss', 'content': 0.03272588923573494, 'timestamp': '2025-10-02 00:24:49.755538', 'step': 7872, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:49.810476', 'step': 7872, 'epoch': 1}
{'type': 'loss', 'content': 0.0677608773112297, 'timestamp': '2025-10-02 00:24:49.812914', 'step': 7873, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:24:49.875351', 'step': 7873, 'epoch': 1}
{'type': 'loss', 'content': 0.018563419580459595, 'timestamp': '2025-10-02 00:24:49.885873', 'step': 7874, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:49.943998', 'step': 7874, 'epoch': 1}
{'type': 'loss', 'content': 0.13609357178211212, 'timestamp': '2025-10-02 00:24:49.950015', 'step': 7875, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:24:50.029727', 'step': 7875, 'epoch': 1}
{'type': 'loss', 'content': 0.04946691542863846, 'timestamp': '2025-10-02 00:24:50.043983', 'step': 7876, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:24:50.116208', 'step': 7876, 'epoch': 1}
{'type': 'loss', 'content': 0.041570644825696945, 'timestamp': '2025-10-02 00:24:50.129469', 'step': 7877, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:24:50.196333', 'step': 7877, 'epoch': 1}
{'type': 'loss', 'content': 0.10754747688770294, 'timestamp': '2025-10-02 00:24:50.201814', 'step': 7878, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:50.274215', 'step': 7878, 'epoch': 1}
{'type': 'loss', 'content': 0.1797313243150711, 'timestamp': '2025-10-02 00:24:50.280915', 'step': 7879, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:24:50.337339', 'step': 7879, 'epoch': 1}
{'type': 'loss', 'content': 0.04764879494905472, 'timestamp': '2025-10-02 00:24:50.343499', 'step': 7880, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:24:50.400536', 'step': 7880, 'epoch': 1}
{'type': 'loss', 'content': 0.18938925862312317, 'timestamp': '2025-10-02 00:24:50.403386', 'step': 7881, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:50.460109', 'step': 7881, 'epoch': 1}
{'type': 'loss', 'content': 0.18017169833183289, 'timestamp': '2025-10-02 00:24:50.462927', 'step': 7882, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:50.520884', 'step': 7882, 'epoch': 1}
{'type': 'loss', 'content': 0.24902121722698212, 'timestamp': '2025-10-02 00:24:50.524002', 'step': 7883, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:24:50.595443', 'step': 7883, 'epoch': 1}
{'type': 'loss', 'content': 0.03538019210100174, 'timestamp': '2025-10-02 00:24:50.608555', 'step': 7884, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:50.664296', 'step': 7884, 'epoch': 1}
{'type': 'loss', 'content': 0.06856418401002884, 'timestamp': '2025-10-02 00:24:50.670251', 'step': 7885, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:50.726039', 'step': 7885, 'epoch': 1}
{'type': 'loss', 'content': 0.11325099319219589, 'timestamp': '2025-10-02 00:24:50.729219', 'step': 7886, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:50.785302', 'step': 7886, 'epoch': 1}
{'type': 'loss', 'content': 0.07206791639328003, 'timestamp': '2025-10-02 00:24:50.792882', 'step': 7887, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:24:50.854460', 'step': 7887, 'epoch': 1}
{'type': 'loss', 'content': 0.10529274493455887, 'timestamp': '2025-10-02 00:24:50.865443', 'step': 7888, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:50.921003', 'step': 7888, 'epoch': 1}
{'type': 'loss', 'content': 0.023784391582012177, 'timestamp': '2025-10-02 00:24:50.926657', 'step': 7889, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:50.982601', 'step': 7889, 'epoch': 1}
{'type': 'loss', 'content': 0.06949771195650101, 'timestamp': '2025-10-02 00:24:50.985269', 'step': 7890, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:24:51.046242', 'step': 7890, 'epoch': 1}
{'type': 'loss', 'content': 0.06268028169870377, 'timestamp': '2025-10-02 00:24:51.056709', 'step': 7891, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:51.111208', 'step': 7891, 'epoch': 1}
{'type': 'loss', 'content': 0.102931909263134, 'timestamp': '2025-10-02 00:24:51.118074', 'step': 7892, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:51.171756', 'step': 7892, 'epoch': 1}
{'type': 'loss', 'content': 0.0981035977602005, 'timestamp': '2025-10-02 00:24:51.174232', 'step': 7893, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:24:51.235557', 'step': 7893, 'epoch': 1}
{'type': 'loss', 'content': 0.05430688336491585, 'timestamp': '2025-10-02 00:24:51.246012', 'step': 7894, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:51.300761', 'step': 7894, 'epoch': 1}
{'type': 'loss', 'content': 0.15635353326797485, 'timestamp': '2025-10-02 00:24:51.306649', 'step': 7895, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:51.361592', 'step': 7895, 'epoch': 1}
{'type': 'loss', 'content': 0.1048879399895668, 'timestamp': '2025-10-02 00:24:51.367602', 'step': 7896, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:24:51.420693', 'step': 7896, 'epoch': 1}
{'type': 'loss', 'content': 0.1317441761493683, 'timestamp': '2025-10-02 00:24:51.423115', 'step': 7897, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:51.477283', 'step': 7897, 'epoch': 1}
{'type': 'loss', 'content': 0.17325663566589355, 'timestamp': '2025-10-02 00:24:51.479528', 'step': 7898, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:24:51.541058', 'step': 7898, 'epoch': 1}
{'type': 'loss', 'content': 0.02313583716750145, 'timestamp': '2025-10-02 00:24:51.551596', 'step': 7899, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:24:51.613005', 'step': 7899, 'epoch': 1}
{'type': 'loss', 'content': 0.0299701988697052, 'timestamp': '2025-10-02 00:24:51.624281', 'step': 7900, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:51.679170', 'step': 7900, 'epoch': 1}
{'type': 'loss', 'content': 0.06287911534309387, 'timestamp': '2025-10-02 00:24:51.686583', 'step': 7901, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:24:51.745683', 'step': 7901, 'epoch': 1}
{'type': 'loss', 'content': 0.031804244965314865, 'timestamp': '2025-10-02 00:24:51.755896', 'step': 7902, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:51.811491', 'step': 7902, 'epoch': 1}
{'type': 'loss', 'content': 0.20310676097869873, 'timestamp': '2025-10-02 00:24:51.813884', 'step': 7903, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:24:51.876590', 'step': 7903, 'epoch': 1}
{'type': 'loss', 'content': 0.026075197383761406, 'timestamp': '2025-10-02 00:24:51.888209', 'step': 7904, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:51.943082', 'step': 7904, 'epoch': 1}
{'type': 'loss', 'content': 0.09288766235113144, 'timestamp': '2025-10-02 00:24:51.945745', 'step': 7905, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:52.000160', 'step': 7905, 'epoch': 1}
{'type': 'loss', 'content': 0.08056759089231491, 'timestamp': '2025-10-02 00:24:52.003494', 'step': 7906, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:24:52.057238', 'step': 7906, 'epoch': 1}
{'type': 'loss', 'content': 0.04728147014975548, 'timestamp': '2025-10-02 00:24:52.059641', 'step': 7907, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:52.113995', 'step': 7907, 'epoch': 1}
{'type': 'loss', 'content': 0.04484223574399948, 'timestamp': '2025-10-02 00:24:52.119877', 'step': 7908, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:52.173322', 'step': 7908, 'epoch': 1}
{'type': 'loss', 'content': 0.0966510996222496, 'timestamp': '2025-10-02 00:24:52.179315', 'step': 7909, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:52.233754', 'step': 7909, 'epoch': 1}
{'type': 'loss', 'content': 0.10936442017555237, 'timestamp': '2025-10-02 00:24:52.236400', 'step': 7910, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:24:52.291883', 'step': 7910, 'epoch': 1}
{'type': 'loss', 'content': 0.06404653191566467, 'timestamp': '2025-10-02 00:24:52.294599', 'step': 7911, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:52.349024', 'step': 7911, 'epoch': 1}
{'type': 'loss', 'content': 0.030523262917995453, 'timestamp': '2025-10-02 00:24:52.356456', 'step': 7912, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:52.409842', 'step': 7912, 'epoch': 1}
{'type': 'loss', 'content': 0.06403793394565582, 'timestamp': '2025-10-02 00:24:52.417349', 'step': 7913, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:24:52.476494', 'step': 7913, 'epoch': 1}
{'type': 'loss', 'content': 0.03305839002132416, 'timestamp': '2025-10-02 00:24:52.486674', 'step': 7914, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:52.541488', 'step': 7914, 'epoch': 1}
{'type': 'loss', 'content': 0.05627639591693878, 'timestamp': '2025-10-02 00:24:52.549011', 'step': 7915, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:52.604166', 'step': 7915, 'epoch': 1}
{'type': 'loss', 'content': 0.0243271104991436, 'timestamp': '2025-10-02 00:24:52.610367', 'step': 7916, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:52.665007', 'step': 7916, 'epoch': 1}
{'type': 'loss', 'content': 0.028235457837581635, 'timestamp': '2025-10-02 00:24:52.672493', 'step': 7917, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:24:52.727654', 'step': 7917, 'epoch': 1}
{'type': 'loss', 'content': 0.1052573099732399, 'timestamp': '2025-10-02 00:24:52.730213', 'step': 7918, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:52.784693', 'step': 7918, 'epoch': 1}
{'type': 'loss', 'content': 0.047009024769067764, 'timestamp': '2025-10-02 00:24:52.787310', 'step': 7919, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:52.842071', 'step': 7919, 'epoch': 1}
{'type': 'loss', 'content': 0.07314043492078781, 'timestamp': '2025-10-02 00:24:52.850200', 'step': 7920, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:52.904028', 'step': 7920, 'epoch': 1}
{'type': 'loss', 'content': 0.13959528505802155, 'timestamp': '2025-10-02 00:24:52.907173', 'step': 7921, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:52.961836', 'step': 7921, 'epoch': 1}
{'type': 'loss', 'content': 0.12609781324863434, 'timestamp': '2025-10-02 00:24:52.967375', 'step': 7922, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:53.022264', 'step': 7922, 'epoch': 1}
{'type': 'loss', 'content': 0.0783165991306305, 'timestamp': '2025-10-02 00:24:53.024611', 'step': 7923, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:24:53.086552', 'step': 7923, 'epoch': 1}
{'type': 'loss', 'content': 0.014159425161778927, 'timestamp': '2025-10-02 00:24:53.098010', 'step': 7924, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:53.153278', 'step': 7924, 'epoch': 1}
{'type': 'loss', 'content': 0.0729144737124443, 'timestamp': '2025-10-02 00:24:53.155722', 'step': 7925, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:53.209175', 'step': 7925, 'epoch': 1}
{'type': 'loss', 'content': 0.2193797528743744, 'timestamp': '2025-10-02 00:24:53.211445', 'step': 7926, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:53.265137', 'step': 7926, 'epoch': 1}
{'type': 'loss', 'content': 0.12948711216449738, 'timestamp': '2025-10-02 00:24:53.267771', 'step': 7927, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:53.321822', 'step': 7927, 'epoch': 1}
{'type': 'loss', 'content': 0.03454079478979111, 'timestamp': '2025-10-02 00:24:53.327800', 'step': 7928, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:53.381424', 'step': 7928, 'epoch': 1}
{'type': 'loss', 'content': 0.046515751630067825, 'timestamp': '2025-10-02 00:24:53.388093', 'step': 7929, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:24:53.452854', 'step': 7929, 'epoch': 1}
{'type': 'loss', 'content': 0.19706149399280548, 'timestamp': '2025-10-02 00:24:53.455312', 'step': 7930, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:24:53.508821', 'step': 7930, 'epoch': 1}
{'type': 'loss', 'content': 0.24866047501564026, 'timestamp': '2025-10-02 00:24:53.511299', 'step': 7931, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:53.565770', 'step': 7931, 'epoch': 1}
{'type': 'loss', 'content': 0.04703005030751228, 'timestamp': '2025-10-02 00:24:53.573988', 'step': 7932, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:53.627637', 'step': 7932, 'epoch': 1}
{'type': 'loss', 'content': 0.05490957200527191, 'timestamp': '2025-10-02 00:24:53.630033', 'step': 7933, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:53.684743', 'step': 7933, 'epoch': 1}
{'type': 'loss', 'content': 0.14571408927440643, 'timestamp': '2025-10-02 00:24:53.688138', 'step': 7934, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:53.743604', 'step': 7934, 'epoch': 1}
{'type': 'loss', 'content': 0.140132874250412, 'timestamp': '2025-10-02 00:24:53.745800', 'step': 7935, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:24:53.799640', 'step': 7935, 'epoch': 1}
{'type': 'loss', 'content': 0.11272231489419937, 'timestamp': '2025-10-02 00:24:53.807764', 'step': 7936, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:53.861213', 'step': 7936, 'epoch': 1}
{'type': 'loss', 'content': 0.051886290311813354, 'timestamp': '2025-10-02 00:24:53.868687', 'step': 7937, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:24:53.933404', 'step': 7937, 'epoch': 1}
{'type': 'loss', 'content': 0.08847185224294662, 'timestamp': '2025-10-02 00:24:53.943796', 'step': 7938, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:24:54.011805', 'step': 7938, 'epoch': 1}
{'type': 'loss', 'content': 0.06649382412433624, 'timestamp': '2025-10-02 00:24:54.024145', 'step': 7939, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:54.077881', 'step': 7939, 'epoch': 1}
{'type': 'loss', 'content': 0.1496499478816986, 'timestamp': '2025-10-02 00:24:54.085014', 'step': 7940, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:54.138885', 'step': 7940, 'epoch': 1}
{'type': 'loss', 'content': 0.13815560936927795, 'timestamp': '2025-10-02 00:24:54.141354', 'step': 7941, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:24:54.200657', 'step': 7941, 'epoch': 1}
{'type': 'loss', 'content': 0.02625296637415886, 'timestamp': '2025-10-02 00:24:54.210888', 'step': 7942, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:54.265135', 'step': 7942, 'epoch': 1}
{'type': 'loss', 'content': 0.031151767820119858, 'timestamp': '2025-10-02 00:24:54.267625', 'step': 7943, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:54.321941', 'step': 7943, 'epoch': 1}
{'type': 'loss', 'content': 0.04543357715010643, 'timestamp': '2025-10-02 00:24:54.327727', 'step': 7944, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:24:54.385177', 'step': 7944, 'epoch': 1}
{'type': 'loss', 'content': 0.06512592732906342, 'timestamp': '2025-10-02 00:24:54.396195', 'step': 7945, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:54.450591', 'step': 7945, 'epoch': 1}
{'type': 'loss', 'content': 0.043042682111263275, 'timestamp': '2025-10-02 00:24:54.452964', 'step': 7946, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:54.507374', 'step': 7946, 'epoch': 1}
{'type': 'loss', 'content': 0.08822448551654816, 'timestamp': '2025-10-02 00:24:54.509805', 'step': 7947, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:54.564393', 'step': 7947, 'epoch': 1}
{'type': 'loss', 'content': 0.08076979964971542, 'timestamp': '2025-10-02 00:24:54.572559', 'step': 7948, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:54.626590', 'step': 7948, 'epoch': 1}
{'type': 'loss', 'content': 0.040597204118967056, 'timestamp': '2025-10-02 00:24:54.635952', 'step': 7949, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:54.689960', 'step': 7949, 'epoch': 1}
{'type': 'loss', 'content': 0.287757933139801, 'timestamp': '2025-10-02 00:24:54.692438', 'step': 7950, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:54.746726', 'step': 7950, 'epoch': 1}
{'type': 'loss', 'content': 0.12790532410144806, 'timestamp': '2025-10-02 00:24:54.748932', 'step': 7951, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:54.802735', 'step': 7951, 'epoch': 1}
{'type': 'loss', 'content': 0.027177026495337486, 'timestamp': '2025-10-02 00:24:54.808827', 'step': 7952, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:24:54.863017', 'step': 7952, 'epoch': 1}
{'type': 'loss', 'content': 0.06064986065030098, 'timestamp': '2025-10-02 00:24:54.873264', 'step': 7953, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:24:54.942478', 'step': 7953, 'epoch': 1}
{'type': 'loss', 'content': 0.017125170677900314, 'timestamp': '2025-10-02 00:24:54.954452', 'step': 7954, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:55.008193', 'step': 7954, 'epoch': 1}
{'type': 'loss', 'content': 0.14541807770729065, 'timestamp': '2025-10-02 00:24:55.010447', 'step': 7955, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:55.064070', 'step': 7955, 'epoch': 1}
{'type': 'loss', 'content': 0.1256234347820282, 'timestamp': '2025-10-02 00:24:55.069927', 'step': 7956, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:55.123668', 'step': 7956, 'epoch': 1}
{'type': 'loss', 'content': 0.07211564481258392, 'timestamp': '2025-10-02 00:24:55.131190', 'step': 7957, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:55.185358', 'step': 7957, 'epoch': 1}
{'type': 'loss', 'content': 0.07554001361131668, 'timestamp': '2025-10-02 00:24:55.187923', 'step': 7958, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:24:55.248354', 'step': 7958, 'epoch': 1}
{'type': 'loss', 'content': 0.05248174071311951, 'timestamp': '2025-10-02 00:24:55.258823', 'step': 7959, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:24:55.312744', 'step': 7959, 'epoch': 1}
{'type': 'loss', 'content': 0.08587326109409332, 'timestamp': '2025-10-02 00:24:55.319451', 'step': 7960, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:24:55.373745', 'step': 7960, 'epoch': 1}
{'type': 'loss', 'content': 0.04933493584394455, 'timestamp': '2025-10-02 00:24:55.384003', 'step': 7961, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:55.438339', 'step': 7961, 'epoch': 1}
{'type': 'loss', 'content': 0.11591067165136337, 'timestamp': '2025-10-02 00:24:55.441053', 'step': 7962, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:55.495601', 'step': 7962, 'epoch': 1}
{'type': 'loss', 'content': 0.07263310253620148, 'timestamp': '2025-10-02 00:24:55.498167', 'step': 7963, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:55.552671', 'step': 7963, 'epoch': 1}
{'type': 'loss', 'content': 0.06545315682888031, 'timestamp': '2025-10-02 00:24:55.558884', 'step': 7964, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:55.612091', 'step': 7964, 'epoch': 1}
{'type': 'loss', 'content': 0.1593075841665268, 'timestamp': '2025-10-02 00:24:55.614564', 'step': 7965, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:55.668240', 'step': 7965, 'epoch': 1}
{'type': 'loss', 'content': 0.028030095621943474, 'timestamp': '2025-10-02 00:24:55.670783', 'step': 7966, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:55.724503', 'step': 7966, 'epoch': 1}
{'type': 'loss', 'content': 0.11640361696481705, 'timestamp': '2025-10-02 00:24:55.727070', 'step': 7967, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:55.783160', 'step': 7967, 'epoch': 1}
{'type': 'loss', 'content': 0.03689941391348839, 'timestamp': '2025-10-02 00:24:55.793248', 'step': 7968, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:55.847155', 'step': 7968, 'epoch': 1}
{'type': 'loss', 'content': 0.008091715164482594, 'timestamp': '2025-10-02 00:24:55.853139', 'step': 7969, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:55.909196', 'step': 7969, 'epoch': 1}
{'type': 'loss', 'content': 0.042235083878040314, 'timestamp': '2025-10-02 00:24:55.916829', 'step': 7970, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:24:55.970541', 'step': 7970, 'epoch': 1}
{'type': 'loss', 'content': 0.2646917998790741, 'timestamp': '2025-10-02 00:24:55.972764', 'step': 7971, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:56.027163', 'step': 7971, 'epoch': 1}
{'type': 'loss', 'content': 0.11934594810009003, 'timestamp': '2025-10-02 00:24:56.035470', 'step': 7972, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:56.089076', 'step': 7972, 'epoch': 1}
{'type': 'loss', 'content': 0.11575751006603241, 'timestamp': '2025-10-02 00:24:56.092416', 'step': 7973, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:56.146670', 'step': 7973, 'epoch': 1}
{'type': 'loss', 'content': 0.03708554804325104, 'timestamp': '2025-10-02 00:24:56.149111', 'step': 7974, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:56.202774', 'step': 7974, 'epoch': 1}
{'type': 'loss', 'content': 0.26088663935661316, 'timestamp': '2025-10-02 00:24:56.205280', 'step': 7975, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 544], 'flops': 10880066115712.0}, 'timestamp': '2025-10-02 00:24:56.287303', 'step': 7975, 'epoch': 1}
{'type': 'loss', 'content': 0.04096740484237671, 'timestamp': '2025-10-02 00:24:56.302928', 'step': 7976, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:56.356312', 'step': 7976, 'epoch': 1}
{'type': 'loss', 'content': 0.11972136050462723, 'timestamp': '2025-10-02 00:24:56.358889', 'step': 7977, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:24:56.413077', 'step': 7977, 'epoch': 1}
{'type': 'loss', 'content': 0.16602610051631927, 'timestamp': '2025-10-02 00:24:56.415734', 'step': 7978, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:24:56.477806', 'step': 7978, 'epoch': 1}
{'type': 'loss', 'content': 0.033465176820755005, 'timestamp': '2025-10-02 00:24:56.488326', 'step': 7979, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:24:56.553413', 'step': 7979, 'epoch': 1}
{'type': 'loss', 'content': 0.047150418162345886, 'timestamp': '2025-10-02 00:24:56.563710', 'step': 7980, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:56.622948', 'step': 7980, 'epoch': 1}
{'type': 'loss', 'content': 0.06102680414915085, 'timestamp': '2025-10-02 00:24:56.625527', 'step': 7981, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:56.679235', 'step': 7981, 'epoch': 1}
{'type': 'loss', 'content': 0.1548464298248291, 'timestamp': '2025-10-02 00:24:56.681771', 'step': 7982, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:56.736109', 'step': 7982, 'epoch': 1}
{'type': 'loss', 'content': 0.08321422338485718, 'timestamp': '2025-10-02 00:24:56.739732', 'step': 7983, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:24:56.796155', 'step': 7983, 'epoch': 1}
{'type': 'loss', 'content': 0.07222673296928406, 'timestamp': '2025-10-02 00:24:56.803029', 'step': 7984, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:56.867873', 'step': 7984, 'epoch': 1}
{'type': 'loss', 'content': 0.12352101504802704, 'timestamp': '2025-10-02 00:24:56.873847', 'step': 7985, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:56.927991', 'step': 7985, 'epoch': 1}
{'type': 'loss', 'content': 0.1560947746038437, 'timestamp': '2025-10-02 00:24:56.937173', 'step': 7986, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:24:57.002078', 'step': 7986, 'epoch': 1}
{'type': 'loss', 'content': 0.04082925245165825, 'timestamp': '2025-10-02 00:24:57.012320', 'step': 7987, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:24:57.067046', 'step': 7987, 'epoch': 1}
{'type': 'loss', 'content': 0.20904257893562317, 'timestamp': '2025-10-02 00:24:57.072896', 'step': 7988, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:57.128494', 'step': 7988, 'epoch': 1}
{'type': 'loss', 'content': 0.09197617322206497, 'timestamp': '2025-10-02 00:24:57.131765', 'step': 7989, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:57.187866', 'step': 7989, 'epoch': 1}
{'type': 'loss', 'content': 0.06945677101612091, 'timestamp': '2025-10-02 00:24:57.193589', 'step': 7990, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:57.250921', 'step': 7990, 'epoch': 1}
{'type': 'loss', 'content': 0.06564179062843323, 'timestamp': '2025-10-02 00:24:57.258317', 'step': 7991, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:57.320963', 'step': 7991, 'epoch': 1}
{'type': 'loss', 'content': 0.09078121930360794, 'timestamp': '2025-10-02 00:24:57.331110', 'step': 7992, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:57.387468', 'step': 7992, 'epoch': 1}
{'type': 'loss', 'content': 0.18351946771144867, 'timestamp': '2025-10-02 00:24:57.390092', 'step': 7993, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:57.450293', 'step': 7993, 'epoch': 1}
{'type': 'loss', 'content': 0.06575916707515717, 'timestamp': '2025-10-02 00:24:57.452658', 'step': 7994, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:57.508398', 'step': 7994, 'epoch': 1}
{'type': 'loss', 'content': 0.02834806591272354, 'timestamp': '2025-10-02 00:24:57.514320', 'step': 7995, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:57.568821', 'step': 7995, 'epoch': 1}
{'type': 'loss', 'content': 0.0743313878774643, 'timestamp': '2025-10-02 00:24:57.575820', 'step': 7996, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:24:57.631135', 'step': 7996, 'epoch': 1}
{'type': 'loss', 'content': 0.23056188225746155, 'timestamp': '2025-10-02 00:24:57.633845', 'step': 7997, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:24:57.688600', 'step': 7997, 'epoch': 1}
{'type': 'loss', 'content': 0.14796048402786255, 'timestamp': '2025-10-02 00:24:57.691478', 'step': 7998, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:24:57.747581', 'step': 7998, 'epoch': 1}
{'type': 'loss', 'content': 0.010314534418284893, 'timestamp': '2025-10-02 00:24:57.757139', 'step': 7999, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:57.814665', 'step': 7999, 'epoch': 1}
{'type': 'loss', 'content': 0.07476404309272766, 'timestamp': '2025-10-02 00:24:57.820648', 'step': 8000, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 8000', 'timestamp': '2025-10-02 00:24:58.467733', 'step': 8000, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:24:58.524042', 'step': 8000, 'epoch': 1}
{'type': 'loss', 'content': 0.10614011436700821, 'timestamp': '2025-10-02 00:24:58.526722', 'step': 8001, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:58.580778', 'step': 8001, 'epoch': 1}
{'type': 'loss', 'content': 0.12946750223636627, 'timestamp': '2025-10-02 00:24:58.583136', 'step': 8002, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:24:58.638104', 'step': 8002, 'epoch': 1}
{'type': 'loss', 'content': 0.05209280177950859, 'timestamp': '2025-10-02 00:24:58.640717', 'step': 8003, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:58.695606', 'step': 8003, 'epoch': 1}
{'type': 'loss', 'content': 0.05406154692173004, 'timestamp': '2025-10-02 00:24:58.701908', 'step': 8004, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:24:58.766754', 'step': 8004, 'epoch': 1}
{'type': 'loss', 'content': 0.03947668895125389, 'timestamp': '2025-10-02 00:24:58.779723', 'step': 8005, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:58.834275', 'step': 8005, 'epoch': 1}
{'type': 'loss', 'content': 0.044640734791755676, 'timestamp': '2025-10-02 00:24:58.841566', 'step': 8006, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:58.897961', 'step': 8006, 'epoch': 1}
{'type': 'loss', 'content': 0.06368036568164825, 'timestamp': '2025-10-02 00:24:58.903755', 'step': 8007, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:58.957876', 'step': 8007, 'epoch': 1}
{'type': 'loss', 'content': 0.05136941000819206, 'timestamp': '2025-10-02 00:24:58.968043', 'step': 8008, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:24:59.021463', 'step': 8008, 'epoch': 1}
{'type': 'loss', 'content': 0.07851369678974152, 'timestamp': '2025-10-02 00:24:59.024070', 'step': 8009, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:24:59.080166', 'step': 8009, 'epoch': 1}
{'type': 'loss', 'content': 0.06896986067295074, 'timestamp': '2025-10-02 00:24:59.082450', 'step': 8010, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:59.136745', 'step': 8010, 'epoch': 1}
{'type': 'loss', 'content': 0.04188040643930435, 'timestamp': '2025-10-02 00:24:59.139455', 'step': 8011, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:59.194026', 'step': 8011, 'epoch': 1}
{'type': 'loss', 'content': 0.055665481835603714, 'timestamp': '2025-10-02 00:24:59.201110', 'step': 8012, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:24:59.256665', 'step': 8012, 'epoch': 1}
{'type': 'loss', 'content': 0.13233046233654022, 'timestamp': '2025-10-02 00:24:59.259013', 'step': 8013, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:59.313724', 'step': 8013, 'epoch': 1}
{'type': 'loss', 'content': 0.0769227147102356, 'timestamp': '2025-10-02 00:24:59.323031', 'step': 8014, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:59.379876', 'step': 8014, 'epoch': 1}
{'type': 'loss', 'content': 0.04491518437862396, 'timestamp': '2025-10-02 00:24:59.385544', 'step': 8015, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:24:59.441929', 'step': 8015, 'epoch': 1}
{'type': 'loss', 'content': 0.015977703034877777, 'timestamp': '2025-10-02 00:24:59.448413', 'step': 8016, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:24:59.504013', 'step': 8016, 'epoch': 1}
{'type': 'loss', 'content': 0.0404064767062664, 'timestamp': '2025-10-02 00:24:59.511510', 'step': 8017, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:24:59.567135', 'step': 8017, 'epoch': 1}
{'type': 'loss', 'content': 0.08721345663070679, 'timestamp': '2025-10-02 00:24:59.570152', 'step': 8018, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:24:59.625735', 'step': 8018, 'epoch': 1}
{'type': 'loss', 'content': 0.08030901849269867, 'timestamp': '2025-10-02 00:24:59.629126', 'step': 8019, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:24:59.684801', 'step': 8019, 'epoch': 1}
{'type': 'loss', 'content': 0.06942050904035568, 'timestamp': '2025-10-02 00:24:59.691443', 'step': 8020, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:24:59.746791', 'step': 8020, 'epoch': 1}
{'type': 'loss', 'content': 0.061942704021930695, 'timestamp': '2025-10-02 00:24:59.756279', 'step': 8021, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:24:59.811117', 'step': 8021, 'epoch': 1}
{'type': 'loss', 'content': 0.04987351968884468, 'timestamp': '2025-10-02 00:24:59.816911', 'step': 8022, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:24:59.891589', 'step': 8022, 'epoch': 1}
{'type': 'loss', 'content': 0.014591182582080364, 'timestamp': '2025-10-02 00:24:59.904824', 'step': 8023, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:24:59.962153', 'step': 8023, 'epoch': 1}
{'type': 'loss', 'content': 0.1468389332294464, 'timestamp': '2025-10-02 00:24:59.968768', 'step': 8024, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:00.025185', 'step': 8024, 'epoch': 1}
{'type': 'loss', 'content': 0.026098135858774185, 'timestamp': '2025-10-02 00:25:00.027896', 'step': 8025, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:25:00.090878', 'step': 8025, 'epoch': 1}
{'type': 'loss', 'content': 0.055362701416015625, 'timestamp': '2025-10-02 00:25:00.101772', 'step': 8026, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:00.158177', 'step': 8026, 'epoch': 1}
{'type': 'loss', 'content': 0.08429751545190811, 'timestamp': '2025-10-02 00:25:00.165593', 'step': 8027, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:00.223318', 'step': 8027, 'epoch': 1}
{'type': 'loss', 'content': 0.09863250702619553, 'timestamp': '2025-10-02 00:25:00.230062', 'step': 8028, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:00.285111', 'step': 8028, 'epoch': 1}
{'type': 'loss', 'content': 0.08226590603590012, 'timestamp': '2025-10-02 00:25:00.288486', 'step': 8029, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:00.344274', 'step': 8029, 'epoch': 1}
{'type': 'loss', 'content': 0.17734146118164062, 'timestamp': '2025-10-02 00:25:00.346600', 'step': 8030, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:00.402806', 'step': 8030, 'epoch': 1}
{'type': 'loss', 'content': 0.09850356727838516, 'timestamp': '2025-10-02 00:25:00.405233', 'step': 8031, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:00.459218', 'step': 8031, 'epoch': 1}
{'type': 'loss', 'content': 0.06952405720949173, 'timestamp': '2025-10-02 00:25:00.468140', 'step': 8032, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:00.521826', 'step': 8032, 'epoch': 1}
{'type': 'loss', 'content': 0.19346871972084045, 'timestamp': '2025-10-02 00:25:00.524280', 'step': 8033, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:00.579489', 'step': 8033, 'epoch': 1}
{'type': 'loss', 'content': 0.08935891091823578, 'timestamp': '2025-10-02 00:25:00.583591', 'step': 8034, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:25:00.645104', 'step': 8034, 'epoch': 1}
{'type': 'loss', 'content': 0.06809127330780029, 'timestamp': '2025-10-02 00:25:00.655339', 'step': 8035, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:25:00.736623', 'step': 8035, 'epoch': 1}
{'type': 'loss', 'content': 0.03895055875182152, 'timestamp': '2025-10-02 00:25:00.749420', 'step': 8036, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:25:00.808405', 'step': 8036, 'epoch': 1}
{'type': 'loss', 'content': 0.06572484225034714, 'timestamp': '2025-10-02 00:25:00.819432', 'step': 8037, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:25:00.882843', 'step': 8037, 'epoch': 1}
{'type': 'loss', 'content': 0.032588209956884384, 'timestamp': '2025-10-02 00:25:00.893346', 'step': 8038, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:00.949033', 'step': 8038, 'epoch': 1}
{'type': 'loss', 'content': 0.05173578858375549, 'timestamp': '2025-10-02 00:25:00.952966', 'step': 8039, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:01.009194', 'step': 8039, 'epoch': 1}
{'type': 'loss', 'content': 0.06614864617586136, 'timestamp': '2025-10-02 00:25:01.015375', 'step': 8040, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:25:01.069140', 'step': 8040, 'epoch': 1}
{'type': 'loss', 'content': 0.16177108883857727, 'timestamp': '2025-10-02 00:25:01.071485', 'step': 8041, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:01.126033', 'step': 8041, 'epoch': 1}
{'type': 'loss', 'content': 0.05898124724626541, 'timestamp': '2025-10-02 00:25:01.128713', 'step': 8042, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:01.183333', 'step': 8042, 'epoch': 1}
{'type': 'loss', 'content': 0.05455797165632248, 'timestamp': '2025-10-02 00:25:01.186503', 'step': 8043, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:01.240708', 'step': 8043, 'epoch': 1}
{'type': 'loss', 'content': 0.1599549949169159, 'timestamp': '2025-10-02 00:25:01.246792', 'step': 8044, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:01.300780', 'step': 8044, 'epoch': 1}
{'type': 'loss', 'content': 0.03242009878158569, 'timestamp': '2025-10-02 00:25:01.306585', 'step': 8045, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:01.361300', 'step': 8045, 'epoch': 1}
{'type': 'loss', 'content': 0.11987976729869843, 'timestamp': '2025-10-02 00:25:01.363916', 'step': 8046, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:01.419495', 'step': 8046, 'epoch': 1}
{'type': 'loss', 'content': 0.14306722581386566, 'timestamp': '2025-10-02 00:25:01.421832', 'step': 8047, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:01.477512', 'step': 8047, 'epoch': 1}
{'type': 'loss', 'content': 0.07343564182519913, 'timestamp': '2025-10-02 00:25:01.487869', 'step': 8048, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:01.541868', 'step': 8048, 'epoch': 1}
{'type': 'loss', 'content': 0.05342608317732811, 'timestamp': '2025-10-02 00:25:01.552114', 'step': 8049, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 592], 'flops': 11840071943488.0}, 'timestamp': '2025-10-02 00:25:01.641800', 'step': 8049, 'epoch': 1}
{'type': 'loss', 'content': 0.024732572957873344, 'timestamp': '2025-10-02 00:25:01.658250', 'step': 8050, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:25:01.732294', 'step': 8050, 'epoch': 1}
{'type': 'loss', 'content': 0.018737763166427612, 'timestamp': '2025-10-02 00:25:01.745539', 'step': 8051, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:25:01.807777', 'step': 8051, 'epoch': 1}
{'type': 'loss', 'content': 0.04905843362212181, 'timestamp': '2025-10-02 00:25:01.819021', 'step': 8052, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:01.872927', 'step': 8052, 'epoch': 1}
{'type': 'loss', 'content': 0.04302351176738739, 'timestamp': '2025-10-02 00:25:01.875217', 'step': 8053, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:01.929261', 'step': 8053, 'epoch': 1}
{'type': 'loss', 'content': 0.06422670930624008, 'timestamp': '2025-10-02 00:25:01.932868', 'step': 8054, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:01.986718', 'step': 8054, 'epoch': 1}
{'type': 'loss', 'content': 0.0578528568148613, 'timestamp': '2025-10-02 00:25:01.989685', 'step': 8055, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:02.045268', 'step': 8055, 'epoch': 1}
{'type': 'loss', 'content': 0.0257083922624588, 'timestamp': '2025-10-02 00:25:02.051603', 'step': 8056, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:02.104989', 'step': 8056, 'epoch': 1}
{'type': 'loss', 'content': 0.020729612559080124, 'timestamp': '2025-10-02 00:25:02.110935', 'step': 8057, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:02.166137', 'step': 8057, 'epoch': 1}
{'type': 'loss', 'content': 0.06631546467542648, 'timestamp': '2025-10-02 00:25:02.168554', 'step': 8058, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:02.223251', 'step': 8058, 'epoch': 1}
{'type': 'loss', 'content': 0.01309407502412796, 'timestamp': '2025-10-02 00:25:02.226342', 'step': 8059, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:02.280371', 'step': 8059, 'epoch': 1}
{'type': 'loss', 'content': 0.10951404273509979, 'timestamp': '2025-10-02 00:25:02.286483', 'step': 8060, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:02.340153', 'step': 8060, 'epoch': 1}
{'type': 'loss', 'content': 0.11583959311246872, 'timestamp': '2025-10-02 00:25:02.342455', 'step': 8061, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:25:02.404033', 'step': 8061, 'epoch': 1}
{'type': 'loss', 'content': 0.03444672375917435, 'timestamp': '2025-10-02 00:25:02.414726', 'step': 8062, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:25:02.485261', 'step': 8062, 'epoch': 1}
{'type': 'loss', 'content': 0.035366080701351166, 'timestamp': '2025-10-02 00:25:02.497593', 'step': 8063, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:25:02.552134', 'step': 8063, 'epoch': 1}
{'type': 'loss', 'content': 0.20287594199180603, 'timestamp': '2025-10-02 00:25:02.558731', 'step': 8064, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:25:02.619224', 'step': 8064, 'epoch': 1}
{'type': 'loss', 'content': 0.035505764186382294, 'timestamp': '2025-10-02 00:25:02.630430', 'step': 8065, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:02.685566', 'step': 8065, 'epoch': 1}
{'type': 'loss', 'content': 0.07571162283420563, 'timestamp': '2025-10-02 00:25:02.688453', 'step': 8066, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:25:02.748031', 'step': 8066, 'epoch': 1}
{'type': 'loss', 'content': 0.032434992492198944, 'timestamp': '2025-10-02 00:25:02.758233', 'step': 8067, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:02.813101', 'step': 8067, 'epoch': 1}
{'type': 'loss', 'content': 0.05350370332598686, 'timestamp': '2025-10-02 00:25:02.819582', 'step': 8068, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:02.873260', 'step': 8068, 'epoch': 1}
{'type': 'loss', 'content': 0.0975857600569725, 'timestamp': '2025-10-02 00:25:02.875744', 'step': 8069, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:25:02.930596', 'step': 8069, 'epoch': 1}
{'type': 'loss', 'content': 0.07267364114522934, 'timestamp': '2025-10-02 00:25:02.935671', 'step': 8070, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:02.990295', 'step': 8070, 'epoch': 1}
{'type': 'loss', 'content': 0.06584268063306808, 'timestamp': '2025-10-02 00:25:02.992814', 'step': 8071, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:25:03.046759', 'step': 8071, 'epoch': 1}
{'type': 'loss', 'content': 0.08409348875284195, 'timestamp': '2025-10-02 00:25:03.053052', 'step': 8072, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:03.107639', 'step': 8072, 'epoch': 1}
{'type': 'loss', 'content': 0.06388504803180695, 'timestamp': '2025-10-02 00:25:03.110788', 'step': 8073, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:25:03.164222', 'step': 8073, 'epoch': 1}
{'type': 'loss', 'content': 0.08231421560049057, 'timestamp': '2025-10-02 00:25:03.167254', 'step': 8074, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:03.221610', 'step': 8074, 'epoch': 1}
{'type': 'loss', 'content': 0.026268446817994118, 'timestamp': '2025-10-02 00:25:03.224395', 'step': 8075, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:03.277902', 'step': 8075, 'epoch': 1}
{'type': 'loss', 'content': 0.08486908674240112, 'timestamp': '2025-10-02 00:25:03.284226', 'step': 8076, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:25:03.337902', 'step': 8076, 'epoch': 1}
{'type': 'loss', 'content': 0.10623778402805328, 'timestamp': '2025-10-02 00:25:03.340860', 'step': 8077, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:25:03.399954', 'step': 8077, 'epoch': 1}
{'type': 'loss', 'content': 0.04909949004650116, 'timestamp': '2025-10-02 00:25:03.410126', 'step': 8078, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:03.467180', 'step': 8078, 'epoch': 1}
{'type': 'loss', 'content': 0.032756440341472626, 'timestamp': '2025-10-02 00:25:03.476774', 'step': 8079, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:03.531301', 'step': 8079, 'epoch': 1}
{'type': 'loss', 'content': 0.04939284920692444, 'timestamp': '2025-10-02 00:25:03.537197', 'step': 8080, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:03.590629', 'step': 8080, 'epoch': 1}
{'type': 'loss', 'content': 0.19871726632118225, 'timestamp': '2025-10-02 00:25:03.593114', 'step': 8081, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:03.648136', 'step': 8081, 'epoch': 1}
{'type': 'loss', 'content': 0.044966645538806915, 'timestamp': '2025-10-02 00:25:03.654246', 'step': 8082, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:03.711946', 'step': 8082, 'epoch': 1}
{'type': 'loss', 'content': 0.058234456926584244, 'timestamp': '2025-10-02 00:25:03.717611', 'step': 8083, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:03.772212', 'step': 8083, 'epoch': 1}
{'type': 'loss', 'content': 0.04572426900267601, 'timestamp': '2025-10-02 00:25:03.778821', 'step': 8084, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:03.836573', 'step': 8084, 'epoch': 1}
{'type': 'loss', 'content': 0.04016367346048355, 'timestamp': '2025-10-02 00:25:03.838982', 'step': 8085, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:25:03.892766', 'step': 8085, 'epoch': 1}
{'type': 'loss', 'content': 0.07431183010339737, 'timestamp': '2025-10-02 00:25:03.895198', 'step': 8086, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:03.949270', 'step': 8086, 'epoch': 1}
{'type': 'loss', 'content': 0.17419755458831787, 'timestamp': '2025-10-02 00:25:03.951696', 'step': 8087, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:04.005821', 'step': 8087, 'epoch': 1}
{'type': 'loss', 'content': 0.14757271111011505, 'timestamp': '2025-10-02 00:25:04.011847', 'step': 8088, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:25:04.066258', 'step': 8088, 'epoch': 1}
{'type': 'loss', 'content': 0.061244506388902664, 'timestamp': '2025-10-02 00:25:04.068966', 'step': 8089, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:25:04.124818', 'step': 8089, 'epoch': 1}
{'type': 'loss', 'content': 0.1884150356054306, 'timestamp': '2025-10-02 00:25:04.139678', 'step': 8090, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:04.194322', 'step': 8090, 'epoch': 1}
{'type': 'loss', 'content': 0.05282053351402283, 'timestamp': '2025-10-02 00:25:04.197037', 'step': 8091, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:04.251145', 'step': 8091, 'epoch': 1}
{'type': 'loss', 'content': 0.10638294368982315, 'timestamp': '2025-10-02 00:25:04.257734', 'step': 8092, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:04.312268', 'step': 8092, 'epoch': 1}
{'type': 'loss', 'content': 0.06150183081626892, 'timestamp': '2025-10-02 00:25:04.314795', 'step': 8093, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:04.370362', 'step': 8093, 'epoch': 1}
{'type': 'loss', 'content': 0.1243877112865448, 'timestamp': '2025-10-02 00:25:04.372984', 'step': 8094, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:04.428599', 'step': 8094, 'epoch': 1}
{'type': 'loss', 'content': 0.12113761901855469, 'timestamp': '2025-10-02 00:25:04.435713', 'step': 8095, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:04.489929', 'step': 8095, 'epoch': 1}
{'type': 'loss', 'content': 0.1280539482831955, 'timestamp': '2025-10-02 00:25:04.495993', 'step': 8096, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:25:04.549385', 'step': 8096, 'epoch': 1}
{'type': 'loss', 'content': 0.09662093222141266, 'timestamp': '2025-10-02 00:25:04.558973', 'step': 8097, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:04.614962', 'step': 8097, 'epoch': 1}
{'type': 'loss', 'content': 0.0523502491414547, 'timestamp': '2025-10-02 00:25:04.620512', 'step': 8098, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:25:04.674601', 'step': 8098, 'epoch': 1}
{'type': 'loss', 'content': 0.11228036880493164, 'timestamp': '2025-10-02 00:25:04.676921', 'step': 8099, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:25:04.731602', 'step': 8099, 'epoch': 1}
{'type': 'loss', 'content': 0.15880532562732697, 'timestamp': '2025-10-02 00:25:04.740147', 'step': 8100, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:04.793449', 'step': 8100, 'epoch': 1}
{'type': 'loss', 'content': 0.0807342603802681, 'timestamp': '2025-10-02 00:25:04.795923', 'step': 8101, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:04.850738', 'step': 8101, 'epoch': 1}
{'type': 'loss', 'content': 0.017622172832489014, 'timestamp': '2025-10-02 00:25:04.854042', 'step': 8102, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:04.909200', 'step': 8102, 'epoch': 1}
{'type': 'loss', 'content': 0.05497857183218002, 'timestamp': '2025-10-02 00:25:04.911704', 'step': 8103, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:04.966094', 'step': 8103, 'epoch': 1}
{'type': 'loss', 'content': 0.0654129907488823, 'timestamp': '2025-10-02 00:25:04.971961', 'step': 8104, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:05.026041', 'step': 8104, 'epoch': 1}
{'type': 'loss', 'content': 0.034812819212675095, 'timestamp': '2025-10-02 00:25:05.036289', 'step': 8105, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:05.090941', 'step': 8105, 'epoch': 1}
{'type': 'loss', 'content': 0.02254427596926689, 'timestamp': '2025-10-02 00:25:05.096876', 'step': 8106, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:05.153288', 'step': 8106, 'epoch': 1}
{'type': 'loss', 'content': 0.05171886831521988, 'timestamp': '2025-10-02 00:25:05.162695', 'step': 8107, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:25:05.223109', 'step': 8107, 'epoch': 1}
{'type': 'loss', 'content': 0.06292702257633209, 'timestamp': '2025-10-02 00:25:05.234253', 'step': 8108, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:05.289409', 'step': 8108, 'epoch': 1}
{'type': 'loss', 'content': 0.027787145227193832, 'timestamp': '2025-10-02 00:25:05.296768', 'step': 8109, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:05.351087', 'step': 8109, 'epoch': 1}
{'type': 'loss', 'content': 0.07072026282548904, 'timestamp': '2025-10-02 00:25:05.356987', 'step': 8110, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:25:05.419257', 'step': 8110, 'epoch': 1}
{'type': 'loss', 'content': 0.018094444647431374, 'timestamp': '2025-10-02 00:25:05.429965', 'step': 8111, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:05.486342', 'step': 8111, 'epoch': 1}
{'type': 'loss', 'content': 0.07040926069021225, 'timestamp': '2025-10-02 00:25:05.492133', 'step': 8112, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:05.546911', 'step': 8112, 'epoch': 1}
{'type': 'loss', 'content': 0.034409038722515106, 'timestamp': '2025-10-02 00:25:05.549568', 'step': 8113, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:05.603427', 'step': 8113, 'epoch': 1}
{'type': 'loss', 'content': 0.036459971219301224, 'timestamp': '2025-10-02 00:25:05.610989', 'step': 8114, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:05.666182', 'step': 8114, 'epoch': 1}
{'type': 'loss', 'content': 0.042187631130218506, 'timestamp': '2025-10-02 00:25:05.673485', 'step': 8115, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:05.731643', 'step': 8115, 'epoch': 1}
{'type': 'loss', 'content': 0.06433796137571335, 'timestamp': '2025-10-02 00:25:05.737202', 'step': 8116, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:05.790626', 'step': 8116, 'epoch': 1}
{'type': 'loss', 'content': 0.052629437297582626, 'timestamp': '2025-10-02 00:25:05.792865', 'step': 8117, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:05.847854', 'step': 8117, 'epoch': 1}
{'type': 'loss', 'content': 0.2190217673778534, 'timestamp': '2025-10-02 00:25:05.850479', 'step': 8118, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:05.905038', 'step': 8118, 'epoch': 1}
{'type': 'loss', 'content': 0.05675099417567253, 'timestamp': '2025-10-02 00:25:05.908693', 'step': 8119, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:25:05.962658', 'step': 8119, 'epoch': 1}
{'type': 'loss', 'content': 0.17857958376407623, 'timestamp': '2025-10-02 00:25:05.969577', 'step': 8120, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:06.024134', 'step': 8120, 'epoch': 1}
{'type': 'loss', 'content': 0.06626946479082108, 'timestamp': '2025-10-02 00:25:06.026461', 'step': 8121, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:06.080312', 'step': 8121, 'epoch': 1}
{'type': 'loss', 'content': 0.04115008935332298, 'timestamp': '2025-10-02 00:25:06.082519', 'step': 8122, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:25:06.144440', 'step': 8122, 'epoch': 1}
{'type': 'loss', 'content': 0.09761668741703033, 'timestamp': '2025-10-02 00:25:06.155076', 'step': 8123, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:06.224658', 'step': 8123, 'epoch': 1}
{'type': 'loss', 'content': 0.04945419728755951, 'timestamp': '2025-10-02 00:25:06.240867', 'step': 8124, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:06.340384', 'step': 8124, 'epoch': 1}
{'type': 'loss', 'content': 0.1320781707763672, 'timestamp': '2025-10-02 00:25:06.352267', 'step': 8125, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:06.432482', 'step': 8125, 'epoch': 1}
{'type': 'loss', 'content': 0.05698547139763832, 'timestamp': '2025-10-02 00:25:06.452731', 'step': 8126, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:25:06.564537', 'step': 8126, 'epoch': 1}
{'type': 'loss', 'content': 0.098818838596344, 'timestamp': '2025-10-02 00:25:06.574692', 'step': 8127, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:06.651957', 'step': 8127, 'epoch': 1}
{'type': 'loss', 'content': 0.0620112344622612, 'timestamp': '2025-10-02 00:25:06.664098', 'step': 8128, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:25:06.730565', 'step': 8128, 'epoch': 1}
{'type': 'loss', 'content': 0.015967408195137978, 'timestamp': '2025-10-02 00:25:06.744955', 'step': 8129, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:25:06.828827', 'step': 8129, 'epoch': 1}
{'type': 'loss', 'content': 0.16219186782836914, 'timestamp': '2025-10-02 00:25:06.839951', 'step': 8130, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:06.894266', 'step': 8130, 'epoch': 1}
{'type': 'loss', 'content': 0.028211316093802452, 'timestamp': '2025-10-02 00:25:06.907488', 'step': 8131, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:06.980861', 'step': 8131, 'epoch': 1}
{'type': 'loss', 'content': 0.05272206664085388, 'timestamp': '2025-10-02 00:25:07.002678', 'step': 8132, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:07.073346', 'step': 8132, 'epoch': 1}
{'type': 'loss', 'content': 0.06261803954839706, 'timestamp': '2025-10-02 00:25:07.080095', 'step': 8133, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:25:07.144652', 'step': 8133, 'epoch': 1}
{'type': 'loss', 'content': 0.11828573793172836, 'timestamp': '2025-10-02 00:25:07.148530', 'step': 8134, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:25:07.232166', 'step': 8134, 'epoch': 1}
{'type': 'loss', 'content': 0.03134344145655632, 'timestamp': '2025-10-02 00:25:07.242363', 'step': 8135, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:25:07.305127', 'step': 8135, 'epoch': 1}
{'type': 'loss', 'content': 0.1354765146970749, 'timestamp': '2025-10-02 00:25:07.323040', 'step': 8136, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:07.411804', 'step': 8136, 'epoch': 1}
{'type': 'loss', 'content': 0.0805097296833992, 'timestamp': '2025-10-02 00:25:07.439252', 'step': 8137, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:25:07.509001', 'step': 8137, 'epoch': 1}
{'type': 'loss', 'content': 0.05422048643231392, 'timestamp': '2025-10-02 00:25:07.517481', 'step': 8138, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:07.573013', 'step': 8138, 'epoch': 1}
{'type': 'loss', 'content': 0.07949469983577728, 'timestamp': '2025-10-02 00:25:07.582444', 'step': 8139, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:07.640689', 'step': 8139, 'epoch': 1}
{'type': 'loss', 'content': 0.03859513998031616, 'timestamp': '2025-10-02 00:25:07.650880', 'step': 8140, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:07.707073', 'step': 8140, 'epoch': 1}
{'type': 'loss', 'content': 0.0549115352332592, 'timestamp': '2025-10-02 00:25:07.710399', 'step': 8141, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:07.768326', 'step': 8141, 'epoch': 1}
{'type': 'loss', 'content': 0.12231606990098953, 'timestamp': '2025-10-02 00:25:07.775619', 'step': 8142, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:25:07.840842', 'step': 8142, 'epoch': 1}
{'type': 'loss', 'content': 0.009797569364309311, 'timestamp': '2025-10-02 00:25:07.851480', 'step': 8143, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:07.908862', 'step': 8143, 'epoch': 1}
{'type': 'loss', 'content': 0.07663937658071518, 'timestamp': '2025-10-02 00:25:07.915705', 'step': 8144, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:07.975985', 'step': 8144, 'epoch': 1}
{'type': 'loss', 'content': 0.11354675889015198, 'timestamp': '2025-10-02 00:25:07.978751', 'step': 8145, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:25:08.050636', 'step': 8145, 'epoch': 1}
{'type': 'loss', 'content': 0.030197428539395332, 'timestamp': '2025-10-02 00:25:08.063094', 'step': 8146, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:25:08.135365', 'step': 8146, 'epoch': 1}
{'type': 'loss', 'content': 0.04249022156000137, 'timestamp': '2025-10-02 00:25:08.147730', 'step': 8147, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:25:08.204384', 'step': 8147, 'epoch': 1}
{'type': 'loss', 'content': 0.29084065556526184, 'timestamp': '2025-10-02 00:25:08.211472', 'step': 8148, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:08.268480', 'step': 8148, 'epoch': 1}
{'type': 'loss', 'content': 0.06366360187530518, 'timestamp': '2025-10-02 00:25:08.270920', 'step': 8149, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:08.326964', 'step': 8149, 'epoch': 1}
{'type': 'loss', 'content': 0.09106137603521347, 'timestamp': '2025-10-02 00:25:08.330263', 'step': 8150, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:08.386319', 'step': 8150, 'epoch': 1}
{'type': 'loss', 'content': 0.17426875233650208, 'timestamp': '2025-10-02 00:25:08.388948', 'step': 8151, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:08.446522', 'step': 8151, 'epoch': 1}
{'type': 'loss', 'content': 0.03548441454768181, 'timestamp': '2025-10-02 00:25:08.454013', 'step': 8152, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:08.508167', 'step': 8152, 'epoch': 1}
{'type': 'loss', 'content': 0.18034285306930542, 'timestamp': '2025-10-02 00:25:08.511624', 'step': 8153, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:08.568749', 'step': 8153, 'epoch': 1}
{'type': 'loss', 'content': 0.10330040752887726, 'timestamp': '2025-10-02 00:25:08.572124', 'step': 8154, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:25:08.628803', 'step': 8154, 'epoch': 1}
{'type': 'loss', 'content': 0.14557255804538727, 'timestamp': '2025-10-02 00:25:08.632471', 'step': 8155, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:25:08.688566', 'step': 8155, 'epoch': 1}
{'type': 'loss', 'content': 0.12274488806724548, 'timestamp': '2025-10-02 00:25:08.694739', 'step': 8156, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:08.749149', 'step': 8156, 'epoch': 1}
{'type': 'loss', 'content': 0.11312751471996307, 'timestamp': '2025-10-02 00:25:08.751861', 'step': 8157, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:08.809515', 'step': 8157, 'epoch': 1}
{'type': 'loss', 'content': 0.02739216759800911, 'timestamp': '2025-10-02 00:25:08.815372', 'step': 8158, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:08.872426', 'step': 8158, 'epoch': 1}
{'type': 'loss', 'content': 0.039302196353673935, 'timestamp': '2025-10-02 00:25:08.877934', 'step': 8159, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:08.935964', 'step': 8159, 'epoch': 1}
{'type': 'loss', 'content': 0.20706331729888916, 'timestamp': '2025-10-02 00:25:08.943061', 'step': 8160, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:08.998661', 'step': 8160, 'epoch': 1}
{'type': 'loss', 'content': 0.1098584532737732, 'timestamp': '2025-10-02 00:25:09.002504', 'step': 8161, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:09.058712', 'step': 8161, 'epoch': 1}
{'type': 'loss', 'content': 0.049032993614673615, 'timestamp': '2025-10-02 00:25:09.064461', 'step': 8162, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:09.118798', 'step': 8162, 'epoch': 1}
{'type': 'loss', 'content': 0.048802439123392105, 'timestamp': '2025-10-02 00:25:09.121178', 'step': 8163, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:09.176503', 'step': 8163, 'epoch': 1}
{'type': 'loss', 'content': 0.08135926723480225, 'timestamp': '2025-10-02 00:25:09.182979', 'step': 8164, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:25:09.237100', 'step': 8164, 'epoch': 1}
{'type': 'loss', 'content': 0.18783769011497498, 'timestamp': '2025-10-02 00:25:09.239785', 'step': 8165, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:09.294722', 'step': 8165, 'epoch': 1}
{'type': 'loss', 'content': 0.05162212252616882, 'timestamp': '2025-10-02 00:25:09.300675', 'step': 8166, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:25:09.363665', 'step': 8166, 'epoch': 1}
{'type': 'loss', 'content': 0.034208934754133224, 'timestamp': '2025-10-02 00:25:09.374251', 'step': 8167, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:09.429168', 'step': 8167, 'epoch': 1}
{'type': 'loss', 'content': 0.03457393869757652, 'timestamp': '2025-10-02 00:25:09.435399', 'step': 8168, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:09.489836', 'step': 8168, 'epoch': 1}
{'type': 'loss', 'content': 0.05020885542035103, 'timestamp': '2025-10-02 00:25:09.492977', 'step': 8169, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:09.548024', 'step': 8169, 'epoch': 1}
{'type': 'loss', 'content': 0.04923572391271591, 'timestamp': '2025-10-02 00:25:09.550394', 'step': 8170, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:09.605766', 'step': 8170, 'epoch': 1}
{'type': 'loss', 'content': 0.0631323829293251, 'timestamp': '2025-10-02 00:25:09.611279', 'step': 8171, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:25:09.666048', 'step': 8171, 'epoch': 1}
{'type': 'loss', 'content': 0.0763552263379097, 'timestamp': '2025-10-02 00:25:09.672714', 'step': 8172, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:09.728081', 'step': 8172, 'epoch': 1}
{'type': 'loss', 'content': 0.04876894876360893, 'timestamp': '2025-10-02 00:25:09.730528', 'step': 8173, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:09.784927', 'step': 8173, 'epoch': 1}
{'type': 'loss', 'content': 0.132844015955925, 'timestamp': '2025-10-02 00:25:09.787528', 'step': 8174, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:09.844604', 'step': 8174, 'epoch': 1}
{'type': 'loss', 'content': 0.08338702470064163, 'timestamp': '2025-10-02 00:25:09.851996', 'step': 8175, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:09.918415', 'step': 8175, 'epoch': 1}
{'type': 'loss', 'content': 0.0752558782696724, 'timestamp': '2025-10-02 00:25:09.924209', 'step': 8176, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:09.978796', 'step': 8176, 'epoch': 1}
{'type': 'loss', 'content': 0.040052782744169235, 'timestamp': '2025-10-02 00:25:09.981367', 'step': 8177, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:10.036593', 'step': 8177, 'epoch': 1}
{'type': 'loss', 'content': 0.14438208937644958, 'timestamp': '2025-10-02 00:25:10.039086', 'step': 8178, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:10.094603', 'step': 8178, 'epoch': 1}
{'type': 'loss', 'content': 0.09368464350700378, 'timestamp': '2025-10-02 00:25:10.096715', 'step': 8179, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:25:10.152549', 'step': 8179, 'epoch': 1}
{'type': 'loss', 'content': 0.04753118008375168, 'timestamp': '2025-10-02 00:25:10.161795', 'step': 8180, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:10.216162', 'step': 8180, 'epoch': 1}
{'type': 'loss', 'content': 0.11839694529771805, 'timestamp': '2025-10-02 00:25:10.218479', 'step': 8181, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:10.275188', 'step': 8181, 'epoch': 1}
{'type': 'loss', 'content': 0.07063125818967819, 'timestamp': '2025-10-02 00:25:10.277746', 'step': 8182, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:25:10.336797', 'step': 8182, 'epoch': 1}
{'type': 'loss', 'content': 0.1360115259885788, 'timestamp': '2025-10-02 00:25:10.346975', 'step': 8183, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:10.403364', 'step': 8183, 'epoch': 1}
{'type': 'loss', 'content': 0.11143004149198532, 'timestamp': '2025-10-02 00:25:10.409851', 'step': 8184, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:10.463740', 'step': 8184, 'epoch': 1}
{'type': 'loss', 'content': 0.1250683218240738, 'timestamp': '2025-10-02 00:25:10.470247', 'step': 8185, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:25:10.528210', 'step': 8185, 'epoch': 1}
{'type': 'loss', 'content': 0.05688416585326195, 'timestamp': '2025-10-02 00:25:10.530886', 'step': 8186, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:25:10.592620', 'step': 8186, 'epoch': 1}
{'type': 'loss', 'content': 0.032346151769161224, 'timestamp': '2025-10-02 00:25:10.603089', 'step': 8187, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:10.659702', 'step': 8187, 'epoch': 1}
{'type': 'loss', 'content': 0.038319118320941925, 'timestamp': '2025-10-02 00:25:10.667314', 'step': 8188, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:10.723728', 'step': 8188, 'epoch': 1}
{'type': 'loss', 'content': 0.07513701170682907, 'timestamp': '2025-10-02 00:25:10.726106', 'step': 8189, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:10.781775', 'step': 8189, 'epoch': 1}
{'type': 'loss', 'content': 0.05378146842122078, 'timestamp': '2025-10-02 00:25:10.787899', 'step': 8190, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:10.844031', 'step': 8190, 'epoch': 1}
{'type': 'loss', 'content': 0.05785972252488136, 'timestamp': '2025-10-02 00:25:10.846896', 'step': 8191, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:10.903711', 'step': 8191, 'epoch': 1}
{'type': 'loss', 'content': 0.134836807847023, 'timestamp': '2025-10-02 00:25:10.910055', 'step': 8192, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:10.964987', 'step': 8192, 'epoch': 1}
{'type': 'loss', 'content': 0.05711033195257187, 'timestamp': '2025-10-02 00:25:10.967843', 'step': 8193, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:11.024578', 'step': 8193, 'epoch': 1}
{'type': 'loss', 'content': 0.0183204784989357, 'timestamp': '2025-10-02 00:25:11.027451', 'step': 8194, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:25:11.082243', 'step': 8194, 'epoch': 1}
{'type': 'loss', 'content': 0.11041532456874847, 'timestamp': '2025-10-02 00:25:11.084907', 'step': 8195, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:11.141244', 'step': 8195, 'epoch': 1}
{'type': 'loss', 'content': 0.06963077187538147, 'timestamp': '2025-10-02 00:25:11.147840', 'step': 8196, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:11.203011', 'step': 8196, 'epoch': 1}
{'type': 'loss', 'content': 0.07181919366121292, 'timestamp': '2025-10-02 00:25:11.205385', 'step': 8197, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:25:11.261393', 'step': 8197, 'epoch': 1}
{'type': 'loss', 'content': 0.18348491191864014, 'timestamp': '2025-10-02 00:25:11.263903', 'step': 8198, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:25:11.337740', 'step': 8198, 'epoch': 1}
{'type': 'loss', 'content': 0.018695900216698647, 'timestamp': '2025-10-02 00:25:11.350813', 'step': 8199, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:11.411574', 'step': 8199, 'epoch': 1}
{'type': 'loss', 'content': 0.11311107128858566, 'timestamp': '2025-10-02 00:25:11.417592', 'step': 8200, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:11.472022', 'step': 8200, 'epoch': 1}
{'type': 'loss', 'content': 0.10499663650989532, 'timestamp': '2025-10-02 00:25:11.475833', 'step': 8201, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:11.534507', 'step': 8201, 'epoch': 1}
{'type': 'loss', 'content': 0.053567949682474136, 'timestamp': '2025-10-02 00:25:11.539747', 'step': 8202, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:11.596616', 'step': 8202, 'epoch': 1}
{'type': 'loss', 'content': 0.08505043387413025, 'timestamp': '2025-10-02 00:25:11.599124', 'step': 8203, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:11.654105', 'step': 8203, 'epoch': 1}
{'type': 'loss', 'content': 0.14604130387306213, 'timestamp': '2025-10-02 00:25:11.664415', 'step': 8204, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:25:11.732380', 'step': 8204, 'epoch': 1}
{'type': 'loss', 'content': 0.0975254699587822, 'timestamp': '2025-10-02 00:25:11.745694', 'step': 8205, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:11.801812', 'step': 8205, 'epoch': 1}
{'type': 'loss', 'content': 0.04232775419950485, 'timestamp': '2025-10-02 00:25:11.808748', 'step': 8206, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:11.865229', 'step': 8206, 'epoch': 1}
{'type': 'loss', 'content': 0.0875275731086731, 'timestamp': '2025-10-02 00:25:11.868124', 'step': 8207, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:25:11.937182', 'step': 8207, 'epoch': 1}
{'type': 'loss', 'content': 0.03673945367336273, 'timestamp': '2025-10-02 00:25:11.950256', 'step': 8208, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:25:12.006064', 'step': 8208, 'epoch': 1}
{'type': 'loss', 'content': 0.09342936426401138, 'timestamp': '2025-10-02 00:25:12.008907', 'step': 8209, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:25:12.069865', 'step': 8209, 'epoch': 1}
{'type': 'loss', 'content': 0.007824314758181572, 'timestamp': '2025-10-02 00:25:12.080068', 'step': 8210, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:12.136813', 'step': 8210, 'epoch': 1}
{'type': 'loss', 'content': 0.03538702055811882, 'timestamp': '2025-10-02 00:25:12.139157', 'step': 8211, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:12.196362', 'step': 8211, 'epoch': 1}
{'type': 'loss', 'content': 0.11104527860879898, 'timestamp': '2025-10-02 00:25:12.202172', 'step': 8212, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:25:12.256182', 'step': 8212, 'epoch': 1}
{'type': 'loss', 'content': 0.1898280680179596, 'timestamp': '2025-10-02 00:25:12.258755', 'step': 8213, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:12.314182', 'step': 8213, 'epoch': 1}
{'type': 'loss', 'content': 0.15894562005996704, 'timestamp': '2025-10-02 00:25:12.316694', 'step': 8214, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:12.373262', 'step': 8214, 'epoch': 1}
{'type': 'loss', 'content': 0.1475573629140854, 'timestamp': '2025-10-02 00:25:12.375561', 'step': 8215, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:12.430597', 'step': 8215, 'epoch': 1}
{'type': 'loss', 'content': 0.0773710161447525, 'timestamp': '2025-10-02 00:25:12.437602', 'step': 8216, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:12.492906', 'step': 8216, 'epoch': 1}
{'type': 'loss', 'content': 0.04275623708963394, 'timestamp': '2025-10-02 00:25:12.495634', 'step': 8217, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:12.551706', 'step': 8217, 'epoch': 1}
{'type': 'loss', 'content': 0.11834558099508286, 'timestamp': '2025-10-02 00:25:12.554516', 'step': 8218, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:12.610469', 'step': 8218, 'epoch': 1}
{'type': 'loss', 'content': 0.08689235895872116, 'timestamp': '2025-10-02 00:25:12.613352', 'step': 8219, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:25:12.675868', 'step': 8219, 'epoch': 1}
{'type': 'loss', 'content': 0.014630507677793503, 'timestamp': '2025-10-02 00:25:12.687063', 'step': 8220, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:25:12.742854', 'step': 8220, 'epoch': 1}
{'type': 'loss', 'content': 0.01850469596683979, 'timestamp': '2025-10-02 00:25:12.750777', 'step': 8221, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:12.805472', 'step': 8221, 'epoch': 1}
{'type': 'loss', 'content': 0.11763118207454681, 'timestamp': '2025-10-02 00:25:12.808303', 'step': 8222, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:12.863194', 'step': 8222, 'epoch': 1}
{'type': 'loss', 'content': 0.08426377922296524, 'timestamp': '2025-10-02 00:25:12.865529', 'step': 8223, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:12.920298', 'step': 8223, 'epoch': 1}
{'type': 'loss', 'content': 0.09908527135848999, 'timestamp': '2025-10-02 00:25:12.926408', 'step': 8224, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:12.982405', 'step': 8224, 'epoch': 1}
{'type': 'loss', 'content': 0.07168707996606827, 'timestamp': '2025-10-02 00:25:12.985025', 'step': 8225, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:13.040506', 'step': 8225, 'epoch': 1}
{'type': 'loss', 'content': 0.07283923774957657, 'timestamp': '2025-10-02 00:25:13.042839', 'step': 8226, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:13.098763', 'step': 8226, 'epoch': 1}
{'type': 'loss', 'content': 0.13401377201080322, 'timestamp': '2025-10-02 00:25:13.101134', 'step': 8227, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:25:13.159444', 'step': 8227, 'epoch': 1}
{'type': 'loss', 'content': 0.04740022122859955, 'timestamp': '2025-10-02 00:25:13.170341', 'step': 8228, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:13.225643', 'step': 8228, 'epoch': 1}
{'type': 'loss', 'content': 0.14014732837677002, 'timestamp': '2025-10-02 00:25:13.227988', 'step': 8229, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:13.283758', 'step': 8229, 'epoch': 1}
{'type': 'loss', 'content': 0.17830955982208252, 'timestamp': '2025-10-02 00:25:13.286430', 'step': 8230, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:13.343275', 'step': 8230, 'epoch': 1}
{'type': 'loss', 'content': 0.04166894033551216, 'timestamp': '2025-10-02 00:25:13.351862', 'step': 8231, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:13.407039', 'step': 8231, 'epoch': 1}
{'type': 'loss', 'content': 0.19031989574432373, 'timestamp': '2025-10-02 00:25:13.413111', 'step': 8232, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:13.468847', 'step': 8232, 'epoch': 1}
{'type': 'loss', 'content': 0.048312801867723465, 'timestamp': '2025-10-02 00:25:13.477793', 'step': 8233, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:13.532275', 'step': 8233, 'epoch': 1}
{'type': 'loss', 'content': 0.07185477763414383, 'timestamp': '2025-10-02 00:25:13.536561', 'step': 8234, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:25:13.598909', 'step': 8234, 'epoch': 1}
{'type': 'loss', 'content': 0.03281984105706215, 'timestamp': '2025-10-02 00:25:13.609300', 'step': 8235, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:13.665340', 'step': 8235, 'epoch': 1}
{'type': 'loss', 'content': 0.06817206740379333, 'timestamp': '2025-10-02 00:25:13.671445', 'step': 8236, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:25:13.731871', 'step': 8236, 'epoch': 1}
{'type': 'loss', 'content': 0.034189969301223755, 'timestamp': '2025-10-02 00:25:13.743645', 'step': 8237, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:13.800866', 'step': 8237, 'epoch': 1}
{'type': 'loss', 'content': 0.10667844861745834, 'timestamp': '2025-10-02 00:25:13.806631', 'step': 8238, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:25:13.865455', 'step': 8238, 'epoch': 1}
{'type': 'loss', 'content': 0.06860014796257019, 'timestamp': '2025-10-02 00:25:13.875614', 'step': 8239, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:25:13.937834', 'step': 8239, 'epoch': 1}
{'type': 'loss', 'content': 0.024408401921391487, 'timestamp': '2025-10-02 00:25:13.949113', 'step': 8240, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:14.005134', 'step': 8240, 'epoch': 1}
{'type': 'loss', 'content': 0.110310859978199, 'timestamp': '2025-10-02 00:25:14.007717', 'step': 8241, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:14.065006', 'step': 8241, 'epoch': 1}
{'type': 'loss', 'content': 0.06999486684799194, 'timestamp': '2025-10-02 00:25:14.067389', 'step': 8242, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:14.122181', 'step': 8242, 'epoch': 1}
{'type': 'loss', 'content': 0.12413864582777023, 'timestamp': '2025-10-02 00:25:14.124434', 'step': 8243, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:14.178861', 'step': 8243, 'epoch': 1}
{'type': 'loss', 'content': 0.06890763342380524, 'timestamp': '2025-10-02 00:25:14.184933', 'step': 8244, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:25:14.239876', 'step': 8244, 'epoch': 1}
{'type': 'loss', 'content': 0.1350235491991043, 'timestamp': '2025-10-02 00:25:14.242169', 'step': 8245, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:14.297735', 'step': 8245, 'epoch': 1}
{'type': 'loss', 'content': 0.07345877587795258, 'timestamp': '2025-10-02 00:25:14.299936', 'step': 8246, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:25:14.355600', 'step': 8246, 'epoch': 1}
{'type': 'loss', 'content': 0.016355928033590317, 'timestamp': '2025-10-02 00:25:14.364026', 'step': 8247, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:14.420933', 'step': 8247, 'epoch': 1}
{'type': 'loss', 'content': 0.1611904799938202, 'timestamp': '2025-10-02 00:25:14.428776', 'step': 8248, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:14.484302', 'step': 8248, 'epoch': 1}
{'type': 'loss', 'content': 0.03565378487110138, 'timestamp': '2025-10-02 00:25:14.490484', 'step': 8249, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:25:14.549247', 'step': 8249, 'epoch': 1}
{'type': 'loss', 'content': 0.1515640914440155, 'timestamp': '2025-10-02 00:25:14.559455', 'step': 8250, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:25:14.620786', 'step': 8250, 'epoch': 1}
{'type': 'loss', 'content': 0.041099127382040024, 'timestamp': '2025-10-02 00:25:14.631004', 'step': 8251, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:14.688120', 'step': 8251, 'epoch': 1}
{'type': 'loss', 'content': 0.07274793088436127, 'timestamp': '2025-10-02 00:25:14.694194', 'step': 8252, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:25:14.751274', 'step': 8252, 'epoch': 1}
{'type': 'loss', 'content': 0.055977340787649155, 'timestamp': '2025-10-02 00:25:14.757341', 'step': 8253, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:14.813289', 'step': 8253, 'epoch': 1}
{'type': 'loss', 'content': 0.15742461383342743, 'timestamp': '2025-10-02 00:25:14.815750', 'step': 8254, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:14.870088', 'step': 8254, 'epoch': 1}
{'type': 'loss', 'content': 0.06838325411081314, 'timestamp': '2025-10-02 00:25:14.872621', 'step': 8255, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:14.926498', 'step': 8255, 'epoch': 1}
{'type': 'loss', 'content': 0.06660184264183044, 'timestamp': '2025-10-02 00:25:14.932470', 'step': 8256, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:14.986462', 'step': 8256, 'epoch': 1}
{'type': 'loss', 'content': 0.04902327433228493, 'timestamp': '2025-10-02 00:25:14.996700', 'step': 8257, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:15.051131', 'step': 8257, 'epoch': 1}
{'type': 'loss', 'content': 0.22880692780017853, 'timestamp': '2025-10-02 00:25:15.053520', 'step': 8258, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:15.107169', 'step': 8258, 'epoch': 1}
{'type': 'loss', 'content': 0.21758496761322021, 'timestamp': '2025-10-02 00:25:15.109560', 'step': 8259, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:25:15.164796', 'step': 8259, 'epoch': 1}
{'type': 'loss', 'content': 0.028635427355766296, 'timestamp': '2025-10-02 00:25:15.174917', 'step': 8260, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:15.228701', 'step': 8260, 'epoch': 1}
{'type': 'loss', 'content': 0.1633000522851944, 'timestamp': '2025-10-02 00:25:15.234597', 'step': 8261, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:15.289013', 'step': 8261, 'epoch': 1}
{'type': 'loss', 'content': 0.13534331321716309, 'timestamp': '2025-10-02 00:25:15.291523', 'step': 8262, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:25:15.349630', 'step': 8262, 'epoch': 1}
{'type': 'loss', 'content': 0.05427517741918564, 'timestamp': '2025-10-02 00:25:15.359816', 'step': 8263, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:25:15.413990', 'step': 8263, 'epoch': 1}
{'type': 'loss', 'content': 0.2146822214126587, 'timestamp': '2025-10-02 00:25:15.420408', 'step': 8264, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:15.474746', 'step': 8264, 'epoch': 1}
{'type': 'loss', 'content': 0.08189209550619125, 'timestamp': '2025-10-02 00:25:15.484959', 'step': 8265, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:15.541113', 'step': 8265, 'epoch': 1}
{'type': 'loss', 'content': 0.044482309371232986, 'timestamp': '2025-10-02 00:25:15.550664', 'step': 8266, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:25:15.605534', 'step': 8266, 'epoch': 1}
{'type': 'loss', 'content': 0.13072417676448822, 'timestamp': '2025-10-02 00:25:15.607891', 'step': 8267, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:15.662393', 'step': 8267, 'epoch': 1}
{'type': 'loss', 'content': 0.1512155681848526, 'timestamp': '2025-10-02 00:25:15.668609', 'step': 8268, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:15.724594', 'step': 8268, 'epoch': 1}
{'type': 'loss', 'content': 0.07009363919496536, 'timestamp': '2025-10-02 00:25:15.731970', 'step': 8269, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:25:15.799508', 'step': 8269, 'epoch': 1}
{'type': 'loss', 'content': 0.026409946382045746, 'timestamp': '2025-10-02 00:25:15.811450', 'step': 8270, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:25:15.873730', 'step': 8270, 'epoch': 1}
{'type': 'loss', 'content': 0.021058237180113792, 'timestamp': '2025-10-02 00:25:15.884381', 'step': 8271, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:25:15.948147', 'step': 8271, 'epoch': 1}
{'type': 'loss', 'content': 0.018753143027424812, 'timestamp': '2025-10-02 00:25:15.959823', 'step': 8272, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:16.013817', 'step': 8272, 'epoch': 1}
{'type': 'loss', 'content': 0.06562613695859909, 'timestamp': '2025-10-02 00:25:16.016398', 'step': 8273, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:16.070522', 'step': 8273, 'epoch': 1}
{'type': 'loss', 'content': 0.07572031766176224, 'timestamp': '2025-10-02 00:25:16.073486', 'step': 8274, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 544], 'flops': 10880066115712.0}, 'timestamp': '2025-10-02 00:25:16.156801', 'step': 8274, 'epoch': 1}
{'type': 'loss', 'content': 0.044064171612262726, 'timestamp': '2025-10-02 00:25:16.171591', 'step': 8275, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:16.228898', 'step': 8275, 'epoch': 1}
{'type': 'loss', 'content': 0.025540871545672417, 'timestamp': '2025-10-02 00:25:16.239230', 'step': 8276, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:16.293929', 'step': 8276, 'epoch': 1}
{'type': 'loss', 'content': 0.03455525264143944, 'timestamp': '2025-10-02 00:25:16.301081', 'step': 8277, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:25:16.354997', 'step': 8277, 'epoch': 1}
{'type': 'loss', 'content': 0.1255677491426468, 'timestamp': '2025-10-02 00:25:16.358085', 'step': 8278, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:16.415850', 'step': 8278, 'epoch': 1}
{'type': 'loss', 'content': 0.05038652569055557, 'timestamp': '2025-10-02 00:25:16.421467', 'step': 8279, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:25:16.479155', 'step': 8279, 'epoch': 1}
{'type': 'loss', 'content': 0.17574036121368408, 'timestamp': '2025-10-02 00:25:16.487238', 'step': 8280, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:25:16.551992', 'step': 8280, 'epoch': 1}
{'type': 'loss', 'content': 0.06606936454772949, 'timestamp': '2025-10-02 00:25:16.563300', 'step': 8281, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:25:16.621045', 'step': 8281, 'epoch': 1}
{'type': 'loss', 'content': 0.05877266079187393, 'timestamp': '2025-10-02 00:25:16.624822', 'step': 8282, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:16.680822', 'step': 8282, 'epoch': 1}
{'type': 'loss', 'content': 0.1516466587781906, 'timestamp': '2025-10-02 00:25:16.684534', 'step': 8283, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:25:16.742402', 'step': 8283, 'epoch': 1}
{'type': 'loss', 'content': 0.14612871408462524, 'timestamp': '2025-10-02 00:25:16.749250', 'step': 8284, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:16.806758', 'step': 8284, 'epoch': 1}
{'type': 'loss', 'content': 0.1144838035106659, 'timestamp': '2025-10-02 00:25:16.814129', 'step': 8285, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:25:16.869072', 'step': 8285, 'epoch': 1}
{'type': 'loss', 'content': 0.04777195304632187, 'timestamp': '2025-10-02 00:25:16.878425', 'step': 8286, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:16.937539', 'step': 8286, 'epoch': 1}
{'type': 'loss', 'content': 0.10578818619251251, 'timestamp': '2025-10-02 00:25:16.940852', 'step': 8287, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:25:17.004331', 'step': 8287, 'epoch': 1}
{'type': 'loss', 'content': 0.042608290910720825, 'timestamp': '2025-10-02 00:25:17.015803', 'step': 8288, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:17.070478', 'step': 8288, 'epoch': 1}
{'type': 'loss', 'content': 0.13872100412845612, 'timestamp': '2025-10-02 00:25:17.074600', 'step': 8289, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:25:17.132384', 'step': 8289, 'epoch': 1}
{'type': 'loss', 'content': 0.07564955204725266, 'timestamp': '2025-10-02 00:25:17.134783', 'step': 8290, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:17.192093', 'step': 8290, 'epoch': 1}
{'type': 'loss', 'content': 0.054806772619485855, 'timestamp': '2025-10-02 00:25:17.197591', 'step': 8291, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:25:17.260752', 'step': 8291, 'epoch': 1}
{'type': 'loss', 'content': 0.03933519124984741, 'timestamp': '2025-10-02 00:25:17.272025', 'step': 8292, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:17.327861', 'step': 8292, 'epoch': 1}
{'type': 'loss', 'content': 0.22766868770122528, 'timestamp': '2025-10-02 00:25:17.331022', 'step': 8293, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:17.389038', 'step': 8293, 'epoch': 1}
{'type': 'loss', 'content': 0.03434586152434349, 'timestamp': '2025-10-02 00:25:17.394624', 'step': 8294, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:17.452705', 'step': 8294, 'epoch': 1}
{'type': 'loss', 'content': 0.07765526324510574, 'timestamp': '2025-10-02 00:25:17.455391', 'step': 8295, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:25:17.510797', 'step': 8295, 'epoch': 1}
{'type': 'loss', 'content': 0.14642266929149628, 'timestamp': '2025-10-02 00:25:17.517018', 'step': 8296, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:17.576864', 'step': 8296, 'epoch': 1}
{'type': 'loss', 'content': 0.033916011452674866, 'timestamp': '2025-10-02 00:25:17.580336', 'step': 8297, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:17.636006', 'step': 8297, 'epoch': 1}
{'type': 'loss', 'content': 0.12684869766235352, 'timestamp': '2025-10-02 00:25:17.639455', 'step': 8298, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:17.697049', 'step': 8298, 'epoch': 1}
{'type': 'loss', 'content': 0.18946585059165955, 'timestamp': '2025-10-02 00:25:17.700815', 'step': 8299, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:17.757990', 'step': 8299, 'epoch': 1}
{'type': 'loss', 'content': 0.12498122453689575, 'timestamp': '2025-10-02 00:25:17.764980', 'step': 8300, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:17.820190', 'step': 8300, 'epoch': 1}
{'type': 'loss', 'content': 0.06135835498571396, 'timestamp': '2025-10-02 00:25:17.827346', 'step': 8301, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:17.885419', 'step': 8301, 'epoch': 1}
{'type': 'loss', 'content': 0.059136901050806046, 'timestamp': '2025-10-02 00:25:17.894937', 'step': 8302, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:17.950494', 'step': 8302, 'epoch': 1}
{'type': 'loss', 'content': 0.13927626609802246, 'timestamp': '2025-10-02 00:25:17.953406', 'step': 8303, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:25:18.010671', 'step': 8303, 'epoch': 1}
{'type': 'loss', 'content': 0.08556286990642548, 'timestamp': '2025-10-02 00:25:18.020785', 'step': 8304, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:18.077441', 'step': 8304, 'epoch': 1}
{'type': 'loss', 'content': 0.050058115273714066, 'timestamp': '2025-10-02 00:25:18.083019', 'step': 8305, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:18.139355', 'step': 8305, 'epoch': 1}
{'type': 'loss', 'content': 0.02363697811961174, 'timestamp': '2025-10-02 00:25:18.146719', 'step': 8306, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:18.204120', 'step': 8306, 'epoch': 1}
{'type': 'loss', 'content': 0.16051889955997467, 'timestamp': '2025-10-02 00:25:18.206307', 'step': 8307, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:25:18.262466', 'step': 8307, 'epoch': 1}
{'type': 'loss', 'content': 0.041802648454904556, 'timestamp': '2025-10-02 00:25:18.272547', 'step': 8308, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:18.327442', 'step': 8308, 'epoch': 1}
{'type': 'loss', 'content': 0.04720231890678406, 'timestamp': '2025-10-02 00:25:18.330390', 'step': 8309, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:18.386717', 'step': 8309, 'epoch': 1}
{'type': 'loss', 'content': 0.02111104689538479, 'timestamp': '2025-10-02 00:25:18.390224', 'step': 8310, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:18.447498', 'step': 8310, 'epoch': 1}
{'type': 'loss', 'content': 0.16819553077220917, 'timestamp': '2025-10-02 00:25:18.449882', 'step': 8311, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:25:18.504684', 'step': 8311, 'epoch': 1}
{'type': 'loss', 'content': 0.11509248614311218, 'timestamp': '2025-10-02 00:25:18.510515', 'step': 8312, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:18.565251', 'step': 8312, 'epoch': 1}
{'type': 'loss', 'content': 0.011701436713337898, 'timestamp': '2025-10-02 00:25:18.575484', 'step': 8313, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:18.630847', 'step': 8313, 'epoch': 1}
{'type': 'loss', 'content': 0.19865180552005768, 'timestamp': '2025-10-02 00:25:18.633507', 'step': 8314, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:25:18.695492', 'step': 8314, 'epoch': 1}
{'type': 'loss', 'content': 0.020234985277056694, 'timestamp': '2025-10-02 00:25:18.706089', 'step': 8315, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:18.760841', 'step': 8315, 'epoch': 1}
{'type': 'loss', 'content': 0.11453612893819809, 'timestamp': '2025-10-02 00:25:18.766888', 'step': 8316, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:18.821250', 'step': 8316, 'epoch': 1}
{'type': 'loss', 'content': 0.13470663130283356, 'timestamp': '2025-10-02 00:25:18.826610', 'step': 8317, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:18.882813', 'step': 8317, 'epoch': 1}
{'type': 'loss', 'content': 0.027611006051301956, 'timestamp': '2025-10-02 00:25:18.892354', 'step': 8318, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:18.947304', 'step': 8318, 'epoch': 1}
{'type': 'loss', 'content': 0.12298130244016647, 'timestamp': '2025-10-02 00:25:18.949570', 'step': 8319, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:25:19.010986', 'step': 8319, 'epoch': 1}
{'type': 'loss', 'content': 0.038878802210092545, 'timestamp': '2025-10-02 00:25:19.022291', 'step': 8320, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:19.077745', 'step': 8320, 'epoch': 1}
{'type': 'loss', 'content': 0.011023947969079018, 'timestamp': '2025-10-02 00:25:19.080324', 'step': 8321, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:19.134891', 'step': 8321, 'epoch': 1}
{'type': 'loss', 'content': 0.019282981753349304, 'timestamp': '2025-10-02 00:25:19.137509', 'step': 8322, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:25:19.201318', 'step': 8322, 'epoch': 1}
{'type': 'loss', 'content': 0.034936897456645966, 'timestamp': '2025-10-02 00:25:19.211978', 'step': 8323, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:19.267658', 'step': 8323, 'epoch': 1}
{'type': 'loss', 'content': 0.02810848504304886, 'timestamp': '2025-10-02 00:25:19.273715', 'step': 8324, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:19.328714', 'step': 8324, 'epoch': 1}
{'type': 'loss', 'content': 0.09644239395856857, 'timestamp': '2025-10-02 00:25:19.334247', 'step': 8325, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:19.388894', 'step': 8325, 'epoch': 1}
{'type': 'loss', 'content': 0.07412966340780258, 'timestamp': '2025-10-02 00:25:19.391698', 'step': 8326, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:25:19.446192', 'step': 8326, 'epoch': 1}
{'type': 'loss', 'content': 0.1591872274875641, 'timestamp': '2025-10-02 00:25:19.448686', 'step': 8327, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:19.503531', 'step': 8327, 'epoch': 1}
{'type': 'loss', 'content': 0.08492099493741989, 'timestamp': '2025-10-02 00:25:19.509546', 'step': 8328, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:25:19.563408', 'step': 8328, 'epoch': 1}
{'type': 'loss', 'content': 0.21756446361541748, 'timestamp': '2025-10-02 00:25:19.565935', 'step': 8329, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:25:19.625847', 'step': 8329, 'epoch': 1}
{'type': 'loss', 'content': 0.01863912306725979, 'timestamp': '2025-10-02 00:25:19.636021', 'step': 8330, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:19.691413', 'step': 8330, 'epoch': 1}
{'type': 'loss', 'content': 0.13555069267749786, 'timestamp': '2025-10-02 00:25:19.693804', 'step': 8331, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:19.748131', 'step': 8331, 'epoch': 1}
{'type': 'loss', 'content': 0.13685639202594757, 'timestamp': '2025-10-02 00:25:19.754373', 'step': 8332, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:19.808284', 'step': 8332, 'epoch': 1}
{'type': 'loss', 'content': 0.0936363935470581, 'timestamp': '2025-10-02 00:25:19.810592', 'step': 8333, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:19.865318', 'step': 8333, 'epoch': 1}
{'type': 'loss', 'content': 0.055097710341215134, 'timestamp': '2025-10-02 00:25:19.872584', 'step': 8334, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:19.927514', 'step': 8334, 'epoch': 1}
{'type': 'loss', 'content': 0.051693033427000046, 'timestamp': '2025-10-02 00:25:19.929900', 'step': 8335, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:19.984681', 'step': 8335, 'epoch': 1}
{'type': 'loss', 'content': 0.08290653675794601, 'timestamp': '2025-10-02 00:25:19.991326', 'step': 8336, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:20.047977', 'step': 8336, 'epoch': 1}
{'type': 'loss', 'content': 0.05796438083052635, 'timestamp': '2025-10-02 00:25:20.050769', 'step': 8337, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:25:20.106368', 'step': 8337, 'epoch': 1}
{'type': 'loss', 'content': 0.036956869065761566, 'timestamp': '2025-10-02 00:25:20.115391', 'step': 8338, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:20.170194', 'step': 8338, 'epoch': 1}
{'type': 'loss', 'content': 0.15958337485790253, 'timestamp': '2025-10-02 00:25:20.172778', 'step': 8339, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:20.227343', 'step': 8339, 'epoch': 1}
{'type': 'loss', 'content': 0.20162440836429596, 'timestamp': '2025-10-02 00:25:20.233828', 'step': 8340, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:20.287128', 'step': 8340, 'epoch': 1}
{'type': 'loss', 'content': 0.06913358718156815, 'timestamp': '2025-10-02 00:25:20.294637', 'step': 8341, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:25:20.358103', 'step': 8341, 'epoch': 1}
{'type': 'loss', 'content': 0.06735257804393768, 'timestamp': '2025-10-02 00:25:20.368954', 'step': 8342, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:20.423982', 'step': 8342, 'epoch': 1}
{'type': 'loss', 'content': 0.07474175095558167, 'timestamp': '2025-10-02 00:25:20.429569', 'step': 8343, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:25:20.499905', 'step': 8343, 'epoch': 1}
{'type': 'loss', 'content': 0.02817285992205143, 'timestamp': '2025-10-02 00:25:20.513021', 'step': 8344, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:20.568692', 'step': 8344, 'epoch': 1}
{'type': 'loss', 'content': 0.027061771601438522, 'timestamp': '2025-10-02 00:25:20.576194', 'step': 8345, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:25:20.630848', 'step': 8345, 'epoch': 1}
{'type': 'loss', 'content': 0.1080130785703659, 'timestamp': '2025-10-02 00:25:20.633524', 'step': 8346, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:20.688491', 'step': 8346, 'epoch': 1}
{'type': 'loss', 'content': 0.040960490703582764, 'timestamp': '2025-10-02 00:25:20.690989', 'step': 8347, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:20.747105', 'step': 8347, 'epoch': 1}
{'type': 'loss', 'content': 0.025628814473748207, 'timestamp': '2025-10-02 00:25:20.757384', 'step': 8348, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:20.812079', 'step': 8348, 'epoch': 1}
{'type': 'loss', 'content': 0.09547530114650726, 'timestamp': '2025-10-02 00:25:20.815118', 'step': 8349, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:25:20.870057', 'step': 8349, 'epoch': 1}
{'type': 'loss', 'content': 0.07451988756656647, 'timestamp': '2025-10-02 00:25:20.872614', 'step': 8350, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:25:20.928301', 'step': 8350, 'epoch': 1}
{'type': 'loss', 'content': 0.062025636434555054, 'timestamp': '2025-10-02 00:25:20.937669', 'step': 8351, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:20.993348', 'step': 8351, 'epoch': 1}
{'type': 'loss', 'content': 0.1605052649974823, 'timestamp': '2025-10-02 00:25:21.001781', 'step': 8352, 'epoch': 1}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:25:48.041615', 'step': 8352, 'epoch': 1}
{'type': 'pplx', 'content': 94.44517645499286, 'timestamp': '2025-10-02 00:25:48.045313', 'step': 8352, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:48.100151', 'step': 8352, 'epoch': 1}
{'type': 'loss', 'content': 0.0952165424823761, 'timestamp': '2025-10-02 00:25:48.102683', 'step': 8353, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:48.158682', 'step': 8353, 'epoch': 1}
{'type': 'loss', 'content': 0.07408823072910309, 'timestamp': '2025-10-02 00:25:48.161209', 'step': 8354, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:48.216396', 'step': 8354, 'epoch': 1}
{'type': 'loss', 'content': 0.02600065991282463, 'timestamp': '2025-10-02 00:25:48.218794', 'step': 8355, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:48.273576', 'step': 8355, 'epoch': 1}
{'type': 'loss', 'content': 0.0999765396118164, 'timestamp': '2025-10-02 00:25:48.281704', 'step': 8356, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:48.335568', 'step': 8356, 'epoch': 1}
{'type': 'loss', 'content': 0.07172048091888428, 'timestamp': '2025-10-02 00:25:48.343096', 'step': 8357, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:48.397599', 'step': 8357, 'epoch': 1}
{'type': 'loss', 'content': 0.08470083773136139, 'timestamp': '2025-10-02 00:25:48.399749', 'step': 8358, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:25:48.453601', 'step': 8358, 'epoch': 1}
{'type': 'loss', 'content': 0.16104167699813843, 'timestamp': '2025-10-02 00:25:48.455930', 'step': 8359, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:48.510182', 'step': 8359, 'epoch': 1}
{'type': 'loss', 'content': 0.1108015701174736, 'timestamp': '2025-10-02 00:25:48.516891', 'step': 8360, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:48.571586', 'step': 8360, 'epoch': 1}
{'type': 'loss', 'content': 0.06203610077500343, 'timestamp': '2025-10-02 00:25:48.573854', 'step': 8361, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:25:48.629058', 'step': 8361, 'epoch': 1}
{'type': 'loss', 'content': 0.07567441463470459, 'timestamp': '2025-10-02 00:25:48.631460', 'step': 8362, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:48.686311', 'step': 8362, 'epoch': 1}
{'type': 'loss', 'content': 0.077619768679142, 'timestamp': '2025-10-02 00:25:48.688541', 'step': 8363, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:48.743982', 'step': 8363, 'epoch': 1}
{'type': 'loss', 'content': 0.014951400458812714, 'timestamp': '2025-10-02 00:25:48.754351', 'step': 8364, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:48.808606', 'step': 8364, 'epoch': 1}
{'type': 'loss', 'content': 0.048345353454351425, 'timestamp': '2025-10-02 00:25:48.811384', 'step': 8365, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:48.866065', 'step': 8365, 'epoch': 1}
{'type': 'loss', 'content': 0.061015013605356216, 'timestamp': '2025-10-02 00:25:48.873544', 'step': 8366, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:48.928226', 'step': 8366, 'epoch': 1}
{'type': 'loss', 'content': 0.16892924904823303, 'timestamp': '2025-10-02 00:25:48.930562', 'step': 8367, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:48.985805', 'step': 8367, 'epoch': 1}
{'type': 'loss', 'content': 0.053744278848171234, 'timestamp': '2025-10-02 00:25:48.992500', 'step': 8368, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:49.046642', 'step': 8368, 'epoch': 1}
{'type': 'loss', 'content': 0.1346016377210617, 'timestamp': '2025-10-02 00:25:49.049161', 'step': 8369, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:49.104595', 'step': 8369, 'epoch': 1}
{'type': 'loss', 'content': 0.07928750663995743, 'timestamp': '2025-10-02 00:25:49.114095', 'step': 8370, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:49.169695', 'step': 8370, 'epoch': 1}
{'type': 'loss', 'content': 0.04748816043138504, 'timestamp': '2025-10-02 00:25:49.177142', 'step': 8371, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:49.231603', 'step': 8371, 'epoch': 1}
{'type': 'loss', 'content': 0.057810213416814804, 'timestamp': '2025-10-02 00:25:49.239941', 'step': 8372, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:25:49.311026', 'step': 8372, 'epoch': 1}
{'type': 'loss', 'content': 0.06460916250944138, 'timestamp': '2025-10-02 00:25:49.324803', 'step': 8373, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:25:49.387088', 'step': 8373, 'epoch': 1}
{'type': 'loss', 'content': 0.030143866315484047, 'timestamp': '2025-10-02 00:25:49.397769', 'step': 8374, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:25:49.452306', 'step': 8374, 'epoch': 1}
{'type': 'loss', 'content': 0.1818714290857315, 'timestamp': '2025-10-02 00:25:49.461697', 'step': 8375, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:49.515920', 'step': 8375, 'epoch': 1}
{'type': 'loss', 'content': 0.19814586639404297, 'timestamp': '2025-10-02 00:25:49.521847', 'step': 8376, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:49.576668', 'step': 8376, 'epoch': 1}
{'type': 'loss', 'content': 0.02193678356707096, 'timestamp': '2025-10-02 00:25:49.586933', 'step': 8377, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:25:49.645789', 'step': 8377, 'epoch': 1}
{'type': 'loss', 'content': 0.09496616572141647, 'timestamp': '2025-10-02 00:25:49.655987', 'step': 8378, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:49.710480', 'step': 8378, 'epoch': 1}
{'type': 'loss', 'content': 0.19920381903648376, 'timestamp': '2025-10-02 00:25:49.714139', 'step': 8379, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:49.769032', 'step': 8379, 'epoch': 1}
{'type': 'loss', 'content': 0.16269297897815704, 'timestamp': '2025-10-02 00:25:49.775577', 'step': 8380, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:25:49.829184', 'step': 8380, 'epoch': 1}
{'type': 'loss', 'content': 0.139243021607399, 'timestamp': '2025-10-02 00:25:49.831623', 'step': 8381, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:49.885945', 'step': 8381, 'epoch': 1}
{'type': 'loss', 'content': 0.07778491824865341, 'timestamp': '2025-10-02 00:25:49.893406', 'step': 8382, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:49.947605', 'step': 8382, 'epoch': 1}
{'type': 'loss', 'content': 0.13277672231197357, 'timestamp': '2025-10-02 00:25:49.949903', 'step': 8383, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:50.003928', 'step': 8383, 'epoch': 1}
{'type': 'loss', 'content': 0.041690707206726074, 'timestamp': '2025-10-02 00:25:50.010194', 'step': 8384, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:50.065030', 'step': 8384, 'epoch': 1}
{'type': 'loss', 'content': 0.05203641206026077, 'timestamp': '2025-10-02 00:25:50.067400', 'step': 8385, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:25:50.137350', 'step': 8385, 'epoch': 1}
{'type': 'loss', 'content': 0.025127118453383446, 'timestamp': '2025-10-02 00:25:50.149770', 'step': 8386, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:50.205609', 'step': 8386, 'epoch': 1}
{'type': 'loss', 'content': 0.05908621847629547, 'timestamp': '2025-10-02 00:25:50.212969', 'step': 8387, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:50.267825', 'step': 8387, 'epoch': 1}
{'type': 'loss', 'content': 0.035638418048620224, 'timestamp': '2025-10-02 00:25:50.275183', 'step': 8388, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:50.331708', 'step': 8388, 'epoch': 1}
{'type': 'loss', 'content': 0.08850177377462387, 'timestamp': '2025-10-02 00:25:50.334415', 'step': 8389, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:25:50.389732', 'step': 8389, 'epoch': 1}
{'type': 'loss', 'content': 0.041243042796850204, 'timestamp': '2025-10-02 00:25:50.399127', 'step': 8390, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:25:50.455078', 'step': 8390, 'epoch': 1}
{'type': 'loss', 'content': 0.11283431947231293, 'timestamp': '2025-10-02 00:25:50.457570', 'step': 8391, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:50.512372', 'step': 8391, 'epoch': 1}
{'type': 'loss', 'content': 0.15636201202869415, 'timestamp': '2025-10-02 00:25:50.518240', 'step': 8392, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:50.571875', 'step': 8392, 'epoch': 1}
{'type': 'loss', 'content': 0.11390651017427444, 'timestamp': '2025-10-02 00:25:50.574562', 'step': 8393, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:50.629643', 'step': 8393, 'epoch': 1}
{'type': 'loss', 'content': 0.14734652638435364, 'timestamp': '2025-10-02 00:25:50.632130', 'step': 8394, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:50.686319', 'step': 8394, 'epoch': 1}
{'type': 'loss', 'content': 0.05624063313007355, 'timestamp': '2025-10-02 00:25:50.688700', 'step': 8395, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:50.743864', 'step': 8395, 'epoch': 1}
{'type': 'loss', 'content': 0.07335164397954941, 'timestamp': '2025-10-02 00:25:50.749453', 'step': 8396, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:50.803489', 'step': 8396, 'epoch': 1}
{'type': 'loss', 'content': 0.11839903891086578, 'timestamp': '2025-10-02 00:25:50.805912', 'step': 8397, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:25:50.861221', 'step': 8397, 'epoch': 1}
{'type': 'loss', 'content': 0.0593181736767292, 'timestamp': '2025-10-02 00:25:50.863385', 'step': 8398, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:50.918091', 'step': 8398, 'epoch': 1}
{'type': 'loss', 'content': 0.08192028850317001, 'timestamp': '2025-10-02 00:25:50.920265', 'step': 8399, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:50.975044', 'step': 8399, 'epoch': 1}
{'type': 'loss', 'content': 0.05202959477901459, 'timestamp': '2025-10-02 00:25:50.981105', 'step': 8400, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:51.035125', 'step': 8400, 'epoch': 1}
{'type': 'loss', 'content': 0.07344700396060944, 'timestamp': '2025-10-02 00:25:51.037833', 'step': 8401, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:51.094819', 'step': 8401, 'epoch': 1}
{'type': 'loss', 'content': 0.13801828026771545, 'timestamp': '2025-10-02 00:25:51.097221', 'step': 8402, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:51.154203', 'step': 8402, 'epoch': 1}
{'type': 'loss', 'content': 0.0856635794043541, 'timestamp': '2025-10-02 00:25:51.156645', 'step': 8403, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:25:51.225722', 'step': 8403, 'epoch': 1}
{'type': 'loss', 'content': 0.020857537165284157, 'timestamp': '2025-10-02 00:25:51.238423', 'step': 8404, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:51.292098', 'step': 8404, 'epoch': 1}
{'type': 'loss', 'content': 0.15831725299358368, 'timestamp': '2025-10-02 00:25:51.294496', 'step': 8405, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:25:51.364168', 'step': 8405, 'epoch': 1}
{'type': 'loss', 'content': 0.016060801222920418, 'timestamp': '2025-10-02 00:25:51.376505', 'step': 8406, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:25:51.445628', 'step': 8406, 'epoch': 1}
{'type': 'loss', 'content': 0.033949628472328186, 'timestamp': '2025-10-02 00:25:51.456516', 'step': 8407, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:51.512515', 'step': 8407, 'epoch': 1}
{'type': 'loss', 'content': 0.08734890818595886, 'timestamp': '2025-10-02 00:25:51.522895', 'step': 8408, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:51.577867', 'step': 8408, 'epoch': 1}
{'type': 'loss', 'content': 0.1040542796254158, 'timestamp': '2025-10-02 00:25:51.585397', 'step': 8409, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:51.640176', 'step': 8409, 'epoch': 1}
{'type': 'loss', 'content': 0.09778419137001038, 'timestamp': '2025-10-02 00:25:51.645550', 'step': 8410, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:51.700658', 'step': 8410, 'epoch': 1}
{'type': 'loss', 'content': 0.05560491234064102, 'timestamp': '2025-10-02 00:25:51.703112', 'step': 8411, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:51.757555', 'step': 8411, 'epoch': 1}
{'type': 'loss', 'content': 0.053678348660469055, 'timestamp': '2025-10-02 00:25:51.764117', 'step': 8412, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:51.818172', 'step': 8412, 'epoch': 1}
{'type': 'loss', 'content': 0.06023402139544487, 'timestamp': '2025-10-02 00:25:51.820358', 'step': 8413, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:51.875236', 'step': 8413, 'epoch': 1}
{'type': 'loss', 'content': 0.15821973979473114, 'timestamp': '2025-10-02 00:25:51.877639', 'step': 8414, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:25:51.932532', 'step': 8414, 'epoch': 1}
{'type': 'loss', 'content': 0.1107533723115921, 'timestamp': '2025-10-02 00:25:51.934878', 'step': 8415, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:51.990210', 'step': 8415, 'epoch': 1}
{'type': 'loss', 'content': 0.14361968636512756, 'timestamp': '2025-10-02 00:25:51.995989', 'step': 8416, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:52.050471', 'step': 8416, 'epoch': 1}
{'type': 'loss', 'content': 0.16177645325660706, 'timestamp': '2025-10-02 00:25:52.052723', 'step': 8417, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:52.106950', 'step': 8417, 'epoch': 1}
{'type': 'loss', 'content': 0.0578850694000721, 'timestamp': '2025-10-02 00:25:52.108966', 'step': 8418, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:52.163424', 'step': 8418, 'epoch': 1}
{'type': 'loss', 'content': 0.046566594392061234, 'timestamp': '2025-10-02 00:25:52.165828', 'step': 8419, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:52.220290', 'step': 8419, 'epoch': 1}
{'type': 'loss', 'content': 0.07221214473247528, 'timestamp': '2025-10-02 00:25:52.226180', 'step': 8420, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:52.279821', 'step': 8420, 'epoch': 1}
{'type': 'loss', 'content': 0.05332901328802109, 'timestamp': '2025-10-02 00:25:52.285663', 'step': 8421, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:25:52.345800', 'step': 8421, 'epoch': 1}
{'type': 'loss', 'content': 0.030634133145213127, 'timestamp': '2025-10-02 00:25:52.355989', 'step': 8422, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:25:52.410009', 'step': 8422, 'epoch': 1}
{'type': 'loss', 'content': 0.06966434419155121, 'timestamp': '2025-10-02 00:25:52.412447', 'step': 8423, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 640], 'flops': 12800077771264.0}, 'timestamp': '2025-10-02 00:25:52.505704', 'step': 8423, 'epoch': 1}
{'type': 'loss', 'content': 0.023776089772582054, 'timestamp': '2025-10-02 00:25:52.523677', 'step': 8424, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:25:52.577509', 'step': 8424, 'epoch': 1}
{'type': 'loss', 'content': 0.06262005120515823, 'timestamp': '2025-10-02 00:25:52.580019', 'step': 8425, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:52.634501', 'step': 8425, 'epoch': 1}
{'type': 'loss', 'content': 0.14444242417812347, 'timestamp': '2025-10-02 00:25:52.636808', 'step': 8426, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:52.691399', 'step': 8426, 'epoch': 1}
{'type': 'loss', 'content': 0.09343814104795456, 'timestamp': '2025-10-02 00:25:52.693827', 'step': 8427, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:25:52.757521', 'step': 8427, 'epoch': 1}
{'type': 'loss', 'content': 0.011134637519717216, 'timestamp': '2025-10-02 00:25:52.769169', 'step': 8428, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:25:52.840210', 'step': 8428, 'epoch': 1}
{'type': 'loss', 'content': 0.031058980152010918, 'timestamp': '2025-10-02 00:25:52.851587', 'step': 8429, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:52.907893', 'step': 8429, 'epoch': 1}
{'type': 'loss', 'content': 0.12794625759124756, 'timestamp': '2025-10-02 00:25:52.910811', 'step': 8430, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-10-02 00:25:52.988274', 'step': 8430, 'epoch': 1}
{'type': 'loss', 'content': 0.012289098463952541, 'timestamp': '2025-10-02 00:25:53.002015', 'step': 8431, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:53.056643', 'step': 8431, 'epoch': 1}
{'type': 'loss', 'content': 0.12355932593345642, 'timestamp': '2025-10-02 00:25:53.063356', 'step': 8432, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:25:53.117031', 'step': 8432, 'epoch': 1}
{'type': 'loss', 'content': 0.14221984148025513, 'timestamp': '2025-10-02 00:25:53.119270', 'step': 8433, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:53.173522', 'step': 8433, 'epoch': 1}
{'type': 'loss', 'content': 0.048227760940790176, 'timestamp': '2025-10-02 00:25:53.175965', 'step': 8434, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:53.230456', 'step': 8434, 'epoch': 1}
{'type': 'loss', 'content': 0.10142221301794052, 'timestamp': '2025-10-02 00:25:53.232540', 'step': 8435, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:25:53.286508', 'step': 8435, 'epoch': 1}
{'type': 'loss', 'content': 0.041041918098926544, 'timestamp': '2025-10-02 00:25:53.292578', 'step': 8436, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:53.345710', 'step': 8436, 'epoch': 1}
{'type': 'loss', 'content': 0.11425553262233734, 'timestamp': '2025-10-02 00:25:53.348260', 'step': 8437, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:53.402665', 'step': 8437, 'epoch': 1}
{'type': 'loss', 'content': 0.085086889564991, 'timestamp': '2025-10-02 00:25:53.408644', 'step': 8438, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:53.463694', 'step': 8438, 'epoch': 1}
{'type': 'loss', 'content': 0.03070257045328617, 'timestamp': '2025-10-02 00:25:53.466030', 'step': 8439, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:53.520062', 'step': 8439, 'epoch': 1}
{'type': 'loss', 'content': 0.018423328176140785, 'timestamp': '2025-10-02 00:25:53.528383', 'step': 8440, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:53.582184', 'step': 8440, 'epoch': 1}
{'type': 'loss', 'content': 0.05592513456940651, 'timestamp': '2025-10-02 00:25:53.584452', 'step': 8441, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:25:53.638651', 'step': 8441, 'epoch': 1}
{'type': 'loss', 'content': 0.10988567024469376, 'timestamp': '2025-10-02 00:25:53.640979', 'step': 8442, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:53.695823', 'step': 8442, 'epoch': 1}
{'type': 'loss', 'content': 0.0402587428689003, 'timestamp': '2025-10-02 00:25:53.700700', 'step': 8443, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:25:53.762542', 'step': 8443, 'epoch': 1}
{'type': 'loss', 'content': 0.020898962393403053, 'timestamp': '2025-10-02 00:25:53.773778', 'step': 8444, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:53.829221', 'step': 8444, 'epoch': 1}
{'type': 'loss', 'content': 0.08586987853050232, 'timestamp': '2025-10-02 00:25:53.839468', 'step': 8445, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:53.894345', 'step': 8445, 'epoch': 1}
{'type': 'loss', 'content': 0.12042638659477234, 'timestamp': '2025-10-02 00:25:53.896840', 'step': 8446, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:25:53.952262', 'step': 8446, 'epoch': 1}
{'type': 'loss', 'content': 0.13684029877185822, 'timestamp': '2025-10-02 00:25:53.954722', 'step': 8447, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:54.010285', 'step': 8447, 'epoch': 1}
{'type': 'loss', 'content': 0.051001813262701035, 'timestamp': '2025-10-02 00:25:54.017035', 'step': 8448, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:54.072136', 'step': 8448, 'epoch': 1}
{'type': 'loss', 'content': 0.08288609236478806, 'timestamp': '2025-10-02 00:25:54.074928', 'step': 8449, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:54.130124', 'step': 8449, 'epoch': 1}
{'type': 'loss', 'content': 0.0572625957429409, 'timestamp': '2025-10-02 00:25:54.132669', 'step': 8450, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:54.188966', 'step': 8450, 'epoch': 1}
{'type': 'loss', 'content': 0.060945816338062286, 'timestamp': '2025-10-02 00:25:54.196458', 'step': 8451, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:25:54.251730', 'step': 8451, 'epoch': 1}
{'type': 'loss', 'content': 0.10487615317106247, 'timestamp': '2025-10-02 00:25:54.258355', 'step': 8452, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:25:54.313015', 'step': 8452, 'epoch': 1}
{'type': 'loss', 'content': 0.06026279181241989, 'timestamp': '2025-10-02 00:25:54.322713', 'step': 8453, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:54.379430', 'step': 8453, 'epoch': 1}
{'type': 'loss', 'content': 0.10624998807907104, 'timestamp': '2025-10-02 00:25:54.386658', 'step': 8454, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:54.443558', 'step': 8454, 'epoch': 1}
{'type': 'loss', 'content': 0.06555257737636566, 'timestamp': '2025-10-02 00:25:54.446355', 'step': 8455, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:54.501375', 'step': 8455, 'epoch': 1}
{'type': 'loss', 'content': 0.1037214994430542, 'timestamp': '2025-10-02 00:25:54.508116', 'step': 8456, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:54.563196', 'step': 8456, 'epoch': 1}
{'type': 'loss', 'content': 0.07225064188241959, 'timestamp': '2025-10-02 00:25:54.570768', 'step': 8457, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:54.626907', 'step': 8457, 'epoch': 1}
{'type': 'loss', 'content': 0.06000347062945366, 'timestamp': '2025-10-02 00:25:54.629822', 'step': 8458, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:54.685887', 'step': 8458, 'epoch': 1}
{'type': 'loss', 'content': 0.22297511994838715, 'timestamp': '2025-10-02 00:25:54.688174', 'step': 8459, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:54.743614', 'step': 8459, 'epoch': 1}
{'type': 'loss', 'content': 0.1569969207048416, 'timestamp': '2025-10-02 00:25:54.752021', 'step': 8460, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:54.805902', 'step': 8460, 'epoch': 1}
{'type': 'loss', 'content': 0.1740250438451767, 'timestamp': '2025-10-02 00:25:54.809117', 'step': 8461, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:25:54.865662', 'step': 8461, 'epoch': 1}
{'type': 'loss', 'content': 0.1543779969215393, 'timestamp': '2025-10-02 00:25:54.868954', 'step': 8462, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:25:54.925428', 'step': 8462, 'epoch': 1}
{'type': 'loss', 'content': 0.21891315281391144, 'timestamp': '2025-10-02 00:25:54.929344', 'step': 8463, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:54.986693', 'step': 8463, 'epoch': 1}
{'type': 'loss', 'content': 0.02588699385523796, 'timestamp': '2025-10-02 00:25:54.997020', 'step': 8464, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:55.051821', 'step': 8464, 'epoch': 1}
{'type': 'loss', 'content': 0.10983961820602417, 'timestamp': '2025-10-02 00:25:55.054599', 'step': 8465, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:25:55.124108', 'step': 8465, 'epoch': 1}
{'type': 'loss', 'content': 0.008694512769579887, 'timestamp': '2025-10-02 00:25:55.136101', 'step': 8466, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:25:55.207054', 'step': 8466, 'epoch': 1}
{'type': 'loss', 'content': 0.013873378746211529, 'timestamp': '2025-10-02 00:25:55.219068', 'step': 8467, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:55.275138', 'step': 8467, 'epoch': 1}
{'type': 'loss', 'content': 0.06758946925401688, 'timestamp': '2025-10-02 00:25:55.281191', 'step': 8468, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:25:55.337082', 'step': 8468, 'epoch': 1}
{'type': 'loss', 'content': 0.029666289687156677, 'timestamp': '2025-10-02 00:25:55.347361', 'step': 8469, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:55.403636', 'step': 8469, 'epoch': 1}
{'type': 'loss', 'content': 0.05082685500383377, 'timestamp': '2025-10-02 00:25:55.407005', 'step': 8470, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:25:55.472577', 'step': 8470, 'epoch': 1}
{'type': 'loss', 'content': 0.018581269308924675, 'timestamp': '2025-10-02 00:25:55.483453', 'step': 8471, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:55.541497', 'step': 8471, 'epoch': 1}
{'type': 'loss', 'content': 0.1009402722120285, 'timestamp': '2025-10-02 00:25:55.547873', 'step': 8472, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:55.604479', 'step': 8472, 'epoch': 1}
{'type': 'loss', 'content': 0.020792290568351746, 'timestamp': '2025-10-02 00:25:55.606738', 'step': 8473, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:55.662371', 'step': 8473, 'epoch': 1}
{'type': 'loss', 'content': 0.273671954870224, 'timestamp': '2025-10-02 00:25:55.664996', 'step': 8474, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:55.722913', 'step': 8474, 'epoch': 1}
{'type': 'loss', 'content': 0.10243566334247589, 'timestamp': '2025-10-02 00:25:55.725509', 'step': 8475, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:25:55.781090', 'step': 8475, 'epoch': 1}
{'type': 'loss', 'content': 0.24495163559913635, 'timestamp': '2025-10-02 00:25:55.787635', 'step': 8476, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:25:55.842865', 'step': 8476, 'epoch': 1}
{'type': 'loss', 'content': 0.046200405806303024, 'timestamp': '2025-10-02 00:25:55.845287', 'step': 8477, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:25:55.900216', 'step': 8477, 'epoch': 1}
{'type': 'loss', 'content': 0.08313898742198944, 'timestamp': '2025-10-02 00:25:55.909559', 'step': 8478, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:25:55.964316', 'step': 8478, 'epoch': 1}
{'type': 'loss', 'content': 0.21448349952697754, 'timestamp': '2025-10-02 00:25:55.966765', 'step': 8479, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:56.022333', 'step': 8479, 'epoch': 1}
{'type': 'loss', 'content': 0.18244533240795135, 'timestamp': '2025-10-02 00:25:56.030619', 'step': 8480, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:56.087576', 'step': 8480, 'epoch': 1}
{'type': 'loss', 'content': 0.044804707169532776, 'timestamp': '2025-10-02 00:25:56.093607', 'step': 8481, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:56.150494', 'step': 8481, 'epoch': 1}
{'type': 'loss', 'content': 0.0817079246044159, 'timestamp': '2025-10-02 00:25:56.153029', 'step': 8482, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:56.207786', 'step': 8482, 'epoch': 1}
{'type': 'loss', 'content': 0.06235753744840622, 'timestamp': '2025-10-02 00:25:56.211061', 'step': 8483, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:56.272208', 'step': 8483, 'epoch': 1}
{'type': 'loss', 'content': 0.056141022592782974, 'timestamp': '2025-10-02 00:25:56.278824', 'step': 8484, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:25:56.334836', 'step': 8484, 'epoch': 1}
{'type': 'loss', 'content': 0.16866472363471985, 'timestamp': '2025-10-02 00:25:56.338826', 'step': 8485, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:56.394888', 'step': 8485, 'epoch': 1}
{'type': 'loss', 'content': 0.04395730793476105, 'timestamp': '2025-10-02 00:25:56.398179', 'step': 8486, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:56.454732', 'step': 8486, 'epoch': 1}
{'type': 'loss', 'content': 0.09442894160747528, 'timestamp': '2025-10-02 00:25:56.460327', 'step': 8487, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:56.515116', 'step': 8487, 'epoch': 1}
{'type': 'loss', 'content': 0.05821819230914116, 'timestamp': '2025-10-02 00:25:56.520881', 'step': 8488, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:56.574991', 'step': 8488, 'epoch': 1}
{'type': 'loss', 'content': 0.030562056228518486, 'timestamp': '2025-10-02 00:25:56.577388', 'step': 8489, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:56.631390', 'step': 8489, 'epoch': 1}
{'type': 'loss', 'content': 0.10282547771930695, 'timestamp': '2025-10-02 00:25:56.633822', 'step': 8490, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:56.690262', 'step': 8490, 'epoch': 1}
{'type': 'loss', 'content': 0.08296781033277512, 'timestamp': '2025-10-02 00:25:56.692660', 'step': 8491, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:56.746819', 'step': 8491, 'epoch': 1}
{'type': 'loss', 'content': 0.1002911850810051, 'timestamp': '2025-10-02 00:25:56.752627', 'step': 8492, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:56.806331', 'step': 8492, 'epoch': 1}
{'type': 'loss', 'content': 0.09111662209033966, 'timestamp': '2025-10-02 00:25:56.808603', 'step': 8493, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:25:56.868070', 'step': 8493, 'epoch': 1}
{'type': 'loss', 'content': 0.053918518126010895, 'timestamp': '2025-10-02 00:25:56.878297', 'step': 8494, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:25:56.932650', 'step': 8494, 'epoch': 1}
{'type': 'loss', 'content': 0.13053111732006073, 'timestamp': '2025-10-02 00:25:56.935134', 'step': 8495, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:56.989262', 'step': 8495, 'epoch': 1}
{'type': 'loss', 'content': 0.025525150820612907, 'timestamp': '2025-10-02 00:25:56.995083', 'step': 8496, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:57.048606', 'step': 8496, 'epoch': 1}
{'type': 'loss', 'content': 0.09710776805877686, 'timestamp': '2025-10-02 00:25:57.051289', 'step': 8497, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:57.106643', 'step': 8497, 'epoch': 1}
{'type': 'loss', 'content': 0.05052690953016281, 'timestamp': '2025-10-02 00:25:57.108944', 'step': 8498, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:25:57.164250', 'step': 8498, 'epoch': 1}
{'type': 'loss', 'content': 0.1602805107831955, 'timestamp': '2025-10-02 00:25:57.166779', 'step': 8499, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:57.221192', 'step': 8499, 'epoch': 1}
{'type': 'loss', 'content': 0.07131277024745941, 'timestamp': '2025-10-02 00:25:57.227796', 'step': 8500, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 8500', 'timestamp': '2025-10-02 00:25:57.654586', 'step': 8500, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:57.710834', 'step': 8500, 'epoch': 1}
{'type': 'loss', 'content': 0.031026605516672134, 'timestamp': '2025-10-02 00:25:57.715489', 'step': 8501, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:57.771532', 'step': 8501, 'epoch': 1}
{'type': 'loss', 'content': 0.07551257312297821, 'timestamp': '2025-10-02 00:25:57.773848', 'step': 8502, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:57.828056', 'step': 8502, 'epoch': 1}
{'type': 'loss', 'content': 0.14164604246616364, 'timestamp': '2025-10-02 00:25:57.830604', 'step': 8503, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:57.885319', 'step': 8503, 'epoch': 1}
{'type': 'loss', 'content': 0.027340348809957504, 'timestamp': '2025-10-02 00:25:57.893385', 'step': 8504, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:57.947125', 'step': 8504, 'epoch': 1}
{'type': 'loss', 'content': 0.06708935648202896, 'timestamp': '2025-10-02 00:25:57.949572', 'step': 8505, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:58.003922', 'step': 8505, 'epoch': 1}
{'type': 'loss', 'content': 0.13424459099769592, 'timestamp': '2025-10-02 00:25:58.006179', 'step': 8506, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:25:58.060115', 'step': 8506, 'epoch': 1}
{'type': 'loss', 'content': 0.21036551892757416, 'timestamp': '2025-10-02 00:25:58.062451', 'step': 8507, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:58.116963', 'step': 8507, 'epoch': 1}
{'type': 'loss', 'content': 0.055035512894392014, 'timestamp': '2025-10-02 00:25:58.125259', 'step': 8508, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:58.179871', 'step': 8508, 'epoch': 1}
{'type': 'loss', 'content': 0.024126971140503883, 'timestamp': '2025-10-02 00:25:58.182560', 'step': 8509, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:25:58.241792', 'step': 8509, 'epoch': 1}
{'type': 'loss', 'content': 0.03547729551792145, 'timestamp': '2025-10-02 00:25:58.252010', 'step': 8510, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:25:58.323827', 'step': 8510, 'epoch': 1}
{'type': 'loss', 'content': 0.02860376052558422, 'timestamp': '2025-10-02 00:25:58.336436', 'step': 8511, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:58.392743', 'step': 8511, 'epoch': 1}
{'type': 'loss', 'content': 0.0708690658211708, 'timestamp': '2025-10-02 00:25:58.398409', 'step': 8512, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:58.452911', 'step': 8512, 'epoch': 1}
{'type': 'loss', 'content': 0.0132434768602252, 'timestamp': '2025-10-02 00:25:58.458406', 'step': 8513, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:25:58.514045', 'step': 8513, 'epoch': 1}
{'type': 'loss', 'content': 0.0404609851539135, 'timestamp': '2025-10-02 00:25:58.519344', 'step': 8514, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:25:58.574051', 'step': 8514, 'epoch': 1}
{'type': 'loss', 'content': 0.14369718730449677, 'timestamp': '2025-10-02 00:25:58.576504', 'step': 8515, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:25:58.630806', 'step': 8515, 'epoch': 1}
{'type': 'loss', 'content': 0.1929447501897812, 'timestamp': '2025-10-02 00:25:58.636575', 'step': 8516, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:58.689870', 'step': 8516, 'epoch': 1}
{'type': 'loss', 'content': 0.09260644018650055, 'timestamp': '2025-10-02 00:25:58.692425', 'step': 8517, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:25:58.750817', 'step': 8517, 'epoch': 1}
{'type': 'loss', 'content': 0.08561791479587555, 'timestamp': '2025-10-02 00:25:58.761021', 'step': 8518, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:25:58.815619', 'step': 8518, 'epoch': 1}
{'type': 'loss', 'content': 0.08494172990322113, 'timestamp': '2025-10-02 00:25:58.818285', 'step': 8519, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:58.872201', 'step': 8519, 'epoch': 1}
{'type': 'loss', 'content': 0.23641592264175415, 'timestamp': '2025-10-02 00:25:58.878161', 'step': 8520, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:58.933464', 'step': 8520, 'epoch': 1}
{'type': 'loss', 'content': 0.06069254130125046, 'timestamp': '2025-10-02 00:25:58.935584', 'step': 8521, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:58.990546', 'step': 8521, 'epoch': 1}
{'type': 'loss', 'content': 0.017179081216454506, 'timestamp': '2025-10-02 00:25:58.993463', 'step': 8522, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:25:59.048666', 'step': 8522, 'epoch': 1}
{'type': 'loss', 'content': 0.15914678573608398, 'timestamp': '2025-10-02 00:25:59.051055', 'step': 8523, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:25:59.106199', 'step': 8523, 'epoch': 1}
{'type': 'loss', 'content': 0.020925704389810562, 'timestamp': '2025-10-02 00:25:59.112194', 'step': 8524, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:59.167426', 'step': 8524, 'epoch': 1}
{'type': 'loss', 'content': 0.06934629380702972, 'timestamp': '2025-10-02 00:25:59.174433', 'step': 8525, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:25:59.229160', 'step': 8525, 'epoch': 1}
{'type': 'loss', 'content': 0.17580419778823853, 'timestamp': '2025-10-02 00:25:59.231833', 'step': 8526, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:25:59.287991', 'step': 8526, 'epoch': 1}
{'type': 'loss', 'content': 0.11904306709766388, 'timestamp': '2025-10-02 00:25:59.290393', 'step': 8527, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:59.344906', 'step': 8527, 'epoch': 1}
{'type': 'loss', 'content': 0.22846129536628723, 'timestamp': '2025-10-02 00:25:59.350898', 'step': 8528, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:59.406605', 'step': 8528, 'epoch': 1}
{'type': 'loss', 'content': 0.03499520942568779, 'timestamp': '2025-10-02 00:25:59.413778', 'step': 8529, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:25:59.468659', 'step': 8529, 'epoch': 1}
{'type': 'loss', 'content': 0.2883966565132141, 'timestamp': '2025-10-02 00:25:59.471482', 'step': 8530, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:25:59.527614', 'step': 8530, 'epoch': 1}
{'type': 'loss', 'content': 0.059951163828372955, 'timestamp': '2025-10-02 00:25:59.536578', 'step': 8531, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:59.592625', 'step': 8531, 'epoch': 1}
{'type': 'loss', 'content': 0.020103897899389267, 'timestamp': '2025-10-02 00:25:59.598697', 'step': 8532, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:25:59.652776', 'step': 8532, 'epoch': 1}
{'type': 'loss', 'content': 0.021576909348368645, 'timestamp': '2025-10-02 00:25:59.655173', 'step': 8533, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:25:59.709319', 'step': 8533, 'epoch': 1}
{'type': 'loss', 'content': 0.13108637928962708, 'timestamp': '2025-10-02 00:25:59.712027', 'step': 8534, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:59.767348', 'step': 8534, 'epoch': 1}
{'type': 'loss', 'content': 0.08056918531656265, 'timestamp': '2025-10-02 00:25:59.769612', 'step': 8535, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:25:59.824319', 'step': 8535, 'epoch': 1}
{'type': 'loss', 'content': 0.08834701776504517, 'timestamp': '2025-10-02 00:25:59.830946', 'step': 8536, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:25:59.886318', 'step': 8536, 'epoch': 1}
{'type': 'loss', 'content': 0.08631252497434616, 'timestamp': '2025-10-02 00:25:59.888860', 'step': 8537, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:25:59.945396', 'step': 8537, 'epoch': 1}
{'type': 'loss', 'content': 0.05579358711838722, 'timestamp': '2025-10-02 00:25:59.952290', 'step': 8538, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:26:00.012703', 'step': 8538, 'epoch': 1}
{'type': 'loss', 'content': 0.005707763601094484, 'timestamp': '2025-10-02 00:26:00.022882', 'step': 8539, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:00.077495', 'step': 8539, 'epoch': 1}
{'type': 'loss', 'content': 0.08073072880506516, 'timestamp': '2025-10-02 00:26:00.083552', 'step': 8540, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:00.137675', 'step': 8540, 'epoch': 1}
{'type': 'loss', 'content': 0.09220034629106522, 'timestamp': '2025-10-02 00:26:00.140227', 'step': 8541, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:00.194707', 'step': 8541, 'epoch': 1}
{'type': 'loss', 'content': 0.05092763155698776, 'timestamp': '2025-10-02 00:26:00.204079', 'step': 8542, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:00.259664', 'step': 8542, 'epoch': 1}
{'type': 'loss', 'content': 0.11882583051919937, 'timestamp': '2025-10-02 00:26:00.261875', 'step': 8543, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:00.316868', 'step': 8543, 'epoch': 1}
{'type': 'loss', 'content': 0.0859919860959053, 'timestamp': '2025-10-02 00:26:00.322593', 'step': 8544, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:26:00.377390', 'step': 8544, 'epoch': 1}
{'type': 'loss', 'content': 0.15002189576625824, 'timestamp': '2025-10-02 00:26:00.380112', 'step': 8545, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:26:00.435802', 'step': 8545, 'epoch': 1}
{'type': 'loss', 'content': 0.07043034583330154, 'timestamp': '2025-10-02 00:26:00.445318', 'step': 8546, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:00.500804', 'step': 8546, 'epoch': 1}
{'type': 'loss', 'content': 0.047985903918743134, 'timestamp': '2025-10-02 00:26:00.509923', 'step': 8547, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:00.565622', 'step': 8547, 'epoch': 1}
{'type': 'loss', 'content': 0.10609373450279236, 'timestamp': '2025-10-02 00:26:00.571655', 'step': 8548, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:00.625263', 'step': 8548, 'epoch': 1}
{'type': 'loss', 'content': 0.09004616737365723, 'timestamp': '2025-10-02 00:26:00.627858', 'step': 8549, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:00.682828', 'step': 8549, 'epoch': 1}
{'type': 'loss', 'content': 0.19971182942390442, 'timestamp': '2025-10-02 00:26:00.685024', 'step': 8550, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:26:00.744039', 'step': 8550, 'epoch': 1}
{'type': 'loss', 'content': 0.0729956328868866, 'timestamp': '2025-10-02 00:26:00.754179', 'step': 8551, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:00.810699', 'step': 8551, 'epoch': 1}
{'type': 'loss', 'content': 0.025286106392741203, 'timestamp': '2025-10-02 00:26:00.816661', 'step': 8552, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:00.872117', 'step': 8552, 'epoch': 1}
{'type': 'loss', 'content': 0.007397002074867487, 'timestamp': '2025-10-02 00:26:00.874387', 'step': 8553, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:00.928561', 'step': 8553, 'epoch': 1}
{'type': 'loss', 'content': 0.024291083216667175, 'timestamp': '2025-10-02 00:26:00.936200', 'step': 8554, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:00.992275', 'step': 8554, 'epoch': 1}
{'type': 'loss', 'content': 0.0698482021689415, 'timestamp': '2025-10-02 00:26:00.997794', 'step': 8555, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:01.053033', 'step': 8555, 'epoch': 1}
{'type': 'loss', 'content': 0.128644660115242, 'timestamp': '2025-10-02 00:26:01.059097', 'step': 8556, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:01.113250', 'step': 8556, 'epoch': 1}
{'type': 'loss', 'content': 0.2226148396730423, 'timestamp': '2025-10-02 00:26:01.115683', 'step': 8557, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:01.170768', 'step': 8557, 'epoch': 1}
{'type': 'loss', 'content': 0.04234878346323967, 'timestamp': '2025-10-02 00:26:01.173441', 'step': 8558, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:26:01.229349', 'step': 8558, 'epoch': 1}
{'type': 'loss', 'content': 0.07846571505069733, 'timestamp': '2025-10-02 00:26:01.231763', 'step': 8559, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:01.287663', 'step': 8559, 'epoch': 1}
{'type': 'loss', 'content': 0.06354058533906937, 'timestamp': '2025-10-02 00:26:01.294111', 'step': 8560, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:26:01.348648', 'step': 8560, 'epoch': 1}
{'type': 'loss', 'content': 0.0722791850566864, 'timestamp': '2025-10-02 00:26:01.350845', 'step': 8561, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:01.404874', 'step': 8561, 'epoch': 1}
{'type': 'loss', 'content': 0.04620632156729698, 'timestamp': '2025-10-02 00:26:01.407415', 'step': 8562, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:01.462457', 'step': 8562, 'epoch': 1}
{'type': 'loss', 'content': 0.12238811701536179, 'timestamp': '2025-10-02 00:26:01.465558', 'step': 8563, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:26:01.527849', 'step': 8563, 'epoch': 1}
{'type': 'loss', 'content': 0.030258003622293472, 'timestamp': '2025-10-02 00:26:01.539115', 'step': 8564, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:01.593699', 'step': 8564, 'epoch': 1}
{'type': 'loss', 'content': 0.026209937408566475, 'timestamp': '2025-10-02 00:26:01.602783', 'step': 8565, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:26:01.658777', 'step': 8565, 'epoch': 1}
{'type': 'loss', 'content': 0.05867542326450348, 'timestamp': '2025-10-02 00:26:01.668342', 'step': 8566, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:01.723485', 'step': 8566, 'epoch': 1}
{'type': 'loss', 'content': 0.06689855456352234, 'timestamp': '2025-10-02 00:26:01.729442', 'step': 8567, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:26:01.785301', 'step': 8567, 'epoch': 1}
{'type': 'loss', 'content': 0.027371475473046303, 'timestamp': '2025-10-02 00:26:01.795623', 'step': 8568, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:26:01.864168', 'step': 8568, 'epoch': 1}
{'type': 'loss', 'content': 0.01553217601031065, 'timestamp': '2025-10-02 00:26:01.877567', 'step': 8569, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:01.932487', 'step': 8569, 'epoch': 1}
{'type': 'loss', 'content': 0.11205023527145386, 'timestamp': '2025-10-02 00:26:01.934801', 'step': 8570, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:01.989700', 'step': 8570, 'epoch': 1}
{'type': 'loss', 'content': 0.09782073646783829, 'timestamp': '2025-10-02 00:26:01.992541', 'step': 8571, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:02.047951', 'step': 8571, 'epoch': 1}
{'type': 'loss', 'content': 0.1187179833650589, 'timestamp': '2025-10-02 00:26:02.054143', 'step': 8572, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:26:02.111956', 'step': 8572, 'epoch': 1}
{'type': 'loss', 'content': 0.06249047815799713, 'timestamp': '2025-10-02 00:26:02.122937', 'step': 8573, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:26:02.195006', 'step': 8573, 'epoch': 1}
{'type': 'loss', 'content': 0.015427689999341965, 'timestamp': '2025-10-02 00:26:02.207663', 'step': 8574, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:26:02.269260', 'step': 8574, 'epoch': 1}
{'type': 'loss', 'content': 0.0437585785984993, 'timestamp': '2025-10-02 00:26:02.279739', 'step': 8575, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:02.333936', 'step': 8575, 'epoch': 1}
{'type': 'loss', 'content': 0.1437223255634308, 'timestamp': '2025-10-02 00:26:02.340169', 'step': 8576, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:02.394656', 'step': 8576, 'epoch': 1}
{'type': 'loss', 'content': 0.07889055460691452, 'timestamp': '2025-10-02 00:26:02.397377', 'step': 8577, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:02.452181', 'step': 8577, 'epoch': 1}
{'type': 'loss', 'content': 0.2279321253299713, 'timestamp': '2025-10-02 00:26:02.454556', 'step': 8578, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:02.509633', 'step': 8578, 'epoch': 1}
{'type': 'loss', 'content': 0.21509510278701782, 'timestamp': '2025-10-02 00:26:02.511732', 'step': 8579, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:02.566212', 'step': 8579, 'epoch': 1}
{'type': 'loss', 'content': 0.18401166796684265, 'timestamp': '2025-10-02 00:26:02.572742', 'step': 8580, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:02.627346', 'step': 8580, 'epoch': 1}
{'type': 'loss', 'content': 0.21475417912006378, 'timestamp': '2025-10-02 00:26:02.629859', 'step': 8581, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:02.684807', 'step': 8581, 'epoch': 1}
{'type': 'loss', 'content': 0.11793398857116699, 'timestamp': '2025-10-02 00:26:02.687574', 'step': 8582, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:02.742855', 'step': 8582, 'epoch': 1}
{'type': 'loss', 'content': 0.13078735768795013, 'timestamp': '2025-10-02 00:26:02.750090', 'step': 8583, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:02.804086', 'step': 8583, 'epoch': 1}
{'type': 'loss', 'content': 0.05156239867210388, 'timestamp': '2025-10-02 00:26:02.810627', 'step': 8584, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:26:02.865067', 'step': 8584, 'epoch': 1}
{'type': 'loss', 'content': 0.06530477851629257, 'timestamp': '2025-10-02 00:26:02.875319', 'step': 8585, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:02.932423', 'step': 8585, 'epoch': 1}
{'type': 'loss', 'content': 0.022470509633421898, 'timestamp': '2025-10-02 00:26:02.935058', 'step': 8586, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:26:02.989009', 'step': 8586, 'epoch': 1}
{'type': 'loss', 'content': 0.1872590333223343, 'timestamp': '2025-10-02 00:26:02.991442', 'step': 8587, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:03.046414', 'step': 8587, 'epoch': 1}
{'type': 'loss', 'content': 0.08210109174251556, 'timestamp': '2025-10-02 00:26:03.052906', 'step': 8588, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:03.108230', 'step': 8588, 'epoch': 1}
{'type': 'loss', 'content': 0.04139969125390053, 'timestamp': '2025-10-02 00:26:03.111027', 'step': 8589, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:26:03.167902', 'step': 8589, 'epoch': 1}
{'type': 'loss', 'content': 0.03512711450457573, 'timestamp': '2025-10-02 00:26:03.177473', 'step': 8590, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:03.249709', 'step': 8590, 'epoch': 1}
{'type': 'loss', 'content': 0.08062122762203217, 'timestamp': '2025-10-02 00:26:03.255531', 'step': 8591, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:03.313902', 'step': 8591, 'epoch': 1}
{'type': 'loss', 'content': 0.058625977486371994, 'timestamp': '2025-10-02 00:26:03.322088', 'step': 8592, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:03.378147', 'step': 8592, 'epoch': 1}
{'type': 'loss', 'content': 0.13163860142230988, 'timestamp': '2025-10-02 00:26:03.380517', 'step': 8593, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:03.435897', 'step': 8593, 'epoch': 1}
{'type': 'loss', 'content': 0.03094150871038437, 'timestamp': '2025-10-02 00:26:03.441872', 'step': 8594, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:03.504236', 'step': 8594, 'epoch': 1}
{'type': 'loss', 'content': 0.08240891247987747, 'timestamp': '2025-10-02 00:26:03.507159', 'step': 8595, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:26:03.581668', 'step': 8595, 'epoch': 1}
{'type': 'loss', 'content': 0.014680168591439724, 'timestamp': '2025-10-02 00:26:03.595107', 'step': 8596, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:26:03.658432', 'step': 8596, 'epoch': 1}
{'type': 'loss', 'content': 0.1060774102807045, 'timestamp': '2025-10-02 00:26:03.661997', 'step': 8597, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:03.722341', 'step': 8597, 'epoch': 1}
{'type': 'loss', 'content': 0.048448819667100906, 'timestamp': '2025-10-02 00:26:03.728050', 'step': 8598, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:26:03.797954', 'step': 8598, 'epoch': 1}
{'type': 'loss', 'content': 0.04107649251818657, 'timestamp': '2025-10-02 00:26:03.810985', 'step': 8599, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:03.874230', 'step': 8599, 'epoch': 1}
{'type': 'loss', 'content': 0.02154127135872841, 'timestamp': '2025-10-02 00:26:03.882814', 'step': 8600, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:03.954217', 'step': 8600, 'epoch': 1}
{'type': 'loss', 'content': 0.1301155686378479, 'timestamp': '2025-10-02 00:26:03.956989', 'step': 8601, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:26:04.013950', 'step': 8601, 'epoch': 1}
{'type': 'loss', 'content': 0.20146502554416656, 'timestamp': '2025-10-02 00:26:04.016690', 'step': 8602, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:04.081698', 'step': 8602, 'epoch': 1}
{'type': 'loss', 'content': 0.07100909948348999, 'timestamp': '2025-10-02 00:26:04.083921', 'step': 8603, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:04.139734', 'step': 8603, 'epoch': 1}
{'type': 'loss', 'content': 0.03365090489387512, 'timestamp': '2025-10-02 00:26:04.146055', 'step': 8604, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:04.201319', 'step': 8604, 'epoch': 1}
{'type': 'loss', 'content': 0.056125979870557785, 'timestamp': '2025-10-02 00:26:04.203768', 'step': 8605, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:04.258738', 'step': 8605, 'epoch': 1}
{'type': 'loss', 'content': 0.0998523011803627, 'timestamp': '2025-10-02 00:26:04.266219', 'step': 8606, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:04.322421', 'step': 8606, 'epoch': 1}
{'type': 'loss', 'content': 0.049627289175987244, 'timestamp': '2025-10-02 00:26:04.327051', 'step': 8607, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:26:04.391470', 'step': 8607, 'epoch': 1}
{'type': 'loss', 'content': 0.05039365217089653, 'timestamp': '2025-10-02 00:26:04.407102', 'step': 8608, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:04.463998', 'step': 8608, 'epoch': 1}
{'type': 'loss', 'content': 0.04805542528629303, 'timestamp': '2025-10-02 00:26:04.472042', 'step': 8609, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:04.530198', 'step': 8609, 'epoch': 1}
{'type': 'loss', 'content': 0.08335243910551071, 'timestamp': '2025-10-02 00:26:04.535230', 'step': 8610, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:04.593353', 'step': 8610, 'epoch': 1}
{'type': 'loss', 'content': 0.03481094539165497, 'timestamp': '2025-10-02 00:26:04.598904', 'step': 8611, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:04.657241', 'step': 8611, 'epoch': 1}
{'type': 'loss', 'content': 0.26157253980636597, 'timestamp': '2025-10-02 00:26:04.663818', 'step': 8612, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:04.718277', 'step': 8612, 'epoch': 1}
{'type': 'loss', 'content': 0.047768063843250275, 'timestamp': '2025-10-02 00:26:04.720722', 'step': 8613, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:04.779129', 'step': 8613, 'epoch': 1}
{'type': 'loss', 'content': 0.18076102435588837, 'timestamp': '2025-10-02 00:26:04.781604', 'step': 8614, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:26:04.837325', 'step': 8614, 'epoch': 1}
{'type': 'loss', 'content': 0.1018390953540802, 'timestamp': '2025-10-02 00:26:04.839927', 'step': 8615, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:04.899795', 'step': 8615, 'epoch': 1}
{'type': 'loss', 'content': 0.11305945366621017, 'timestamp': '2025-10-02 00:26:04.907716', 'step': 8616, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:26:04.963687', 'step': 8616, 'epoch': 1}
{'type': 'loss', 'content': 0.10328774154186249, 'timestamp': '2025-10-02 00:26:04.972442', 'step': 8617, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:05.034308', 'step': 8617, 'epoch': 1}
{'type': 'loss', 'content': 0.08980980515480042, 'timestamp': '2025-10-02 00:26:05.037389', 'step': 8618, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:05.095044', 'step': 8618, 'epoch': 1}
{'type': 'loss', 'content': 0.09663637727499008, 'timestamp': '2025-10-02 00:26:05.100821', 'step': 8619, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:05.158348', 'step': 8619, 'epoch': 1}
{'type': 'loss', 'content': 0.06990716606378555, 'timestamp': '2025-10-02 00:26:05.165634', 'step': 8620, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:05.227941', 'step': 8620, 'epoch': 1}
{'type': 'loss', 'content': 0.12859667837619781, 'timestamp': '2025-10-02 00:26:05.230649', 'step': 8621, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:05.289739', 'step': 8621, 'epoch': 1}
{'type': 'loss', 'content': 0.17613562941551208, 'timestamp': '2025-10-02 00:26:05.292691', 'step': 8622, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:05.347744', 'step': 8622, 'epoch': 1}
{'type': 'loss', 'content': 0.16445103287696838, 'timestamp': '2025-10-02 00:26:05.351829', 'step': 8623, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:05.408240', 'step': 8623, 'epoch': 1}
{'type': 'loss', 'content': 0.1394037902355194, 'timestamp': '2025-10-02 00:26:05.414269', 'step': 8624, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:05.469299', 'step': 8624, 'epoch': 1}
{'type': 'loss', 'content': 0.12169849872589111, 'timestamp': '2025-10-02 00:26:05.475187', 'step': 8625, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:05.529331', 'step': 8625, 'epoch': 1}
{'type': 'loss', 'content': 0.0806049257516861, 'timestamp': '2025-10-02 00:26:05.531683', 'step': 8626, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:05.585686', 'step': 8626, 'epoch': 1}
{'type': 'loss', 'content': 0.11806226521730423, 'timestamp': '2025-10-02 00:26:05.587998', 'step': 8627, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:26:05.650486', 'step': 8627, 'epoch': 1}
{'type': 'loss', 'content': 0.04242228344082832, 'timestamp': '2025-10-02 00:26:05.662069', 'step': 8628, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:05.716254', 'step': 8628, 'epoch': 1}
{'type': 'loss', 'content': 0.03354571387171745, 'timestamp': '2025-10-02 00:26:05.723740', 'step': 8629, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:05.779069', 'step': 8629, 'epoch': 1}
{'type': 'loss', 'content': 0.04089093580842018, 'timestamp': '2025-10-02 00:26:05.781541', 'step': 8630, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:05.835696', 'step': 8630, 'epoch': 1}
{'type': 'loss', 'content': 0.21515531837940216, 'timestamp': '2025-10-02 00:26:05.837997', 'step': 8631, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:05.892386', 'step': 8631, 'epoch': 1}
{'type': 'loss', 'content': 0.028605179861187935, 'timestamp': '2025-10-02 00:26:05.898081', 'step': 8632, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:05.957430', 'step': 8632, 'epoch': 1}
{'type': 'loss', 'content': 0.11467411369085312, 'timestamp': '2025-10-02 00:26:05.959831', 'step': 8633, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:06.014594', 'step': 8633, 'epoch': 1}
{'type': 'loss', 'content': 0.055823128670454025, 'timestamp': '2025-10-02 00:26:06.023798', 'step': 8634, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:26:06.079278', 'step': 8634, 'epoch': 1}
{'type': 'loss', 'content': 0.1314031481742859, 'timestamp': '2025-10-02 00:26:06.081657', 'step': 8635, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:06.137098', 'step': 8635, 'epoch': 1}
{'type': 'loss', 'content': 0.05455402284860611, 'timestamp': '2025-10-02 00:26:06.142881', 'step': 8636, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:06.196782', 'step': 8636, 'epoch': 1}
{'type': 'loss', 'content': 0.0588141605257988, 'timestamp': '2025-10-02 00:26:06.199055', 'step': 8637, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:06.253852', 'step': 8637, 'epoch': 1}
{'type': 'loss', 'content': 0.060261860489845276, 'timestamp': '2025-10-02 00:26:06.256492', 'step': 8638, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:06.311188', 'step': 8638, 'epoch': 1}
{'type': 'loss', 'content': 0.07626230269670486, 'timestamp': '2025-10-02 00:26:06.320513', 'step': 8639, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:06.375185', 'step': 8639, 'epoch': 1}
{'type': 'loss', 'content': 0.12160053849220276, 'timestamp': '2025-10-02 00:26:06.380929', 'step': 8640, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:06.434684', 'step': 8640, 'epoch': 1}
{'type': 'loss', 'content': 0.04413075000047684, 'timestamp': '2025-10-02 00:26:06.437051', 'step': 8641, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:26:06.492145', 'step': 8641, 'epoch': 1}
{'type': 'loss', 'content': 0.13388699293136597, 'timestamp': '2025-10-02 00:26:06.500784', 'step': 8642, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:06.573639', 'step': 8642, 'epoch': 1}
{'type': 'loss', 'content': 0.16652557253837585, 'timestamp': '2025-10-02 00:26:06.577662', 'step': 8643, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:06.640486', 'step': 8643, 'epoch': 1}
{'type': 'loss', 'content': 0.1023370623588562, 'timestamp': '2025-10-02 00:26:06.647501', 'step': 8644, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:26:06.718633', 'step': 8644, 'epoch': 1}
{'type': 'loss', 'content': 0.16365352272987366, 'timestamp': '2025-10-02 00:26:06.722964', 'step': 8645, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:06.784861', 'step': 8645, 'epoch': 1}
{'type': 'loss', 'content': 0.09180542081594467, 'timestamp': '2025-10-02 00:26:06.787450', 'step': 8646, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:06.861206', 'step': 8646, 'epoch': 1}
{'type': 'loss', 'content': 0.07643961906433105, 'timestamp': '2025-10-02 00:26:06.874331', 'step': 8647, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:06.945874', 'step': 8647, 'epoch': 1}
{'type': 'loss', 'content': 0.07162612676620483, 'timestamp': '2025-10-02 00:26:06.956023', 'step': 8648, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:07.035788', 'step': 8648, 'epoch': 1}
{'type': 'loss', 'content': 0.0809665098786354, 'timestamp': '2025-10-02 00:26:07.040932', 'step': 8649, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:07.111230', 'step': 8649, 'epoch': 1}
{'type': 'loss', 'content': 0.017522122710943222, 'timestamp': '2025-10-02 00:26:07.118848', 'step': 8650, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:07.174667', 'step': 8650, 'epoch': 1}
{'type': 'loss', 'content': 0.15271522104740143, 'timestamp': '2025-10-02 00:26:07.177472', 'step': 8651, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:07.243934', 'step': 8651, 'epoch': 1}
{'type': 'loss', 'content': 0.058283451944589615, 'timestamp': '2025-10-02 00:26:07.251732', 'step': 8652, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:26:07.311729', 'step': 8652, 'epoch': 1}
{'type': 'loss', 'content': 0.036572374403476715, 'timestamp': '2025-10-02 00:26:07.322713', 'step': 8653, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:26:07.382706', 'step': 8653, 'epoch': 1}
{'type': 'loss', 'content': 0.09339216351509094, 'timestamp': '2025-10-02 00:26:07.392253', 'step': 8654, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:07.448671', 'step': 8654, 'epoch': 1}
{'type': 'loss', 'content': 0.07288941740989685, 'timestamp': '2025-10-02 00:26:07.456002', 'step': 8655, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:07.523573', 'step': 8655, 'epoch': 1}
{'type': 'loss', 'content': 0.07544613629579544, 'timestamp': '2025-10-02 00:26:07.533869', 'step': 8656, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:07.604851', 'step': 8656, 'epoch': 1}
{'type': 'loss', 'content': 0.048548538237810135, 'timestamp': '2025-10-02 00:26:07.607392', 'step': 8657, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:26:07.669028', 'step': 8657, 'epoch': 1}
{'type': 'loss', 'content': 0.14309477806091309, 'timestamp': '2025-10-02 00:26:07.674552', 'step': 8658, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:26:07.736791', 'step': 8658, 'epoch': 1}
{'type': 'loss', 'content': 0.09812305867671967, 'timestamp': '2025-10-02 00:26:07.739703', 'step': 8659, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:26:07.795381', 'step': 8659, 'epoch': 1}
{'type': 'loss', 'content': 0.15153896808624268, 'timestamp': '2025-10-02 00:26:07.805958', 'step': 8660, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:07.861979', 'step': 8660, 'epoch': 1}
{'type': 'loss', 'content': 0.051488909870386124, 'timestamp': '2025-10-02 00:26:07.870937', 'step': 8661, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:07.940820', 'step': 8661, 'epoch': 1}
{'type': 'loss', 'content': 0.07660597562789917, 'timestamp': '2025-10-02 00:26:07.947075', 'step': 8662, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:26:08.021085', 'step': 8662, 'epoch': 1}
{'type': 'loss', 'content': 0.026507249101996422, 'timestamp': '2025-10-02 00:26:08.031556', 'step': 8663, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:08.088466', 'step': 8663, 'epoch': 1}
{'type': 'loss', 'content': 0.011667310260236263, 'timestamp': '2025-10-02 00:26:08.096843', 'step': 8664, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:08.166268', 'step': 8664, 'epoch': 1}
{'type': 'loss', 'content': 0.18077245354652405, 'timestamp': '2025-10-02 00:26:08.169543', 'step': 8665, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:08.241651', 'step': 8665, 'epoch': 1}
{'type': 'loss', 'content': 0.027307182550430298, 'timestamp': '2025-10-02 00:26:08.249139', 'step': 8666, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:26:08.322243', 'step': 8666, 'epoch': 1}
{'type': 'loss', 'content': 0.09695525467395782, 'timestamp': '2025-10-02 00:26:08.332405', 'step': 8667, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:26:08.402181', 'step': 8667, 'epoch': 1}
{'type': 'loss', 'content': 0.12848924100399017, 'timestamp': '2025-10-02 00:26:08.413156', 'step': 8668, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:08.476273', 'step': 8668, 'epoch': 1}
{'type': 'loss', 'content': 0.18326333165168762, 'timestamp': '2025-10-02 00:26:08.479003', 'step': 8669, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:08.542116', 'step': 8669, 'epoch': 1}
{'type': 'loss', 'content': 0.12076849490404129, 'timestamp': '2025-10-02 00:26:08.549053', 'step': 8670, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:08.614779', 'step': 8670, 'epoch': 1}
{'type': 'loss', 'content': 0.04111161082983017, 'timestamp': '2025-10-02 00:26:08.617935', 'step': 8671, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:08.682561', 'step': 8671, 'epoch': 1}
{'type': 'loss', 'content': 0.13741612434387207, 'timestamp': '2025-10-02 00:26:08.688924', 'step': 8672, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:08.756642', 'step': 8672, 'epoch': 1}
{'type': 'loss', 'content': 0.10917069762945175, 'timestamp': '2025-10-02 00:26:08.765513', 'step': 8673, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:26:08.832178', 'step': 8673, 'epoch': 1}
{'type': 'loss', 'content': 0.04001345857977867, 'timestamp': '2025-10-02 00:26:08.835242', 'step': 8674, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:08.895490', 'step': 8674, 'epoch': 1}
{'type': 'loss', 'content': 0.147376149892807, 'timestamp': '2025-10-02 00:26:08.898785', 'step': 8675, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:08.960194', 'step': 8675, 'epoch': 1}
{'type': 'loss', 'content': 0.034949615597724915, 'timestamp': '2025-10-02 00:26:08.966978', 'step': 8676, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:09.022638', 'step': 8676, 'epoch': 1}
{'type': 'loss', 'content': 0.0093564847484231, 'timestamp': '2025-10-02 00:26:09.031949', 'step': 8677, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:09.099687', 'step': 8677, 'epoch': 1}
{'type': 'loss', 'content': 0.20727023482322693, 'timestamp': '2025-10-02 00:26:09.108862', 'step': 8678, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:09.172657', 'step': 8678, 'epoch': 1}
{'type': 'loss', 'content': 0.0426039956510067, 'timestamp': '2025-10-02 00:26:09.175395', 'step': 8679, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:09.240387', 'step': 8679, 'epoch': 1}
{'type': 'loss', 'content': 0.17887623608112335, 'timestamp': '2025-10-02 00:26:09.247281', 'step': 8680, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:09.302998', 'step': 8680, 'epoch': 1}
{'type': 'loss', 'content': 0.01998055726289749, 'timestamp': '2025-10-02 00:26:09.310911', 'step': 8681, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:09.368171', 'step': 8681, 'epoch': 1}
{'type': 'loss', 'content': 0.06022154167294502, 'timestamp': '2025-10-02 00:26:09.374008', 'step': 8682, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:09.429945', 'step': 8682, 'epoch': 1}
{'type': 'loss', 'content': 0.09800612181425095, 'timestamp': '2025-10-02 00:26:09.437695', 'step': 8683, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:26:09.514328', 'step': 8683, 'epoch': 1}
{'type': 'loss', 'content': 0.034248434007167816, 'timestamp': '2025-10-02 00:26:09.528518', 'step': 8684, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:09.597254', 'step': 8684, 'epoch': 1}
{'type': 'loss', 'content': 0.09860803186893463, 'timestamp': '2025-10-02 00:26:09.599693', 'step': 8685, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:09.665137', 'step': 8685, 'epoch': 1}
{'type': 'loss', 'content': 0.11949416249990463, 'timestamp': '2025-10-02 00:26:09.668037', 'step': 8686, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:09.728659', 'step': 8686, 'epoch': 1}
{'type': 'loss', 'content': 0.10134067386388779, 'timestamp': '2025-10-02 00:26:09.731729', 'step': 8687, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:09.793513', 'step': 8687, 'epoch': 1}
{'type': 'loss', 'content': 0.04362049698829651, 'timestamp': '2025-10-02 00:26:09.800524', 'step': 8688, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:09.857251', 'step': 8688, 'epoch': 1}
{'type': 'loss', 'content': 0.1050911471247673, 'timestamp': '2025-10-02 00:26:09.859904', 'step': 8689, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:09.920660', 'step': 8689, 'epoch': 1}
{'type': 'loss', 'content': 0.08331725001335144, 'timestamp': '2025-10-02 00:26:09.928211', 'step': 8690, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:09.986455', 'step': 8690, 'epoch': 1}
{'type': 'loss', 'content': 0.12872937321662903, 'timestamp': '2025-10-02 00:26:09.990942', 'step': 8691, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:10.048762', 'step': 8691, 'epoch': 1}
{'type': 'loss', 'content': 0.056427910923957825, 'timestamp': '2025-10-02 00:26:10.055372', 'step': 8692, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:10.110138', 'step': 8692, 'epoch': 1}
{'type': 'loss', 'content': 0.20398856699466705, 'timestamp': '2025-10-02 00:26:10.113006', 'step': 8693, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:10.182112', 'step': 8693, 'epoch': 1}
{'type': 'loss', 'content': 0.15649092197418213, 'timestamp': '2025-10-02 00:26:10.185426', 'step': 8694, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:10.242598', 'step': 8694, 'epoch': 1}
{'type': 'loss', 'content': 0.11602406948804855, 'timestamp': '2025-10-02 00:26:10.248792', 'step': 8695, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:26:10.318642', 'step': 8695, 'epoch': 1}
{'type': 'loss', 'content': 0.14671814441680908, 'timestamp': '2025-10-02 00:26:10.326520', 'step': 8696, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:10.384121', 'step': 8696, 'epoch': 1}
{'type': 'loss', 'content': 0.27466464042663574, 'timestamp': '2025-10-02 00:26:10.387699', 'step': 8697, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:10.453487', 'step': 8697, 'epoch': 1}
{'type': 'loss', 'content': 0.1258552074432373, 'timestamp': '2025-10-02 00:26:10.456742', 'step': 8698, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:10.514623', 'step': 8698, 'epoch': 1}
{'type': 'loss', 'content': 0.19277577102184296, 'timestamp': '2025-10-02 00:26:10.517410', 'step': 8699, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:10.577732', 'step': 8699, 'epoch': 1}
{'type': 'loss', 'content': 0.22304178774356842, 'timestamp': '2025-10-02 00:26:10.584271', 'step': 8700, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:26:10.639948', 'step': 8700, 'epoch': 1}
{'type': 'loss', 'content': 0.08906268328428268, 'timestamp': '2025-10-02 00:26:10.642702', 'step': 8701, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:10.703794', 'step': 8701, 'epoch': 1}
{'type': 'loss', 'content': 0.12972770631313324, 'timestamp': '2025-10-02 00:26:10.706699', 'step': 8702, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:10.767818', 'step': 8702, 'epoch': 1}
{'type': 'loss', 'content': 0.07310763746500015, 'timestamp': '2025-10-02 00:26:10.770898', 'step': 8703, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:10.832111', 'step': 8703, 'epoch': 1}
{'type': 'loss', 'content': 0.09568187594413757, 'timestamp': '2025-10-02 00:26:10.838679', 'step': 8704, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:26:10.899619', 'step': 8704, 'epoch': 1}
{'type': 'loss', 'content': 0.051782529801130295, 'timestamp': '2025-10-02 00:26:10.909856', 'step': 8705, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:10.970037', 'step': 8705, 'epoch': 1}
{'type': 'loss', 'content': 0.1997736543416977, 'timestamp': '2025-10-02 00:26:10.973117', 'step': 8706, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:11.030327', 'step': 8706, 'epoch': 1}
{'type': 'loss', 'content': 0.05983767285943031, 'timestamp': '2025-10-02 00:26:11.034157', 'step': 8707, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:26:11.094399', 'step': 8707, 'epoch': 1}
{'type': 'loss', 'content': 0.16399410367012024, 'timestamp': '2025-10-02 00:26:11.105590', 'step': 8708, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:11.164088', 'step': 8708, 'epoch': 1}
{'type': 'loss', 'content': 0.024262523278594017, 'timestamp': '2025-10-02 00:26:11.167396', 'step': 8709, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:11.229942', 'step': 8709, 'epoch': 1}
{'type': 'loss', 'content': 0.17340391874313354, 'timestamp': '2025-10-02 00:26:11.233405', 'step': 8710, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:11.297614', 'step': 8710, 'epoch': 1}
{'type': 'loss', 'content': 0.1460692137479782, 'timestamp': '2025-10-02 00:26:11.300554', 'step': 8711, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:11.368106', 'step': 8711, 'epoch': 1}
{'type': 'loss', 'content': 0.039945997297763824, 'timestamp': '2025-10-02 00:26:11.377823', 'step': 8712, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:26:11.440098', 'step': 8712, 'epoch': 1}
{'type': 'loss', 'content': 0.049204520881175995, 'timestamp': '2025-10-02 00:26:11.450376', 'step': 8713, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:11.520642', 'step': 8713, 'epoch': 1}
{'type': 'loss', 'content': 0.10499966144561768, 'timestamp': '2025-10-02 00:26:11.527768', 'step': 8714, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:11.589723', 'step': 8714, 'epoch': 1}
{'type': 'loss', 'content': 0.032078683376312256, 'timestamp': '2025-10-02 00:26:11.595444', 'step': 8715, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:11.652909', 'step': 8715, 'epoch': 1}
{'type': 'loss', 'content': 0.04151034727692604, 'timestamp': '2025-10-02 00:26:11.662163', 'step': 8716, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:26:11.728426', 'step': 8716, 'epoch': 1}
{'type': 'loss', 'content': 0.045982882380485535, 'timestamp': '2025-10-02 00:26:11.739745', 'step': 8717, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:11.794672', 'step': 8717, 'epoch': 1}
{'type': 'loss', 'content': 0.16331572830677032, 'timestamp': '2025-10-02 00:26:11.798228', 'step': 8718, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:11.856751', 'step': 8718, 'epoch': 1}
{'type': 'loss', 'content': 0.16709910333156586, 'timestamp': '2025-10-02 00:26:11.860467', 'step': 8719, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:11.917068', 'step': 8719, 'epoch': 1}
{'type': 'loss', 'content': 0.1283004879951477, 'timestamp': '2025-10-02 00:26:11.924375', 'step': 8720, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:11.986249', 'step': 8720, 'epoch': 1}
{'type': 'loss', 'content': 0.1359313726425171, 'timestamp': '2025-10-02 00:26:11.990061', 'step': 8721, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:12.046756', 'step': 8721, 'epoch': 1}
{'type': 'loss', 'content': 0.14160040020942688, 'timestamp': '2025-10-02 00:26:12.049756', 'step': 8722, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:12.109657', 'step': 8722, 'epoch': 1}
{'type': 'loss', 'content': 0.25876930356025696, 'timestamp': '2025-10-02 00:26:12.115920', 'step': 8723, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:26:12.173083', 'step': 8723, 'epoch': 1}
{'type': 'loss', 'content': 0.1386181265115738, 'timestamp': '2025-10-02 00:26:12.181102', 'step': 8724, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:12.245629', 'step': 8724, 'epoch': 1}
{'type': 'loss', 'content': 0.06526705622673035, 'timestamp': '2025-10-02 00:26:12.251126', 'step': 8725, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:26:12.310708', 'step': 8725, 'epoch': 1}
{'type': 'loss', 'content': 0.022277463227510452, 'timestamp': '2025-10-02 00:26:12.320267', 'step': 8726, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:12.377850', 'step': 8726, 'epoch': 1}
{'type': 'loss', 'content': 0.060831762850284576, 'timestamp': '2025-10-02 00:26:12.380577', 'step': 8727, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:26:12.438590', 'step': 8727, 'epoch': 1}
{'type': 'loss', 'content': 0.05187574028968811, 'timestamp': '2025-10-02 00:26:12.448905', 'step': 8728, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:26:12.507011', 'step': 8728, 'epoch': 1}
{'type': 'loss', 'content': 0.06870931386947632, 'timestamp': '2025-10-02 00:26:12.509523', 'step': 8729, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:12.569291', 'step': 8729, 'epoch': 1}
{'type': 'loss', 'content': 0.08315608650445938, 'timestamp': '2025-10-02 00:26:12.572052', 'step': 8730, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:12.635739', 'step': 8730, 'epoch': 1}
{'type': 'loss', 'content': 0.11738703399896622, 'timestamp': '2025-10-02 00:26:12.639304', 'step': 8731, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:26:12.701605', 'step': 8731, 'epoch': 1}
{'type': 'loss', 'content': 0.0962366834282875, 'timestamp': '2025-10-02 00:26:12.712047', 'step': 8732, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:26:12.770731', 'step': 8732, 'epoch': 1}
{'type': 'loss', 'content': 0.18647293746471405, 'timestamp': '2025-10-02 00:26:12.777291', 'step': 8733, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:26:12.847827', 'step': 8733, 'epoch': 1}
{'type': 'loss', 'content': 0.024126389995217323, 'timestamp': '2025-10-02 00:26:12.858689', 'step': 8734, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:12.930338', 'step': 8734, 'epoch': 1}
{'type': 'loss', 'content': 0.05463361740112305, 'timestamp': '2025-10-02 00:26:12.937378', 'step': 8735, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:13.000086', 'step': 8735, 'epoch': 1}
{'type': 'loss', 'content': 0.10798916220664978, 'timestamp': '2025-10-02 00:26:13.007117', 'step': 8736, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:26:13.066637', 'step': 8736, 'epoch': 1}
{'type': 'loss', 'content': 0.02159823477268219, 'timestamp': '2025-10-02 00:26:13.076914', 'step': 8737, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:26:13.148650', 'step': 8737, 'epoch': 1}
{'type': 'loss', 'content': 0.021448709070682526, 'timestamp': '2025-10-02 00:26:13.158813', 'step': 8738, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:13.224663', 'step': 8738, 'epoch': 1}
{'type': 'loss', 'content': 0.04730994626879692, 'timestamp': '2025-10-02 00:26:13.231482', 'step': 8739, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:13.298804', 'step': 8739, 'epoch': 1}
{'type': 'loss', 'content': 0.3578115701675415, 'timestamp': '2025-10-02 00:26:13.311675', 'step': 8740, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:13.368742', 'step': 8740, 'epoch': 1}
{'type': 'loss', 'content': 0.01845967210829258, 'timestamp': '2025-10-02 00:26:13.371739', 'step': 8741, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:13.427408', 'step': 8741, 'epoch': 1}
{'type': 'loss', 'content': 0.09214486926794052, 'timestamp': '2025-10-02 00:26:13.430037', 'step': 8742, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:26:13.493751', 'step': 8742, 'epoch': 1}
{'type': 'loss', 'content': 0.026676004752516747, 'timestamp': '2025-10-02 00:26:13.504251', 'step': 8743, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:13.581365', 'step': 8743, 'epoch': 1}
{'type': 'loss', 'content': 0.09365628659725189, 'timestamp': '2025-10-02 00:26:13.588805', 'step': 8744, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:13.659598', 'step': 8744, 'epoch': 1}
{'type': 'loss', 'content': 0.08579222857952118, 'timestamp': '2025-10-02 00:26:13.667280', 'step': 8745, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:13.733736', 'step': 8745, 'epoch': 1}
{'type': 'loss', 'content': 0.13757364451885223, 'timestamp': '2025-10-02 00:26:13.742747', 'step': 8746, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:26:13.811092', 'step': 8746, 'epoch': 1}
{'type': 'loss', 'content': 0.013745051808655262, 'timestamp': '2025-10-02 00:26:13.821315', 'step': 8747, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:13.898026', 'step': 8747, 'epoch': 1}
{'type': 'loss', 'content': 0.05938394367694855, 'timestamp': '2025-10-02 00:26:13.905619', 'step': 8748, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:13.975538', 'step': 8748, 'epoch': 1}
{'type': 'loss', 'content': 0.07036308944225311, 'timestamp': '2025-10-02 00:26:13.978112', 'step': 8749, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:26:14.037934', 'step': 8749, 'epoch': 1}
{'type': 'loss', 'content': 0.133056178689003, 'timestamp': '2025-10-02 00:26:14.040640', 'step': 8750, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:26:14.115874', 'step': 8750, 'epoch': 1}
{'type': 'loss', 'content': 0.0424775704741478, 'timestamp': '2025-10-02 00:26:14.126071', 'step': 8751, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:14.183791', 'step': 8751, 'epoch': 1}
{'type': 'loss', 'content': 0.06415092200040817, 'timestamp': '2025-10-02 00:26:14.194294', 'step': 8752, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:14.267479', 'step': 8752, 'epoch': 1}
{'type': 'loss', 'content': 0.08145947754383087, 'timestamp': '2025-10-02 00:26:14.272829', 'step': 8753, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:14.342913', 'step': 8753, 'epoch': 1}
{'type': 'loss', 'content': 0.07238364964723587, 'timestamp': '2025-10-02 00:26:14.350086', 'step': 8754, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:14.409663', 'step': 8754, 'epoch': 1}
{'type': 'loss', 'content': 0.04343130812048912, 'timestamp': '2025-10-02 00:26:14.418798', 'step': 8755, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:14.486588', 'step': 8755, 'epoch': 1}
{'type': 'loss', 'content': 0.17634952068328857, 'timestamp': '2025-10-02 00:26:14.503349', 'step': 8756, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:26:14.591219', 'step': 8756, 'epoch': 1}
{'type': 'loss', 'content': 0.03845667839050293, 'timestamp': '2025-10-02 00:26:14.602183', 'step': 8757, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:26:14.679269', 'step': 8757, 'epoch': 1}
{'type': 'loss', 'content': 0.052937403321266174, 'timestamp': '2025-10-02 00:26:14.682503', 'step': 8758, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:26:14.757921', 'step': 8758, 'epoch': 1}
{'type': 'loss', 'content': 0.02969587966799736, 'timestamp': '2025-10-02 00:26:14.768170', 'step': 8759, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:14.840955', 'step': 8759, 'epoch': 1}
{'type': 'loss', 'content': 0.10578913241624832, 'timestamp': '2025-10-02 00:26:14.847044', 'step': 8760, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:14.905637', 'step': 8760, 'epoch': 1}
{'type': 'loss', 'content': 0.08378088474273682, 'timestamp': '2025-10-02 00:26:14.915383', 'step': 8761, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:14.987026', 'step': 8761, 'epoch': 1}
{'type': 'loss', 'content': 0.17021934688091278, 'timestamp': '2025-10-02 00:26:14.990277', 'step': 8762, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:26:15.054112', 'step': 8762, 'epoch': 1}
{'type': 'loss', 'content': 0.04077976942062378, 'timestamp': '2025-10-02 00:26:15.064570', 'step': 8763, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:15.134154', 'step': 8763, 'epoch': 1}
{'type': 'loss', 'content': 0.1247967854142189, 'timestamp': '2025-10-02 00:26:15.142178', 'step': 8764, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:26:15.209928', 'step': 8764, 'epoch': 1}
{'type': 'loss', 'content': 0.02534949593245983, 'timestamp': '2025-10-02 00:26:15.221510', 'step': 8765, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:15.301383', 'step': 8765, 'epoch': 1}
{'type': 'loss', 'content': 0.11182957887649536, 'timestamp': '2025-10-02 00:26:15.304753', 'step': 8766, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:15.368959', 'step': 8766, 'epoch': 1}
{'type': 'loss', 'content': 0.05184876546263695, 'timestamp': '2025-10-02 00:26:15.378220', 'step': 8767, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:26:15.445884', 'step': 8767, 'epoch': 1}
{'type': 'loss', 'content': 0.019136497750878334, 'timestamp': '2025-10-02 00:26:15.458989', 'step': 8768, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:15.515316', 'step': 8768, 'epoch': 1}
{'type': 'loss', 'content': 0.07321176677942276, 'timestamp': '2025-10-02 00:26:15.518966', 'step': 8769, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:15.576458', 'step': 8769, 'epoch': 1}
{'type': 'loss', 'content': 0.03260115161538124, 'timestamp': '2025-10-02 00:26:15.579552', 'step': 8770, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:15.642811', 'step': 8770, 'epoch': 1}
{'type': 'loss', 'content': 0.04821673780679703, 'timestamp': '2025-10-02 00:26:15.653347', 'step': 8771, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:26:15.727607', 'step': 8771, 'epoch': 1}
{'type': 'loss', 'content': 0.09687314927577972, 'timestamp': '2025-10-02 00:26:15.734279', 'step': 8772, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:15.815680', 'step': 8772, 'epoch': 1}
{'type': 'loss', 'content': 0.052062179893255234, 'timestamp': '2025-10-02 00:26:15.827053', 'step': 8773, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:15.909066', 'step': 8773, 'epoch': 1}
{'type': 'loss', 'content': 0.039957743138074875, 'timestamp': '2025-10-02 00:26:15.914939', 'step': 8774, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:26:15.984356', 'step': 8774, 'epoch': 1}
{'type': 'loss', 'content': 0.0908409059047699, 'timestamp': '2025-10-02 00:26:15.994136', 'step': 8775, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:16.072112', 'step': 8775, 'epoch': 1}
{'type': 'loss', 'content': 0.05957167595624924, 'timestamp': '2025-10-02 00:26:16.079740', 'step': 8776, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:16.148616', 'step': 8776, 'epoch': 1}
{'type': 'loss', 'content': 0.1913130283355713, 'timestamp': '2025-10-02 00:26:16.156000', 'step': 8777, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:16.233910', 'step': 8777, 'epoch': 1}
{'type': 'loss', 'content': 0.08886003494262695, 'timestamp': '2025-10-02 00:26:16.238122', 'step': 8778, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:26:16.328608', 'step': 8778, 'epoch': 1}
{'type': 'loss', 'content': 0.0802328884601593, 'timestamp': '2025-10-02 00:26:16.338089', 'step': 8779, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:26:16.414388', 'step': 8779, 'epoch': 1}
{'type': 'loss', 'content': 0.019564827904105186, 'timestamp': '2025-10-02 00:26:16.425852', 'step': 8780, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:26:16.484963', 'step': 8780, 'epoch': 1}
{'type': 'loss', 'content': 0.12926143407821655, 'timestamp': '2025-10-02 00:26:16.492096', 'step': 8781, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:16.555374', 'step': 8781, 'epoch': 1}
{'type': 'loss', 'content': 0.23972894251346588, 'timestamp': '2025-10-02 00:26:16.561477', 'step': 8782, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:26:16.626212', 'step': 8782, 'epoch': 1}
{'type': 'loss', 'content': 0.012591389939188957, 'timestamp': '2025-10-02 00:26:16.635778', 'step': 8783, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:16.691482', 'step': 8783, 'epoch': 1}
{'type': 'loss', 'content': 0.05746375769376755, 'timestamp': '2025-10-02 00:26:16.701986', 'step': 8784, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:16.765890', 'step': 8784, 'epoch': 1}
{'type': 'loss', 'content': 0.05694990232586861, 'timestamp': '2025-10-02 00:26:16.768424', 'step': 8785, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:26:16.834486', 'step': 8785, 'epoch': 1}
{'type': 'loss', 'content': 0.07684353739023209, 'timestamp': '2025-10-02 00:26:16.840628', 'step': 8786, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:26:16.904625', 'step': 8786, 'epoch': 1}
{'type': 'loss', 'content': 0.0960007831454277, 'timestamp': '2025-10-02 00:26:16.911403', 'step': 8787, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:16.970717', 'step': 8787, 'epoch': 1}
{'type': 'loss', 'content': 0.09271132946014404, 'timestamp': '2025-10-02 00:26:16.979629', 'step': 8788, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:17.040879', 'step': 8788, 'epoch': 1}
{'type': 'loss', 'content': 0.08949817717075348, 'timestamp': '2025-10-02 00:26:17.044305', 'step': 8789, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:17.107050', 'step': 8789, 'epoch': 1}
{'type': 'loss', 'content': 0.12844562530517578, 'timestamp': '2025-10-02 00:26:17.109590', 'step': 8790, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:17.165556', 'step': 8790, 'epoch': 1}
{'type': 'loss', 'content': 0.1523573100566864, 'timestamp': '2025-10-02 00:26:17.169036', 'step': 8791, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:17.236528', 'step': 8791, 'epoch': 1}
{'type': 'loss', 'content': 0.17022819817066193, 'timestamp': '2025-10-02 00:26:17.247892', 'step': 8792, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:17.311835', 'step': 8792, 'epoch': 1}
{'type': 'loss', 'content': 0.12533415853977203, 'timestamp': '2025-10-02 00:26:17.314451', 'step': 8793, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:26:17.376647', 'step': 8793, 'epoch': 1}
{'type': 'loss', 'content': 0.12219103425741196, 'timestamp': '2025-10-02 00:26:17.384216', 'step': 8794, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:17.454943', 'step': 8794, 'epoch': 1}
{'type': 'loss', 'content': 0.06721725314855576, 'timestamp': '2025-10-02 00:26:17.462238', 'step': 8795, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:17.535466', 'step': 8795, 'epoch': 1}
{'type': 'loss', 'content': 0.028795680031180382, 'timestamp': '2025-10-02 00:26:17.547000', 'step': 8796, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:17.613413', 'step': 8796, 'epoch': 1}
{'type': 'loss', 'content': 0.10902109742164612, 'timestamp': '2025-10-02 00:26:17.621283', 'step': 8797, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:26:17.693176', 'step': 8797, 'epoch': 1}
{'type': 'loss', 'content': 0.04485291242599487, 'timestamp': '2025-10-02 00:26:17.701863', 'step': 8798, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:17.769894', 'step': 8798, 'epoch': 1}
{'type': 'loss', 'content': 0.09921328723430634, 'timestamp': '2025-10-02 00:26:17.780416', 'step': 8799, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:17.842268', 'step': 8799, 'epoch': 1}
{'type': 'loss', 'content': 0.08929791301488876, 'timestamp': '2025-10-02 00:26:17.848802', 'step': 8800, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:17.910216', 'step': 8800, 'epoch': 1}
{'type': 'loss', 'content': 0.07044348120689392, 'timestamp': '2025-10-02 00:26:17.915993', 'step': 8801, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:17.979595', 'step': 8801, 'epoch': 1}
{'type': 'loss', 'content': 0.07974661886692047, 'timestamp': '2025-10-02 00:26:17.988883', 'step': 8802, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:18.046002', 'step': 8802, 'epoch': 1}
{'type': 'loss', 'content': 0.08103484660387039, 'timestamp': '2025-10-02 00:26:18.051735', 'step': 8803, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:18.116650', 'step': 8803, 'epoch': 1}
{'type': 'loss', 'content': 0.07548399269580841, 'timestamp': '2025-10-02 00:26:18.122837', 'step': 8804, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:18.178589', 'step': 8804, 'epoch': 1}
{'type': 'loss', 'content': 0.02172478847205639, 'timestamp': '2025-10-02 00:26:18.186093', 'step': 8805, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:18.256281', 'step': 8805, 'epoch': 1}
{'type': 'loss', 'content': 0.041223034262657166, 'timestamp': '2025-10-02 00:26:18.264515', 'step': 8806, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:18.327745', 'step': 8806, 'epoch': 1}
{'type': 'loss', 'content': 0.09298723936080933, 'timestamp': '2025-10-02 00:26:18.336883', 'step': 8807, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:26:18.416334', 'step': 8807, 'epoch': 1}
{'type': 'loss', 'content': 0.05563292279839516, 'timestamp': '2025-10-02 00:26:18.427348', 'step': 8808, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:18.504907', 'step': 8808, 'epoch': 1}
{'type': 'loss', 'content': 0.13285410404205322, 'timestamp': '2025-10-02 00:26:18.513468', 'step': 8809, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 544], 'flops': 10880066115712.0}, 'timestamp': '2025-10-02 00:26:18.606398', 'step': 8809, 'epoch': 1}
{'type': 'loss', 'content': 0.02660466730594635, 'timestamp': '2025-10-02 00:26:18.621217', 'step': 8810, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:18.689434', 'step': 8810, 'epoch': 1}
{'type': 'loss', 'content': 0.061607204377651215, 'timestamp': '2025-10-02 00:26:18.693231', 'step': 8811, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:18.755633', 'step': 8811, 'epoch': 1}
{'type': 'loss', 'content': 0.2559438645839691, 'timestamp': '2025-10-02 00:26:18.763508', 'step': 8812, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:18.821588', 'step': 8812, 'epoch': 1}
{'type': 'loss', 'content': 0.09135834872722626, 'timestamp': '2025-10-02 00:26:18.825276', 'step': 8813, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:18.887162', 'step': 8813, 'epoch': 1}
{'type': 'loss', 'content': 0.05971940979361534, 'timestamp': '2025-10-02 00:26:18.894635', 'step': 8814, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:18.951476', 'step': 8814, 'epoch': 1}
{'type': 'loss', 'content': 0.22270233929157257, 'timestamp': '2025-10-02 00:26:18.961576', 'step': 8815, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:19.019195', 'step': 8815, 'epoch': 1}
{'type': 'loss', 'content': 0.04407691955566406, 'timestamp': '2025-10-02 00:26:19.025346', 'step': 8816, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:19.085708', 'step': 8816, 'epoch': 1}
{'type': 'loss', 'content': 0.10751001536846161, 'timestamp': '2025-10-02 00:26:19.093848', 'step': 8817, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:19.166594', 'step': 8817, 'epoch': 1}
{'type': 'loss', 'content': 0.0938316360116005, 'timestamp': '2025-10-02 00:26:19.174450', 'step': 8818, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:26:19.252133', 'step': 8818, 'epoch': 1}
{'type': 'loss', 'content': 0.026959000155329704, 'timestamp': '2025-10-02 00:26:19.262654', 'step': 8819, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:19.321244', 'step': 8819, 'epoch': 1}
{'type': 'loss', 'content': 0.13982240855693817, 'timestamp': '2025-10-02 00:26:19.332853', 'step': 8820, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:19.390769', 'step': 8820, 'epoch': 1}
{'type': 'loss', 'content': 0.045211371034383774, 'timestamp': '2025-10-02 00:26:19.400146', 'step': 8821, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:19.464901', 'step': 8821, 'epoch': 1}
{'type': 'loss', 'content': 0.14126789569854736, 'timestamp': '2025-10-02 00:26:19.468465', 'step': 8822, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:19.540358', 'step': 8822, 'epoch': 1}
{'type': 'loss', 'content': 0.09003244340419769, 'timestamp': '2025-10-02 00:26:19.546105', 'step': 8823, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:19.603357', 'step': 8823, 'epoch': 1}
{'type': 'loss', 'content': 0.10327140986919403, 'timestamp': '2025-10-02 00:26:19.609731', 'step': 8824, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:19.672927', 'step': 8824, 'epoch': 1}
{'type': 'loss', 'content': 0.03771840035915375, 'timestamp': '2025-10-02 00:26:19.682455', 'step': 8825, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:26:19.752244', 'step': 8825, 'epoch': 1}
{'type': 'loss', 'content': 0.055227335542440414, 'timestamp': '2025-10-02 00:26:19.762363', 'step': 8826, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:19.832837', 'step': 8826, 'epoch': 1}
{'type': 'loss', 'content': 0.08206046372652054, 'timestamp': '2025-10-02 00:26:19.835295', 'step': 8827, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:26:19.896620', 'step': 8827, 'epoch': 1}
{'type': 'loss', 'content': 0.044112857431173325, 'timestamp': '2025-10-02 00:26:19.907879', 'step': 8828, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:19.972391', 'step': 8828, 'epoch': 1}
{'type': 'loss', 'content': 0.041407156735658646, 'timestamp': '2025-10-02 00:26:19.978296', 'step': 8829, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:20.035113', 'step': 8829, 'epoch': 1}
{'type': 'loss', 'content': 0.12700673937797546, 'timestamp': '2025-10-02 00:26:20.037274', 'step': 8830, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:20.090959', 'step': 8830, 'epoch': 1}
{'type': 'loss', 'content': 0.12165812402963638, 'timestamp': '2025-10-02 00:26:20.093477', 'step': 8831, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:20.147805', 'step': 8831, 'epoch': 1}
{'type': 'loss', 'content': 0.0464404858648777, 'timestamp': '2025-10-02 00:26:20.153644', 'step': 8832, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:20.208306', 'step': 8832, 'epoch': 1}
{'type': 'loss', 'content': 0.08055708557367325, 'timestamp': '2025-10-02 00:26:20.210926', 'step': 8833, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:20.265598', 'step': 8833, 'epoch': 1}
{'type': 'loss', 'content': 0.05805690214037895, 'timestamp': '2025-10-02 00:26:20.269468', 'step': 8834, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:20.324287', 'step': 8834, 'epoch': 1}
{'type': 'loss', 'content': 0.06117004156112671, 'timestamp': '2025-10-02 00:26:20.326814', 'step': 8835, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:20.381710', 'step': 8835, 'epoch': 1}
{'type': 'loss', 'content': 0.04281426966190338, 'timestamp': '2025-10-02 00:26:20.387592', 'step': 8836, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:26:20.440678', 'step': 8836, 'epoch': 1}
{'type': 'loss', 'content': 0.1467837244272232, 'timestamp': '2025-10-02 00:26:20.443025', 'step': 8837, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:20.497208', 'step': 8837, 'epoch': 1}
{'type': 'loss', 'content': 0.1275247037410736, 'timestamp': '2025-10-02 00:26:20.499477', 'step': 8838, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:20.553251', 'step': 8838, 'epoch': 1}
{'type': 'loss', 'content': 0.16444173455238342, 'timestamp': '2025-10-02 00:26:20.555528', 'step': 8839, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:20.611102', 'step': 8839, 'epoch': 1}
{'type': 'loss', 'content': 0.05081792175769806, 'timestamp': '2025-10-02 00:26:20.616867', 'step': 8840, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:20.671302', 'step': 8840, 'epoch': 1}
{'type': 'loss', 'content': 0.07885883003473282, 'timestamp': '2025-10-02 00:26:20.678664', 'step': 8841, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:26:20.746883', 'step': 8841, 'epoch': 1}
{'type': 'loss', 'content': 0.02420920692384243, 'timestamp': '2025-10-02 00:26:20.758899', 'step': 8842, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:26:20.820423', 'step': 8842, 'epoch': 1}
{'type': 'loss', 'content': 0.040083881467580795, 'timestamp': '2025-10-02 00:26:20.831087', 'step': 8843, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:20.885584', 'step': 8843, 'epoch': 1}
{'type': 'loss', 'content': 0.035966552793979645, 'timestamp': '2025-10-02 00:26:20.891294', 'step': 8844, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:20.945451', 'step': 8844, 'epoch': 1}
{'type': 'loss', 'content': 0.06854481250047684, 'timestamp': '2025-10-02 00:26:20.947744', 'step': 8845, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:26:21.005401', 'step': 8845, 'epoch': 1}
{'type': 'loss', 'content': 0.10660124570131302, 'timestamp': '2025-10-02 00:26:21.008058', 'step': 8846, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:26:21.062368', 'step': 8846, 'epoch': 1}
{'type': 'loss', 'content': 0.15519209206104279, 'timestamp': '2025-10-02 00:26:21.064863', 'step': 8847, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:21.119926', 'step': 8847, 'epoch': 1}
{'type': 'loss', 'content': 0.059189047664403915, 'timestamp': '2025-10-02 00:26:21.125869', 'step': 8848, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:21.180026', 'step': 8848, 'epoch': 1}
{'type': 'loss', 'content': 0.0583127997815609, 'timestamp': '2025-10-02 00:26:21.185751', 'step': 8849, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:21.239859', 'step': 8849, 'epoch': 1}
{'type': 'loss', 'content': 0.13407854735851288, 'timestamp': '2025-10-02 00:26:21.242902', 'step': 8850, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:21.298143', 'step': 8850, 'epoch': 1}
{'type': 'loss', 'content': 0.019568949937820435, 'timestamp': '2025-10-02 00:26:21.305179', 'step': 8851, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:26:21.359052', 'step': 8851, 'epoch': 1}
{'type': 'loss', 'content': 0.15547525882720947, 'timestamp': '2025-10-02 00:26:21.365013', 'step': 8852, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:26:21.418951', 'step': 8852, 'epoch': 1}
{'type': 'loss', 'content': 0.22076719999313354, 'timestamp': '2025-10-02 00:26:21.421215', 'step': 8853, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:26:21.480518', 'step': 8853, 'epoch': 1}
{'type': 'loss', 'content': 0.026697425171732903, 'timestamp': '2025-10-02 00:26:21.490719', 'step': 8854, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:26:21.552503', 'step': 8854, 'epoch': 1}
{'type': 'loss', 'content': 0.04329174384474754, 'timestamp': '2025-10-02 00:26:21.562980', 'step': 8855, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:21.617498', 'step': 8855, 'epoch': 1}
{'type': 'loss', 'content': 0.1630103588104248, 'timestamp': '2025-10-02 00:26:21.624348', 'step': 8856, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:21.678737', 'step': 8856, 'epoch': 1}
{'type': 'loss', 'content': 0.11738709360361099, 'timestamp': '2025-10-02 00:26:21.688023', 'step': 8857, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:21.741835', 'step': 8857, 'epoch': 1}
{'type': 'loss', 'content': 0.11034572869539261, 'timestamp': '2025-10-02 00:26:21.744016', 'step': 8858, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:21.798764', 'step': 8858, 'epoch': 1}
{'type': 'loss', 'content': 0.07239867746829987, 'timestamp': '2025-10-02 00:26:21.801140', 'step': 8859, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:21.856930', 'step': 8859, 'epoch': 1}
{'type': 'loss', 'content': 0.030815312638878822, 'timestamp': '2025-10-02 00:26:21.862971', 'step': 8860, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:21.917286', 'step': 8860, 'epoch': 1}
{'type': 'loss', 'content': 0.035374801605939865, 'timestamp': '2025-10-02 00:26:21.923135', 'step': 8861, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:21.977259', 'step': 8861, 'epoch': 1}
{'type': 'loss', 'content': 0.10950716584920883, 'timestamp': '2025-10-02 00:26:21.979639', 'step': 8862, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:22.033294', 'step': 8862, 'epoch': 1}
{'type': 'loss', 'content': 0.07993853092193604, 'timestamp': '2025-10-02 00:26:22.035556', 'step': 8863, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:22.089587', 'step': 8863, 'epoch': 1}
{'type': 'loss', 'content': 0.15088032186031342, 'timestamp': '2025-10-02 00:26:22.096209', 'step': 8864, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:26:22.150033', 'step': 8864, 'epoch': 1}
{'type': 'loss', 'content': 0.028590012341737747, 'timestamp': '2025-10-02 00:26:22.152177', 'step': 8865, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:22.206059', 'step': 8865, 'epoch': 1}
{'type': 'loss', 'content': 0.10061163455247879, 'timestamp': '2025-10-02 00:26:22.208556', 'step': 8866, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:22.263244', 'step': 8866, 'epoch': 1}
{'type': 'loss', 'content': 0.029550841078162193, 'timestamp': '2025-10-02 00:26:22.272197', 'step': 8867, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:22.327096', 'step': 8867, 'epoch': 1}
{'type': 'loss', 'content': 0.06254307180643082, 'timestamp': '2025-10-02 00:26:22.333098', 'step': 8868, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:26:22.386903', 'step': 8868, 'epoch': 1}
{'type': 'loss', 'content': 0.16871388256549835, 'timestamp': '2025-10-02 00:26:22.389530', 'step': 8869, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:22.444915', 'step': 8869, 'epoch': 1}
{'type': 'loss', 'content': 0.037228986620903015, 'timestamp': '2025-10-02 00:26:22.447453', 'step': 8870, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:22.503379', 'step': 8870, 'epoch': 1}
{'type': 'loss', 'content': 0.04803406074643135, 'timestamp': '2025-10-02 00:26:22.508826', 'step': 8871, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:22.562499', 'step': 8871, 'epoch': 1}
{'type': 'loss', 'content': 0.1213591992855072, 'timestamp': '2025-10-02 00:26:22.568470', 'step': 8872, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:22.622307', 'step': 8872, 'epoch': 1}
{'type': 'loss', 'content': 0.040387075394392014, 'timestamp': '2025-10-02 00:26:22.624309', 'step': 8873, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:26:22.679662', 'step': 8873, 'epoch': 1}
{'type': 'loss', 'content': 0.047565240412950516, 'timestamp': '2025-10-02 00:26:22.689109', 'step': 8874, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:22.744140', 'step': 8874, 'epoch': 1}
{'type': 'loss', 'content': 0.11097242683172226, 'timestamp': '2025-10-02 00:26:22.746619', 'step': 8875, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:26:22.815688', 'step': 8875, 'epoch': 1}
{'type': 'loss', 'content': 0.06996431201696396, 'timestamp': '2025-10-02 00:26:22.828826', 'step': 8876, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:26:22.882806', 'step': 8876, 'epoch': 1}
{'type': 'loss', 'content': 0.20090848207473755, 'timestamp': '2025-10-02 00:26:22.885149', 'step': 8877, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:22.939142', 'step': 8877, 'epoch': 1}
{'type': 'loss', 'content': 0.042790867388248444, 'timestamp': '2025-10-02 00:26:22.941633', 'step': 8878, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:22.996040', 'step': 8878, 'epoch': 1}
{'type': 'loss', 'content': 0.06167076528072357, 'timestamp': '2025-10-02 00:26:22.998381', 'step': 8879, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:23.052250', 'step': 8879, 'epoch': 1}
{'type': 'loss', 'content': 0.054690342396497726, 'timestamp': '2025-10-02 00:26:23.058369', 'step': 8880, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:26:23.111923', 'step': 8880, 'epoch': 1}
{'type': 'loss', 'content': 0.06997490674257278, 'timestamp': '2025-10-02 00:26:23.114297', 'step': 8881, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:23.167388', 'step': 8881, 'epoch': 1}
{'type': 'loss', 'content': 0.12838514149188995, 'timestamp': '2025-10-02 00:26:23.173427', 'step': 8882, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:26:23.242451', 'step': 8882, 'epoch': 1}
{'type': 'loss', 'content': 0.035686884075403214, 'timestamp': '2025-10-02 00:26:23.253117', 'step': 8883, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:23.308930', 'step': 8883, 'epoch': 1}
{'type': 'loss', 'content': 0.12525007128715515, 'timestamp': '2025-10-02 00:26:23.314799', 'step': 8884, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:26:23.369485', 'step': 8884, 'epoch': 1}
{'type': 'loss', 'content': 0.049869779497385025, 'timestamp': '2025-10-02 00:26:23.379730', 'step': 8885, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:23.434097', 'step': 8885, 'epoch': 1}
{'type': 'loss', 'content': 0.13685335218906403, 'timestamp': '2025-10-02 00:26:23.436730', 'step': 8886, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:23.492288', 'step': 8886, 'epoch': 1}
{'type': 'loss', 'content': 0.08863523602485657, 'timestamp': '2025-10-02 00:26:23.494909', 'step': 8887, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:23.549779', 'step': 8887, 'epoch': 1}
{'type': 'loss', 'content': 0.202002614736557, 'timestamp': '2025-10-02 00:26:23.555800', 'step': 8888, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:23.609624', 'step': 8888, 'epoch': 1}
{'type': 'loss', 'content': 0.05341176688671112, 'timestamp': '2025-10-02 00:26:23.612905', 'step': 8889, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:23.669398', 'step': 8889, 'epoch': 1}
{'type': 'loss', 'content': 0.1287052482366562, 'timestamp': '2025-10-02 00:26:23.671975', 'step': 8890, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:23.726581', 'step': 8890, 'epoch': 1}
{'type': 'loss', 'content': 0.01566937193274498, 'timestamp': '2025-10-02 00:26:23.728894', 'step': 8891, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:23.784594', 'step': 8891, 'epoch': 1}
{'type': 'loss', 'content': 0.07045993208885193, 'timestamp': '2025-10-02 00:26:23.790170', 'step': 8892, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:23.846947', 'step': 8892, 'epoch': 1}
{'type': 'loss', 'content': 0.27958518266677856, 'timestamp': '2025-10-02 00:26:23.849236', 'step': 8893, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:23.902404', 'step': 8893, 'epoch': 1}
{'type': 'loss', 'content': 0.09562178701162338, 'timestamp': '2025-10-02 00:26:23.904826', 'step': 8894, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:26:23.974212', 'step': 8894, 'epoch': 1}
{'type': 'loss', 'content': 0.024867458269000053, 'timestamp': '2025-10-02 00:26:23.986524', 'step': 8895, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:24.041045', 'step': 8895, 'epoch': 1}
{'type': 'loss', 'content': 0.00654871528968215, 'timestamp': '2025-10-02 00:26:24.049309', 'step': 8896, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:24.103999', 'step': 8896, 'epoch': 1}
{'type': 'loss', 'content': 0.07639618217945099, 'timestamp': '2025-10-02 00:26:24.109519', 'step': 8897, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:26:24.174339', 'step': 8897, 'epoch': 1}
{'type': 'loss', 'content': 0.029921304434537888, 'timestamp': '2025-10-02 00:26:24.184993', 'step': 8898, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:24.240261', 'step': 8898, 'epoch': 1}
{'type': 'loss', 'content': 0.02957461029291153, 'timestamp': '2025-10-02 00:26:24.242693', 'step': 8899, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:26:24.297681', 'step': 8899, 'epoch': 1}
{'type': 'loss', 'content': 0.19412948191165924, 'timestamp': '2025-10-02 00:26:24.304005', 'step': 8900, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:26:24.365272', 'step': 8900, 'epoch': 1}
{'type': 'loss', 'content': 0.033779457211494446, 'timestamp': '2025-10-02 00:26:24.377042', 'step': 8901, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:24.432233', 'step': 8901, 'epoch': 1}
{'type': 'loss', 'content': 0.11138144135475159, 'timestamp': '2025-10-02 00:26:24.439450', 'step': 8902, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:24.494312', 'step': 8902, 'epoch': 1}
{'type': 'loss', 'content': 0.05761668086051941, 'timestamp': '2025-10-02 00:26:24.496820', 'step': 8903, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:24.550264', 'step': 8903, 'epoch': 1}
{'type': 'loss', 'content': 0.2719464600086212, 'timestamp': '2025-10-02 00:26:24.556002', 'step': 8904, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:24.609103', 'step': 8904, 'epoch': 1}
{'type': 'loss', 'content': 0.12194065004587173, 'timestamp': '2025-10-02 00:26:24.611428', 'step': 8905, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:24.665134', 'step': 8905, 'epoch': 1}
{'type': 'loss', 'content': 0.01600085012614727, 'timestamp': '2025-10-02 00:26:24.667379', 'step': 8906, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:24.721951', 'step': 8906, 'epoch': 1}
{'type': 'loss', 'content': 0.032722778618335724, 'timestamp': '2025-10-02 00:26:24.725629', 'step': 8907, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:24.780160', 'step': 8907, 'epoch': 1}
{'type': 'loss', 'content': 0.05683094263076782, 'timestamp': '2025-10-02 00:26:24.786314', 'step': 8908, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:24.839957', 'step': 8908, 'epoch': 1}
{'type': 'loss', 'content': 0.03771361708641052, 'timestamp': '2025-10-02 00:26:24.845806', 'step': 8909, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:24.901027', 'step': 8909, 'epoch': 1}
{'type': 'loss', 'content': 0.010408859699964523, 'timestamp': '2025-10-02 00:26:24.910388', 'step': 8910, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:24.964507', 'step': 8910, 'epoch': 1}
{'type': 'loss', 'content': 0.056147824972867966, 'timestamp': '2025-10-02 00:26:24.971663', 'step': 8911, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:25.026687', 'step': 8911, 'epoch': 1}
{'type': 'loss', 'content': 0.020392650738358498, 'timestamp': '2025-10-02 00:26:25.033578', 'step': 8912, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:25.087101', 'step': 8912, 'epoch': 1}
{'type': 'loss', 'content': 0.15154780447483063, 'timestamp': '2025-10-02 00:26:25.089685', 'step': 8913, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:25.144002', 'step': 8913, 'epoch': 1}
{'type': 'loss', 'content': 0.17224976420402527, 'timestamp': '2025-10-02 00:26:25.146023', 'step': 8914, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:25.199775', 'step': 8914, 'epoch': 1}
{'type': 'loss', 'content': 0.1257806420326233, 'timestamp': '2025-10-02 00:26:25.202275', 'step': 8915, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:25.256267', 'step': 8915, 'epoch': 1}
{'type': 'loss', 'content': 0.028454184532165527, 'timestamp': '2025-10-02 00:26:25.262912', 'step': 8916, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:25.317126', 'step': 8916, 'epoch': 1}
{'type': 'loss', 'content': 0.0699104517698288, 'timestamp': '2025-10-02 00:26:25.319302', 'step': 8917, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:26:25.373410', 'step': 8917, 'epoch': 1}
{'type': 'loss', 'content': 0.08108393102884293, 'timestamp': '2025-10-02 00:26:25.375522', 'step': 8918, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:25.429633', 'step': 8918, 'epoch': 1}
{'type': 'loss', 'content': 0.07755576074123383, 'timestamp': '2025-10-02 00:26:25.438999', 'step': 8919, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:25.493944', 'step': 8919, 'epoch': 1}
{'type': 'loss', 'content': 0.02496134303510189, 'timestamp': '2025-10-02 00:26:25.500148', 'step': 8920, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:25.553364', 'step': 8920, 'epoch': 1}
{'type': 'loss', 'content': 0.18815597891807556, 'timestamp': '2025-10-02 00:26:25.555740', 'step': 8921, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:26:25.617244', 'step': 8921, 'epoch': 1}
{'type': 'loss', 'content': 0.0124661298468709, 'timestamp': '2025-10-02 00:26:25.627714', 'step': 8922, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:25.682328', 'step': 8922, 'epoch': 1}
{'type': 'loss', 'content': 0.08290508389472961, 'timestamp': '2025-10-02 00:26:25.684754', 'step': 8923, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:26:25.738806', 'step': 8923, 'epoch': 1}
{'type': 'loss', 'content': 0.10997132211923599, 'timestamp': '2025-10-02 00:26:25.744919', 'step': 8924, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:25.798255', 'step': 8924, 'epoch': 1}
{'type': 'loss', 'content': 0.11842332780361176, 'timestamp': '2025-10-02 00:26:25.800283', 'step': 8925, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:25.853693', 'step': 8925, 'epoch': 1}
{'type': 'loss', 'content': 0.08760666847229004, 'timestamp': '2025-10-02 00:26:25.855997', 'step': 8926, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:25.910450', 'step': 8926, 'epoch': 1}
{'type': 'loss', 'content': 0.04666094854474068, 'timestamp': '2025-10-02 00:26:25.912762', 'step': 8927, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:26:25.966867', 'step': 8927, 'epoch': 1}
{'type': 'loss', 'content': 0.05222098529338837, 'timestamp': '2025-10-02 00:26:25.972974', 'step': 8928, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:26.026832', 'step': 8928, 'epoch': 1}
{'type': 'loss', 'content': 0.06967636197805405, 'timestamp': '2025-10-02 00:26:26.034303', 'step': 8929, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:26.089654', 'step': 8929, 'epoch': 1}
{'type': 'loss', 'content': 0.01949773170053959, 'timestamp': '2025-10-02 00:26:26.091863', 'step': 8930, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:26:26.147420', 'step': 8930, 'epoch': 1}
{'type': 'loss', 'content': 0.032752249389886856, 'timestamp': '2025-10-02 00:26:26.156982', 'step': 8931, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:26.212634', 'step': 8931, 'epoch': 1}
{'type': 'loss', 'content': 0.09481624513864517, 'timestamp': '2025-10-02 00:26:26.219112', 'step': 8932, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:26.277290', 'step': 8932, 'epoch': 1}
{'type': 'loss', 'content': 0.041505053639411926, 'timestamp': '2025-10-02 00:26:26.284740', 'step': 8933, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:26.342215', 'step': 8933, 'epoch': 1}
{'type': 'loss', 'content': 0.04096631333231926, 'timestamp': '2025-10-02 00:26:26.351362', 'step': 8934, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:26:26.408591', 'step': 8934, 'epoch': 1}
{'type': 'loss', 'content': 0.16837221384048462, 'timestamp': '2025-10-02 00:26:26.411591', 'step': 8935, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:26.466691', 'step': 8935, 'epoch': 1}
{'type': 'loss', 'content': 0.07387272268533707, 'timestamp': '2025-10-02 00:26:26.472938', 'step': 8936, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:26.527770', 'step': 8936, 'epoch': 1}
{'type': 'loss', 'content': 0.12099052965641022, 'timestamp': '2025-10-02 00:26:26.530385', 'step': 8937, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:26.594591', 'step': 8937, 'epoch': 1}
{'type': 'loss', 'content': 0.07555459439754486, 'timestamp': '2025-10-02 00:26:26.601212', 'step': 8938, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:26.661356', 'step': 8938, 'epoch': 1}
{'type': 'loss', 'content': 0.03264927864074707, 'timestamp': '2025-10-02 00:26:26.667195', 'step': 8939, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:26.725588', 'step': 8939, 'epoch': 1}
{'type': 'loss', 'content': 0.25967657566070557, 'timestamp': '2025-10-02 00:26:26.732204', 'step': 8940, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:26:26.787010', 'step': 8940, 'epoch': 1}
{'type': 'loss', 'content': 0.10393750667572021, 'timestamp': '2025-10-02 00:26:26.790645', 'step': 8941, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:26.847467', 'step': 8941, 'epoch': 1}
{'type': 'loss', 'content': 0.11618942767381668, 'timestamp': '2025-10-02 00:26:26.850463', 'step': 8942, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:26.908374', 'step': 8942, 'epoch': 1}
{'type': 'loss', 'content': 0.09201464056968689, 'timestamp': '2025-10-02 00:26:26.914140', 'step': 8943, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:26:26.986400', 'step': 8943, 'epoch': 1}
{'type': 'loss', 'content': 0.038126397877931595, 'timestamp': '2025-10-02 00:26:26.999837', 'step': 8944, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:27.054720', 'step': 8944, 'epoch': 1}
{'type': 'loss', 'content': 0.10801719129085541, 'timestamp': '2025-10-02 00:26:27.059131', 'step': 8945, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:26:27.119786', 'step': 8945, 'epoch': 1}
{'type': 'loss', 'content': 0.03032793663442135, 'timestamp': '2025-10-02 00:26:27.129953', 'step': 8946, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:27.186340', 'step': 8946, 'epoch': 1}
{'type': 'loss', 'content': 0.12282264232635498, 'timestamp': '2025-10-02 00:26:27.189164', 'step': 8947, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:27.245146', 'step': 8947, 'epoch': 1}
{'type': 'loss', 'content': 0.06962092220783234, 'timestamp': '2025-10-02 00:26:27.251852', 'step': 8948, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:26:27.314200', 'step': 8948, 'epoch': 1}
{'type': 'loss', 'content': 0.057887714356184006, 'timestamp': '2025-10-02 00:26:27.325482', 'step': 8949, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:27.381557', 'step': 8949, 'epoch': 1}
{'type': 'loss', 'content': 0.035752248018980026, 'timestamp': '2025-10-02 00:26:27.387225', 'step': 8950, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:27.443985', 'step': 8950, 'epoch': 1}
{'type': 'loss', 'content': 0.021560175344347954, 'timestamp': '2025-10-02 00:26:27.446836', 'step': 8951, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:27.501827', 'step': 8951, 'epoch': 1}
{'type': 'loss', 'content': 0.1112275943160057, 'timestamp': '2025-10-02 00:26:27.508507', 'step': 8952, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:27.562634', 'step': 8952, 'epoch': 1}
{'type': 'loss', 'content': 0.046420324593782425, 'timestamp': '2025-10-02 00:26:27.568621', 'step': 8953, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:27.625069', 'step': 8953, 'epoch': 1}
{'type': 'loss', 'content': 0.023126468062400818, 'timestamp': '2025-10-02 00:26:27.634448', 'step': 8954, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:27.690471', 'step': 8954, 'epoch': 1}
{'type': 'loss', 'content': 0.0477689690887928, 'timestamp': '2025-10-02 00:26:27.693420', 'step': 8955, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:27.748977', 'step': 8955, 'epoch': 1}
{'type': 'loss', 'content': 0.10412757098674774, 'timestamp': '2025-10-02 00:26:27.755514', 'step': 8956, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:27.813335', 'step': 8956, 'epoch': 1}
{'type': 'loss', 'content': 0.011940190568566322, 'timestamp': '2025-10-02 00:26:27.820906', 'step': 8957, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:27.879250', 'step': 8957, 'epoch': 1}
{'type': 'loss', 'content': 0.08404006063938141, 'timestamp': '2025-10-02 00:26:27.881904', 'step': 8958, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:27.935782', 'step': 8958, 'epoch': 1}
{'type': 'loss', 'content': 0.09146644175052643, 'timestamp': '2025-10-02 00:26:27.938236', 'step': 8959, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:27.992101', 'step': 8959, 'epoch': 1}
{'type': 'loss', 'content': 0.05901072919368744, 'timestamp': '2025-10-02 00:26:27.998090', 'step': 8960, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:28.052164', 'step': 8960, 'epoch': 1}
{'type': 'loss', 'content': 0.06914740055799484, 'timestamp': '2025-10-02 00:26:28.059711', 'step': 8961, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:28.113983', 'step': 8961, 'epoch': 1}
{'type': 'loss', 'content': 0.06605802476406097, 'timestamp': '2025-10-02 00:26:28.116462', 'step': 8962, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:28.173037', 'step': 8962, 'epoch': 1}
{'type': 'loss', 'content': 0.060879044234752655, 'timestamp': '2025-10-02 00:26:28.175282', 'step': 8963, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:28.231858', 'step': 8963, 'epoch': 1}
{'type': 'loss', 'content': 0.06692183017730713, 'timestamp': '2025-10-02 00:26:28.240825', 'step': 8964, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:26:28.301255', 'step': 8964, 'epoch': 1}
{'type': 'loss', 'content': 0.07110784202814102, 'timestamp': '2025-10-02 00:26:28.312748', 'step': 8965, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:28.367657', 'step': 8965, 'epoch': 1}
{'type': 'loss', 'content': 0.11741193383932114, 'timestamp': '2025-10-02 00:26:28.375060', 'step': 8966, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:26:28.437453', 'step': 8966, 'epoch': 1}
{'type': 'loss', 'content': 0.03669029474258423, 'timestamp': '2025-10-02 00:26:28.448127', 'step': 8967, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:28.502808', 'step': 8967, 'epoch': 1}
{'type': 'loss', 'content': 0.09828107059001923, 'timestamp': '2025-10-02 00:26:28.509592', 'step': 8968, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:28.562699', 'step': 8968, 'epoch': 1}
{'type': 'loss', 'content': 0.13781549036502838, 'timestamp': '2025-10-02 00:26:28.565102', 'step': 8969, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:26:28.624283', 'step': 8969, 'epoch': 1}
{'type': 'loss', 'content': 0.014696384780108929, 'timestamp': '2025-10-02 00:26:28.634486', 'step': 8970, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:28.689372', 'step': 8970, 'epoch': 1}
{'type': 'loss', 'content': 0.06460355967283249, 'timestamp': '2025-10-02 00:26:28.696733', 'step': 8971, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:26:28.758379', 'step': 8971, 'epoch': 1}
{'type': 'loss', 'content': 0.01886122301220894, 'timestamp': '2025-10-02 00:26:28.769648', 'step': 8972, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:28.823298', 'step': 8972, 'epoch': 1}
{'type': 'loss', 'content': 0.23918457329273224, 'timestamp': '2025-10-02 00:26:28.825476', 'step': 8973, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:28.883187', 'step': 8973, 'epoch': 1}
{'type': 'loss', 'content': 0.07334780693054199, 'timestamp': '2025-10-02 00:26:28.885832', 'step': 8974, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:28.940621', 'step': 8974, 'epoch': 1}
{'type': 'loss', 'content': 0.10684884339570999, 'timestamp': '2025-10-02 00:26:28.942946', 'step': 8975, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:29.001093', 'step': 8975, 'epoch': 1}
{'type': 'loss', 'content': 0.05070897191762924, 'timestamp': '2025-10-02 00:26:29.007003', 'step': 8976, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:29.060133', 'step': 8976, 'epoch': 1}
{'type': 'loss', 'content': 0.2537901997566223, 'timestamp': '2025-10-02 00:26:29.062548', 'step': 8977, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:29.116056', 'step': 8977, 'epoch': 1}
{'type': 'loss', 'content': 0.13121241331100464, 'timestamp': '2025-10-02 00:26:29.118757', 'step': 8978, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:26:29.181851', 'step': 8978, 'epoch': 1}
{'type': 'loss', 'content': 0.05365981161594391, 'timestamp': '2025-10-02 00:26:29.194649', 'step': 8979, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:29.256492', 'step': 8979, 'epoch': 1}
{'type': 'loss', 'content': 0.052409008145332336, 'timestamp': '2025-10-02 00:26:29.264756', 'step': 8980, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:29.318731', 'step': 8980, 'epoch': 1}
{'type': 'loss', 'content': 0.06215445324778557, 'timestamp': '2025-10-02 00:26:29.320645', 'step': 8981, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:29.374477', 'step': 8981, 'epoch': 1}
{'type': 'loss', 'content': 0.11474763602018356, 'timestamp': '2025-10-02 00:26:29.376849', 'step': 8982, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:29.431791', 'step': 8982, 'epoch': 1}
{'type': 'loss', 'content': 0.06996068358421326, 'timestamp': '2025-10-02 00:26:29.437737', 'step': 8983, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:29.493302', 'step': 8983, 'epoch': 1}
{'type': 'loss', 'content': 0.24298398196697235, 'timestamp': '2025-10-02 00:26:29.499522', 'step': 8984, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:29.554455', 'step': 8984, 'epoch': 1}
{'type': 'loss', 'content': 0.12820571660995483, 'timestamp': '2025-10-02 00:26:29.563616', 'step': 8985, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:29.620985', 'step': 8985, 'epoch': 1}
{'type': 'loss', 'content': 0.17013807594776154, 'timestamp': '2025-10-02 00:26:29.623228', 'step': 8986, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:26:29.685670', 'step': 8986, 'epoch': 1}
{'type': 'loss', 'content': 0.03862734138965607, 'timestamp': '2025-10-02 00:26:29.696512', 'step': 8987, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:29.750071', 'step': 8987, 'epoch': 1}
{'type': 'loss', 'content': 0.10327453911304474, 'timestamp': '2025-10-02 00:26:29.757015', 'step': 8988, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:29.812991', 'step': 8988, 'epoch': 1}
{'type': 'loss', 'content': 0.17310550808906555, 'timestamp': '2025-10-02 00:26:29.815375', 'step': 8989, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:29.869867', 'step': 8989, 'epoch': 1}
{'type': 'loss', 'content': 0.050187159329652786, 'timestamp': '2025-10-02 00:26:29.877091', 'step': 8990, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:26:29.930982', 'step': 8990, 'epoch': 1}
{'type': 'loss', 'content': 0.14026455581188202, 'timestamp': '2025-10-02 00:26:29.933359', 'step': 8991, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:29.987663', 'step': 8991, 'epoch': 1}
{'type': 'loss', 'content': 0.06673598289489746, 'timestamp': '2025-10-02 00:26:29.995134', 'step': 8992, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:30.061591', 'step': 8992, 'epoch': 1}
{'type': 'loss', 'content': 0.15726599097251892, 'timestamp': '2025-10-02 00:26:30.064990', 'step': 8993, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:26:30.121286', 'step': 8993, 'epoch': 1}
{'type': 'loss', 'content': 0.13078509271144867, 'timestamp': '2025-10-02 00:26:30.123485', 'step': 8994, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:30.178176', 'step': 8994, 'epoch': 1}
{'type': 'loss', 'content': 0.1103760376572609, 'timestamp': '2025-10-02 00:26:30.180400', 'step': 8995, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:30.234521', 'step': 8995, 'epoch': 1}
{'type': 'loss', 'content': 0.13239091634750366, 'timestamp': '2025-10-02 00:26:30.241109', 'step': 8996, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:30.295550', 'step': 8996, 'epoch': 1}
{'type': 'loss', 'content': 0.02775738760828972, 'timestamp': '2025-10-02 00:26:30.298234', 'step': 8997, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:30.353018', 'step': 8997, 'epoch': 1}
{'type': 'loss', 'content': 0.0350719578564167, 'timestamp': '2025-10-02 00:26:30.355480', 'step': 8998, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:30.409494', 'step': 8998, 'epoch': 1}
{'type': 'loss', 'content': 0.12405440956354141, 'timestamp': '2025-10-02 00:26:30.411870', 'step': 8999, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:30.466706', 'step': 8999, 'epoch': 1}
{'type': 'loss', 'content': 0.06275472790002823, 'timestamp': '2025-10-02 00:26:30.472118', 'step': 9000, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 9000', 'timestamp': '2025-10-02 00:26:30.893693', 'step': 9000, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:26:30.955645', 'step': 9000, 'epoch': 1}
{'type': 'loss', 'content': 0.019768018275499344, 'timestamp': '2025-10-02 00:26:30.966896', 'step': 9001, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:31.021387', 'step': 9001, 'epoch': 1}
{'type': 'loss', 'content': 0.06420101225376129, 'timestamp': '2025-10-02 00:26:31.028232', 'step': 9002, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:31.082851', 'step': 9002, 'epoch': 1}
{'type': 'loss', 'content': 0.12444016337394714, 'timestamp': '2025-10-02 00:26:31.084674', 'step': 9003, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:31.139426', 'step': 9003, 'epoch': 1}
{'type': 'loss', 'content': 0.10130735486745834, 'timestamp': '2025-10-02 00:26:31.144999', 'step': 9004, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:31.199815', 'step': 9004, 'epoch': 1}
{'type': 'loss', 'content': 0.0663081482052803, 'timestamp': '2025-10-02 00:26:31.202097', 'step': 9005, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:31.258129', 'step': 9005, 'epoch': 1}
{'type': 'loss', 'content': 0.10936684161424637, 'timestamp': '2025-10-02 00:26:31.260680', 'step': 9006, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:31.315150', 'step': 9006, 'epoch': 1}
{'type': 'loss', 'content': 0.11898405849933624, 'timestamp': '2025-10-02 00:26:31.317390', 'step': 9007, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:26:31.378431', 'step': 9007, 'epoch': 1}
{'type': 'loss', 'content': 0.06359249353408813, 'timestamp': '2025-10-02 00:26:31.389635', 'step': 9008, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:31.443284', 'step': 9008, 'epoch': 1}
{'type': 'loss', 'content': 0.05594094842672348, 'timestamp': '2025-10-02 00:26:31.445649', 'step': 9009, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:31.499781', 'step': 9009, 'epoch': 1}
{'type': 'loss', 'content': 0.05100145563483238, 'timestamp': '2025-10-02 00:26:31.509227', 'step': 9010, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:31.564766', 'step': 9010, 'epoch': 1}
{'type': 'loss', 'content': 0.038239579647779465, 'timestamp': '2025-10-02 00:26:31.574026', 'step': 9011, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:31.628481', 'step': 9011, 'epoch': 1}
{'type': 'loss', 'content': 0.05669397860765457, 'timestamp': '2025-10-02 00:26:31.634977', 'step': 9012, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:26:31.688214', 'step': 9012, 'epoch': 1}
{'type': 'loss', 'content': 0.1049584373831749, 'timestamp': '2025-10-02 00:26:31.690917', 'step': 9013, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:26:31.745272', 'step': 9013, 'epoch': 1}
{'type': 'loss', 'content': 0.15206186473369598, 'timestamp': '2025-10-02 00:26:31.747653', 'step': 9014, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:31.802123', 'step': 9014, 'epoch': 1}
{'type': 'loss', 'content': 0.06943565607070923, 'timestamp': '2025-10-02 00:26:31.807749', 'step': 9015, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:31.861964', 'step': 9015, 'epoch': 1}
{'type': 'loss', 'content': 0.13688495755195618, 'timestamp': '2025-10-02 00:26:31.867888', 'step': 9016, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:26:31.927119', 'step': 9016, 'epoch': 1}
{'type': 'loss', 'content': 0.07649634033441544, 'timestamp': '2025-10-02 00:26:31.938412', 'step': 9017, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:26:31.992593', 'step': 9017, 'epoch': 1}
{'type': 'loss', 'content': 0.08497578650712967, 'timestamp': '2025-10-02 00:26:31.994438', 'step': 9018, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:32.048281', 'step': 9018, 'epoch': 1}
{'type': 'loss', 'content': 0.06683870404958725, 'timestamp': '2025-10-02 00:26:32.050875', 'step': 9019, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:26:32.106810', 'step': 9019, 'epoch': 1}
{'type': 'loss', 'content': 0.04184141382575035, 'timestamp': '2025-10-02 00:26:32.117180', 'step': 9020, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:32.173832', 'step': 9020, 'epoch': 1}
{'type': 'loss', 'content': 0.13100750744342804, 'timestamp': '2025-10-02 00:26:32.181223', 'step': 9021, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:32.235488', 'step': 9021, 'epoch': 1}
{'type': 'loss', 'content': 0.13299132883548737, 'timestamp': '2025-10-02 00:26:32.237923', 'step': 9022, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:32.292582', 'step': 9022, 'epoch': 1}
{'type': 'loss', 'content': 0.04355281963944435, 'timestamp': '2025-10-02 00:26:32.299644', 'step': 9023, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:32.354292', 'step': 9023, 'epoch': 1}
{'type': 'loss', 'content': 0.042010869830846786, 'timestamp': '2025-10-02 00:26:32.360020', 'step': 9024, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:32.413358', 'step': 9024, 'epoch': 1}
{'type': 'loss', 'content': 0.02981811948120594, 'timestamp': '2025-10-02 00:26:32.422966', 'step': 9025, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:26:32.493511', 'step': 9025, 'epoch': 1}
{'type': 'loss', 'content': 0.03050030954182148, 'timestamp': '2025-10-02 00:26:32.505980', 'step': 9026, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:26:32.561182', 'step': 9026, 'epoch': 1}
{'type': 'loss', 'content': 0.10196512937545776, 'timestamp': '2025-10-02 00:26:32.563406', 'step': 9027, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:32.618743', 'step': 9027, 'epoch': 1}
{'type': 'loss', 'content': 0.07797356694936752, 'timestamp': '2025-10-02 00:26:32.625266', 'step': 9028, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:32.681205', 'step': 9028, 'epoch': 1}
{'type': 'loss', 'content': 0.04790444299578667, 'timestamp': '2025-10-02 00:26:32.686789', 'step': 9029, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:32.741311', 'step': 9029, 'epoch': 1}
{'type': 'loss', 'content': 0.054139379411935806, 'timestamp': '2025-10-02 00:26:32.743720', 'step': 9030, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:26:32.798770', 'step': 9030, 'epoch': 1}
{'type': 'loss', 'content': 0.018035652115941048, 'timestamp': '2025-10-02 00:26:32.801221', 'step': 9031, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:32.855735', 'step': 9031, 'epoch': 1}
{'type': 'loss', 'content': 0.03663528338074684, 'timestamp': '2025-10-02 00:26:32.865806', 'step': 9032, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:26:32.922721', 'step': 9032, 'epoch': 1}
{'type': 'loss', 'content': 0.02664906531572342, 'timestamp': '2025-10-02 00:26:32.933695', 'step': 9033, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:26:32.988763', 'step': 9033, 'epoch': 1}
{'type': 'loss', 'content': 0.12060927599668503, 'timestamp': '2025-10-02 00:26:32.990557', 'step': 9034, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:33.044007', 'step': 9034, 'epoch': 1}
{'type': 'loss', 'content': 0.20811250805854797, 'timestamp': '2025-10-02 00:26:33.046887', 'step': 9035, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:33.101143', 'step': 9035, 'epoch': 1}
{'type': 'loss', 'content': 0.13431885838508606, 'timestamp': '2025-10-02 00:26:33.106861', 'step': 9036, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:26:33.160034', 'step': 9036, 'epoch': 1}
{'type': 'loss', 'content': 0.23810173571109772, 'timestamp': '2025-10-02 00:26:33.162315', 'step': 9037, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:33.216647', 'step': 9037, 'epoch': 1}
{'type': 'loss', 'content': 0.1122315302491188, 'timestamp': '2025-10-02 00:26:33.225963', 'step': 9038, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:26:33.280515', 'step': 9038, 'epoch': 1}
{'type': 'loss', 'content': 0.08787252753973007, 'timestamp': '2025-10-02 00:26:33.282709', 'step': 9039, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:33.336513', 'step': 9039, 'epoch': 1}
{'type': 'loss', 'content': 0.04084361717104912, 'timestamp': '2025-10-02 00:26:33.346609', 'step': 9040, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:26:33.400370', 'step': 9040, 'epoch': 1}
{'type': 'loss', 'content': 0.07642676681280136, 'timestamp': '2025-10-02 00:26:33.403143', 'step': 9041, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:26:33.457178', 'step': 9041, 'epoch': 1}
{'type': 'loss', 'content': 0.08868885785341263, 'timestamp': '2025-10-02 00:26:33.462826', 'step': 9042, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:33.517722', 'step': 9042, 'epoch': 1}
{'type': 'loss', 'content': 0.0545063316822052, 'timestamp': '2025-10-02 00:26:33.519537', 'step': 9043, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:33.574045', 'step': 9043, 'epoch': 1}
{'type': 'loss', 'content': 0.02679733745753765, 'timestamp': '2025-10-02 00:26:33.582219', 'step': 9044, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:26:33.635574', 'step': 9044, 'epoch': 1}
{'type': 'loss', 'content': 0.2355259656906128, 'timestamp': '2025-10-02 00:26:33.637650', 'step': 9045, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:26:33.692205', 'step': 9045, 'epoch': 1}
{'type': 'loss', 'content': 0.1245761513710022, 'timestamp': '2025-10-02 00:26:33.694619', 'step': 9046, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:26:33.749076', 'step': 9046, 'epoch': 1}
{'type': 'loss', 'content': 0.08794932812452316, 'timestamp': '2025-10-02 00:26:33.758293', 'step': 9047, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:26:33.812060', 'step': 9047, 'epoch': 1}
{'type': 'loss', 'content': 0.07472502440214157, 'timestamp': '2025-10-02 00:26:33.820265', 'step': 9048, 'epoch': 1}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:27:01.191069', 'step': 9048, 'epoch': 1}
{'type': 'pplx', 'content': 97.34901647308128, 'timestamp': '2025-10-02 00:27:01.194947', 'step': 9048, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:01.251365', 'step': 9048, 'epoch': 1}
{'type': 'loss', 'content': 0.01472486648708582, 'timestamp': '2025-10-02 00:27:01.255476', 'step': 9049, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:01.311366', 'step': 9049, 'epoch': 1}
{'type': 'loss', 'content': 0.060634974390268326, 'timestamp': '2025-10-02 00:27:01.316212', 'step': 9050, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:01.370404', 'step': 9050, 'epoch': 1}
{'type': 'loss', 'content': 0.1536279022693634, 'timestamp': '2025-10-02 00:27:01.372761', 'step': 9051, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:01.426864', 'step': 9051, 'epoch': 1}
{'type': 'loss', 'content': 0.1067100241780281, 'timestamp': '2025-10-02 00:27:01.433814', 'step': 9052, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:01.489031', 'step': 9052, 'epoch': 1}
{'type': 'loss', 'content': 0.18338432908058167, 'timestamp': '2025-10-02 00:27:01.491730', 'step': 9053, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:27:01.554790', 'step': 9053, 'epoch': 1}
{'type': 'loss', 'content': 0.031200198456645012, 'timestamp': '2025-10-02 00:27:01.565657', 'step': 9054, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:01.621342', 'step': 9054, 'epoch': 1}
{'type': 'loss', 'content': 0.05719413608312607, 'timestamp': '2025-10-02 00:27:01.630882', 'step': 9055, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:01.686174', 'step': 9055, 'epoch': 1}
{'type': 'loss', 'content': 0.05384562537074089, 'timestamp': '2025-10-02 00:27:01.696549', 'step': 9056, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:01.751906', 'step': 9056, 'epoch': 1}
{'type': 'loss', 'content': 0.0337662547826767, 'timestamp': '2025-10-02 00:27:01.754226', 'step': 9057, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:01.808581', 'step': 9057, 'epoch': 1}
{'type': 'loss', 'content': 0.04019922390580177, 'timestamp': '2025-10-02 00:27:01.816275', 'step': 9058, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:01.871524', 'step': 9058, 'epoch': 1}
{'type': 'loss', 'content': 0.09874352067708969, 'timestamp': '2025-10-02 00:27:01.874183', 'step': 9059, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:01.928167', 'step': 9059, 'epoch': 1}
{'type': 'loss', 'content': 0.02571081556379795, 'timestamp': '2025-10-02 00:27:01.938274', 'step': 9060, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:27:01.999336', 'step': 9060, 'epoch': 1}
{'type': 'loss', 'content': 0.11633200943470001, 'timestamp': '2025-10-02 00:27:02.011142', 'step': 9061, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:02.065064', 'step': 9061, 'epoch': 1}
{'type': 'loss', 'content': 0.07648460566997528, 'timestamp': '2025-10-02 00:27:02.067465', 'step': 9062, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:27:02.130358', 'step': 9062, 'epoch': 1}
{'type': 'loss', 'content': 0.09923090785741806, 'timestamp': '2025-10-02 00:27:02.141211', 'step': 9063, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:02.195487', 'step': 9063, 'epoch': 1}
{'type': 'loss', 'content': 0.2195838987827301, 'timestamp': '2025-10-02 00:27:02.201214', 'step': 9064, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:02.254986', 'step': 9064, 'epoch': 1}
{'type': 'loss', 'content': 0.04764386638998985, 'timestamp': '2025-10-02 00:27:02.257251', 'step': 9065, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:02.310989', 'step': 9065, 'epoch': 1}
{'type': 'loss', 'content': 0.148570716381073, 'timestamp': '2025-10-02 00:27:02.313443', 'step': 9066, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:02.368756', 'step': 9066, 'epoch': 1}
{'type': 'loss', 'content': 0.07230961322784424, 'timestamp': '2025-10-02 00:27:02.371509', 'step': 9067, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:02.426172', 'step': 9067, 'epoch': 1}
{'type': 'loss', 'content': 0.07171115279197693, 'timestamp': '2025-10-02 00:27:02.431840', 'step': 9068, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:02.485703', 'step': 9068, 'epoch': 1}
{'type': 'loss', 'content': 0.023401737213134766, 'timestamp': '2025-10-02 00:27:02.488361', 'step': 9069, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:27:02.542636', 'step': 9069, 'epoch': 1}
{'type': 'loss', 'content': 0.08360370248556137, 'timestamp': '2025-10-02 00:27:02.545247', 'step': 9070, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:02.599604', 'step': 9070, 'epoch': 1}
{'type': 'loss', 'content': 0.18912677466869354, 'timestamp': '2025-10-02 00:27:02.602040', 'step': 9071, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:27:02.655440', 'step': 9071, 'epoch': 1}
{'type': 'loss', 'content': 0.10698799788951874, 'timestamp': '2025-10-02 00:27:02.661456', 'step': 9072, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:02.721050', 'step': 9072, 'epoch': 1}
{'type': 'loss', 'content': 0.06996210664510727, 'timestamp': '2025-10-02 00:27:02.723694', 'step': 9073, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:02.777855', 'step': 9073, 'epoch': 1}
{'type': 'loss', 'content': 0.07518955320119858, 'timestamp': '2025-10-02 00:27:02.780326', 'step': 9074, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:02.834717', 'step': 9074, 'epoch': 1}
{'type': 'loss', 'content': 0.06887614727020264, 'timestamp': '2025-10-02 00:27:02.841965', 'step': 9075, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:02.900792', 'step': 9075, 'epoch': 1}
{'type': 'loss', 'content': 0.03812433406710625, 'timestamp': '2025-10-02 00:27:02.911776', 'step': 9076, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:02.966702', 'step': 9076, 'epoch': 1}
{'type': 'loss', 'content': 0.028942670673131943, 'timestamp': '2025-10-02 00:27:02.976924', 'step': 9077, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:03.031003', 'step': 9077, 'epoch': 1}
{'type': 'loss', 'content': 0.052607521414756775, 'timestamp': '2025-10-02 00:27:03.033499', 'step': 9078, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:03.088415', 'step': 9078, 'epoch': 1}
{'type': 'loss', 'content': 0.18401272594928741, 'timestamp': '2025-10-02 00:27:03.090848', 'step': 9079, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:03.146039', 'step': 9079, 'epoch': 1}
{'type': 'loss', 'content': 0.0727461576461792, 'timestamp': '2025-10-02 00:27:03.156192', 'step': 9080, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:27:03.212399', 'step': 9080, 'epoch': 1}
{'type': 'loss', 'content': 0.12634922564029694, 'timestamp': '2025-10-02 00:27:03.220578', 'step': 9081, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:03.277607', 'step': 9081, 'epoch': 1}
{'type': 'loss', 'content': 0.17880478501319885, 'timestamp': '2025-10-02 00:27:03.280714', 'step': 9082, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:03.335309', 'step': 9082, 'epoch': 1}
{'type': 'loss', 'content': 0.01572314463555813, 'timestamp': '2025-10-02 00:27:03.340988', 'step': 9083, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:03.395721', 'step': 9083, 'epoch': 1}
{'type': 'loss', 'content': 0.05933931842446327, 'timestamp': '2025-10-02 00:27:03.401845', 'step': 9084, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:03.455575', 'step': 9084, 'epoch': 1}
{'type': 'loss', 'content': 0.2201230227947235, 'timestamp': '2025-10-02 00:27:03.457965', 'step': 9085, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:03.512302', 'step': 9085, 'epoch': 1}
{'type': 'loss', 'content': 0.09262610226869583, 'timestamp': '2025-10-02 00:27:03.514657', 'step': 9086, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:03.570401', 'step': 9086, 'epoch': 1}
{'type': 'loss', 'content': 0.0776737853884697, 'timestamp': '2025-10-02 00:27:03.574427', 'step': 9087, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:03.630611', 'step': 9087, 'epoch': 1}
{'type': 'loss', 'content': 0.18406595289707184, 'timestamp': '2025-10-02 00:27:03.636173', 'step': 9088, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:03.693568', 'step': 9088, 'epoch': 1}
{'type': 'loss', 'content': 0.08638505637645721, 'timestamp': '2025-10-02 00:27:03.704543', 'step': 9089, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:03.759222', 'step': 9089, 'epoch': 1}
{'type': 'loss', 'content': 0.11145001649856567, 'timestamp': '2025-10-02 00:27:03.761820', 'step': 9090, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:03.820012', 'step': 9090, 'epoch': 1}
{'type': 'loss', 'content': 0.06887472420930862, 'timestamp': '2025-10-02 00:27:03.827332', 'step': 9091, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:03.881328', 'step': 9091, 'epoch': 1}
{'type': 'loss', 'content': 0.1264190673828125, 'timestamp': '2025-10-02 00:27:03.887423', 'step': 9092, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:03.940942', 'step': 9092, 'epoch': 1}
{'type': 'loss', 'content': 0.08505789935588837, 'timestamp': '2025-10-02 00:27:03.943371', 'step': 9093, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:03.997636', 'step': 9093, 'epoch': 1}
{'type': 'loss', 'content': 0.05562419444322586, 'timestamp': '2025-10-02 00:27:04.000305', 'step': 9094, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:04.054758', 'step': 9094, 'epoch': 1}
{'type': 'loss', 'content': 0.03555932268500328, 'timestamp': '2025-10-02 00:27:04.058123', 'step': 9095, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:04.112920', 'step': 9095, 'epoch': 1}
{'type': 'loss', 'content': 0.032382916659116745, 'timestamp': '2025-10-02 00:27:04.119842', 'step': 9096, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:04.173707', 'step': 9096, 'epoch': 1}
{'type': 'loss', 'content': 0.0788937583565712, 'timestamp': '2025-10-02 00:27:04.176762', 'step': 9097, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:04.232217', 'step': 9097, 'epoch': 1}
{'type': 'loss', 'content': 0.022367587313055992, 'timestamp': '2025-10-02 00:27:04.241790', 'step': 9098, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:04.298388', 'step': 9098, 'epoch': 1}
{'type': 'loss', 'content': 0.08794727921485901, 'timestamp': '2025-10-02 00:27:04.302488', 'step': 9099, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:04.359592', 'step': 9099, 'epoch': 1}
{'type': 'loss', 'content': 0.15590403974056244, 'timestamp': '2025-10-02 00:27:04.368154', 'step': 9100, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:04.424101', 'step': 9100, 'epoch': 1}
{'type': 'loss', 'content': 0.06074085086584091, 'timestamp': '2025-10-02 00:27:04.427458', 'step': 9101, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:04.484487', 'step': 9101, 'epoch': 1}
{'type': 'loss', 'content': 0.03560155630111694, 'timestamp': '2025-10-02 00:27:04.487234', 'step': 9102, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:04.542847', 'step': 9102, 'epoch': 1}
{'type': 'loss', 'content': 0.047695767134428024, 'timestamp': '2025-10-02 00:27:04.547261', 'step': 9103, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:04.607592', 'step': 9103, 'epoch': 1}
{'type': 'loss', 'content': 0.14722132682800293, 'timestamp': '2025-10-02 00:27:04.614197', 'step': 9104, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:04.669593', 'step': 9104, 'epoch': 1}
{'type': 'loss', 'content': 0.06699666380882263, 'timestamp': '2025-10-02 00:27:04.672705', 'step': 9105, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:04.728206', 'step': 9105, 'epoch': 1}
{'type': 'loss', 'content': 0.14726702868938446, 'timestamp': '2025-10-02 00:27:04.731284', 'step': 9106, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:04.786667', 'step': 9106, 'epoch': 1}
{'type': 'loss', 'content': 0.18668010830879211, 'timestamp': '2025-10-02 00:27:04.789465', 'step': 9107, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:04.844224', 'step': 9107, 'epoch': 1}
{'type': 'loss', 'content': 0.023190895095467567, 'timestamp': '2025-10-02 00:27:04.851029', 'step': 9108, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:04.906448', 'step': 9108, 'epoch': 1}
{'type': 'loss', 'content': 0.05429087579250336, 'timestamp': '2025-10-02 00:27:04.909894', 'step': 9109, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:27:04.973838', 'step': 9109, 'epoch': 1}
{'type': 'loss', 'content': 0.05194471403956413, 'timestamp': '2025-10-02 00:27:04.984512', 'step': 9110, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:05.042997', 'step': 9110, 'epoch': 1}
{'type': 'loss', 'content': 0.022218897938728333, 'timestamp': '2025-10-02 00:27:05.052154', 'step': 9111, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:05.113759', 'step': 9111, 'epoch': 1}
{'type': 'loss', 'content': 0.11841268837451935, 'timestamp': '2025-10-02 00:27:05.120859', 'step': 9112, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:05.178353', 'step': 9112, 'epoch': 1}
{'type': 'loss', 'content': 0.009892266243696213, 'timestamp': '2025-10-02 00:27:05.187685', 'step': 9113, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:05.244769', 'step': 9113, 'epoch': 1}
{'type': 'loss', 'content': 0.09506429731845856, 'timestamp': '2025-10-02 00:27:05.247195', 'step': 9114, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:05.303298', 'step': 9114, 'epoch': 1}
{'type': 'loss', 'content': 0.1168026551604271, 'timestamp': '2025-10-02 00:27:05.312216', 'step': 9115, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:05.368339', 'step': 9115, 'epoch': 1}
{'type': 'loss', 'content': 0.07937892526388168, 'timestamp': '2025-10-02 00:27:05.374991', 'step': 9116, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:05.431268', 'step': 9116, 'epoch': 1}
{'type': 'loss', 'content': 0.01610778644680977, 'timestamp': '2025-10-02 00:27:05.434987', 'step': 9117, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:05.491202', 'step': 9117, 'epoch': 1}
{'type': 'loss', 'content': 0.2404436618089676, 'timestamp': '2025-10-02 00:27:05.494000', 'step': 9118, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:05.550138', 'step': 9118, 'epoch': 1}
{'type': 'loss', 'content': 0.13505592942237854, 'timestamp': '2025-10-02 00:27:05.552554', 'step': 9119, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:05.607493', 'step': 9119, 'epoch': 1}
{'type': 'loss', 'content': 0.03636768460273743, 'timestamp': '2025-10-02 00:27:05.613455', 'step': 9120, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:05.667168', 'step': 9120, 'epoch': 1}
{'type': 'loss', 'content': 0.01573985256254673, 'timestamp': '2025-10-02 00:27:05.674891', 'step': 9121, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:05.729421', 'step': 9121, 'epoch': 1}
{'type': 'loss', 'content': 0.13164491951465607, 'timestamp': '2025-10-02 00:27:05.735176', 'step': 9122, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:05.791141', 'step': 9122, 'epoch': 1}
{'type': 'loss', 'content': 0.043893203139305115, 'timestamp': '2025-10-02 00:27:05.797060', 'step': 9123, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:05.852079', 'step': 9123, 'epoch': 1}
{'type': 'loss', 'content': 0.06392813473939896, 'timestamp': '2025-10-02 00:27:05.858023', 'step': 9124, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:05.911573', 'step': 9124, 'epoch': 1}
{'type': 'loss', 'content': 0.04862815514206886, 'timestamp': '2025-10-02 00:27:05.918984', 'step': 9125, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:05.973087', 'step': 9125, 'epoch': 1}
{'type': 'loss', 'content': 0.20153690874576569, 'timestamp': '2025-10-02 00:27:05.975565', 'step': 9126, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:06.029888', 'step': 9126, 'epoch': 1}
{'type': 'loss', 'content': 0.06727290898561478, 'timestamp': '2025-10-02 00:27:06.032393', 'step': 9127, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:06.086481', 'step': 9127, 'epoch': 1}
{'type': 'loss', 'content': 0.14132769405841827, 'timestamp': '2025-10-02 00:27:06.092567', 'step': 9128, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:27:06.147075', 'step': 9128, 'epoch': 1}
{'type': 'loss', 'content': 0.09561390429735184, 'timestamp': '2025-10-02 00:27:06.150092', 'step': 9129, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:06.212026', 'step': 9129, 'epoch': 1}
{'type': 'loss', 'content': 0.058687724173069, 'timestamp': '2025-10-02 00:27:06.222519', 'step': 9130, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:06.276793', 'step': 9130, 'epoch': 1}
{'type': 'loss', 'content': 0.04199152812361717, 'timestamp': '2025-10-02 00:27:06.282574', 'step': 9131, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:06.341684', 'step': 9131, 'epoch': 1}
{'type': 'loss', 'content': 0.017004527151584625, 'timestamp': '2025-10-02 00:27:06.352706', 'step': 9132, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:27:06.419111', 'step': 9132, 'epoch': 1}
{'type': 'loss', 'content': 0.02043800801038742, 'timestamp': '2025-10-02 00:27:06.432123', 'step': 9133, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:06.487632', 'step': 9133, 'epoch': 1}
{'type': 'loss', 'content': 0.07969573140144348, 'timestamp': '2025-10-02 00:27:06.490246', 'step': 9134, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:06.544608', 'step': 9134, 'epoch': 1}
{'type': 'loss', 'content': 0.14683258533477783, 'timestamp': '2025-10-02 00:27:06.546960', 'step': 9135, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:06.601353', 'step': 9135, 'epoch': 1}
{'type': 'loss', 'content': 0.06102535128593445, 'timestamp': '2025-10-02 00:27:06.608144', 'step': 9136, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:06.662146', 'step': 9136, 'epoch': 1}
{'type': 'loss', 'content': 0.1828310191631317, 'timestamp': '2025-10-02 00:27:06.669701', 'step': 9137, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:06.723373', 'step': 9137, 'epoch': 1}
{'type': 'loss', 'content': 0.18102975189685822, 'timestamp': '2025-10-02 00:27:06.725743', 'step': 9138, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:06.779494', 'step': 9138, 'epoch': 1}
{'type': 'loss', 'content': 0.04863426834344864, 'timestamp': '2025-10-02 00:27:06.787078', 'step': 9139, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:06.841097', 'step': 9139, 'epoch': 1}
{'type': 'loss', 'content': 0.139104425907135, 'timestamp': '2025-10-02 00:27:06.847395', 'step': 9140, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:06.901672', 'step': 9140, 'epoch': 1}
{'type': 'loss', 'content': 0.03392486646771431, 'timestamp': '2025-10-02 00:27:06.904337', 'step': 9141, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:06.959297', 'step': 9141, 'epoch': 1}
{'type': 'loss', 'content': 0.03590218350291252, 'timestamp': '2025-10-02 00:27:06.962322', 'step': 9142, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:07.016339', 'step': 9142, 'epoch': 1}
{'type': 'loss', 'content': 0.27807196974754333, 'timestamp': '2025-10-02 00:27:07.018781', 'step': 9143, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:07.073467', 'step': 9143, 'epoch': 1}
{'type': 'loss', 'content': 0.049454543739557266, 'timestamp': '2025-10-02 00:27:07.080300', 'step': 9144, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:07.134622', 'step': 9144, 'epoch': 1}
{'type': 'loss', 'content': 0.1375526487827301, 'timestamp': '2025-10-02 00:27:07.137493', 'step': 9145, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:07.192075', 'step': 9145, 'epoch': 1}
{'type': 'loss', 'content': 0.10069245100021362, 'timestamp': '2025-10-02 00:27:07.194232', 'step': 9146, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:07.251280', 'step': 9146, 'epoch': 1}
{'type': 'loss', 'content': 0.03179130703210831, 'timestamp': '2025-10-02 00:27:07.253914', 'step': 9147, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:07.307462', 'step': 9147, 'epoch': 1}
{'type': 'loss', 'content': 0.1887035071849823, 'timestamp': '2025-10-02 00:27:07.313356', 'step': 9148, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:07.366711', 'step': 9148, 'epoch': 1}
{'type': 'loss', 'content': 0.049631133675575256, 'timestamp': '2025-10-02 00:27:07.369287', 'step': 9149, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:07.425732', 'step': 9149, 'epoch': 1}
{'type': 'loss', 'content': 0.10790413618087769, 'timestamp': '2025-10-02 00:27:07.428121', 'step': 9150, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:27:07.482269', 'step': 9150, 'epoch': 1}
{'type': 'loss', 'content': 0.07147126644849777, 'timestamp': '2025-10-02 00:27:07.484634', 'step': 9151, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:07.539510', 'step': 9151, 'epoch': 1}
{'type': 'loss', 'content': 0.04858551174402237, 'timestamp': '2025-10-02 00:27:07.545394', 'step': 9152, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:07.598999', 'step': 9152, 'epoch': 1}
{'type': 'loss', 'content': 0.053414393216371536, 'timestamp': '2025-10-02 00:27:07.606576', 'step': 9153, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:27:07.660875', 'step': 9153, 'epoch': 1}
{'type': 'loss', 'content': 0.04864906892180443, 'timestamp': '2025-10-02 00:27:07.663232', 'step': 9154, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:07.718395', 'step': 9154, 'epoch': 1}
{'type': 'loss', 'content': 0.06434911489486694, 'timestamp': '2025-10-02 00:27:07.720917', 'step': 9155, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:07.783227', 'step': 9155, 'epoch': 1}
{'type': 'loss', 'content': 0.022065281867980957, 'timestamp': '2025-10-02 00:27:07.794456', 'step': 9156, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:07.847989', 'step': 9156, 'epoch': 1}
{'type': 'loss', 'content': 0.05361991003155708, 'timestamp': '2025-10-02 00:27:07.850383', 'step': 9157, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:07.904489', 'step': 9157, 'epoch': 1}
{'type': 'loss', 'content': 0.011303085833787918, 'timestamp': '2025-10-02 00:27:07.910403', 'step': 9158, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:07.965531', 'step': 9158, 'epoch': 1}
{'type': 'loss', 'content': 0.04315103590488434, 'timestamp': '2025-10-02 00:27:07.971343', 'step': 9159, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:27:08.034053', 'step': 9159, 'epoch': 1}
{'type': 'loss', 'content': 0.01782512478530407, 'timestamp': '2025-10-02 00:27:08.045691', 'step': 9160, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:08.099041', 'step': 9160, 'epoch': 1}
{'type': 'loss', 'content': 0.10294878482818604, 'timestamp': '2025-10-02 00:27:08.104893', 'step': 9161, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:08.159523', 'step': 9161, 'epoch': 1}
{'type': 'loss', 'content': 0.03351052105426788, 'timestamp': '2025-10-02 00:27:08.167170', 'step': 9162, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:08.221757', 'step': 9162, 'epoch': 1}
{'type': 'loss', 'content': 0.1564207524061203, 'timestamp': '2025-10-02 00:27:08.227432', 'step': 9163, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:08.284677', 'step': 9163, 'epoch': 1}
{'type': 'loss', 'content': 0.16421885788440704, 'timestamp': '2025-10-02 00:27:08.290686', 'step': 9164, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:08.347406', 'step': 9164, 'epoch': 1}
{'type': 'loss', 'content': 0.057336822152137756, 'timestamp': '2025-10-02 00:27:08.358481', 'step': 9165, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:08.417218', 'step': 9165, 'epoch': 1}
{'type': 'loss', 'content': 0.10717425495386124, 'timestamp': '2025-10-02 00:27:08.419371', 'step': 9166, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:08.474667', 'step': 9166, 'epoch': 1}
{'type': 'loss', 'content': 0.072660431265831, 'timestamp': '2025-10-02 00:27:08.477212', 'step': 9167, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:08.531054', 'step': 9167, 'epoch': 1}
{'type': 'loss', 'content': 0.13080503046512604, 'timestamp': '2025-10-02 00:27:08.537468', 'step': 9168, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:08.591155', 'step': 9168, 'epoch': 1}
{'type': 'loss', 'content': 0.047975361347198486, 'timestamp': '2025-10-02 00:27:08.593991', 'step': 9169, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:27:08.662977', 'step': 9169, 'epoch': 1}
{'type': 'loss', 'content': 0.05208415910601616, 'timestamp': '2025-10-02 00:27:08.693451', 'step': 9170, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:08.783921', 'step': 9170, 'epoch': 1}
{'type': 'loss', 'content': 0.020973961800336838, 'timestamp': '2025-10-02 00:27:08.795475', 'step': 9171, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:08.870338', 'step': 9171, 'epoch': 1}
{'type': 'loss', 'content': 0.02437998168170452, 'timestamp': '2025-10-02 00:27:08.885983', 'step': 9172, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:27:08.975642', 'step': 9172, 'epoch': 1}
{'type': 'loss', 'content': 0.025138702243566513, 'timestamp': '2025-10-02 00:27:08.989242', 'step': 9173, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:09.072304', 'step': 9173, 'epoch': 1}
{'type': 'loss', 'content': 0.14197568595409393, 'timestamp': '2025-10-02 00:27:09.082479', 'step': 9174, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:09.158954', 'step': 9174, 'epoch': 1}
{'type': 'loss', 'content': 0.06320766359567642, 'timestamp': '2025-10-02 00:27:09.168844', 'step': 9175, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:09.242719', 'step': 9175, 'epoch': 1}
{'type': 'loss', 'content': 0.05860108137130737, 'timestamp': '2025-10-02 00:27:09.259484', 'step': 9176, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:09.338043', 'step': 9176, 'epoch': 1}
{'type': 'loss', 'content': 0.05118049681186676, 'timestamp': '2025-10-02 00:27:09.344341', 'step': 9177, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:09.432128', 'step': 9177, 'epoch': 1}
{'type': 'loss', 'content': 0.020679181441664696, 'timestamp': '2025-10-02 00:27:09.444978', 'step': 9178, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:09.517800', 'step': 9178, 'epoch': 1}
{'type': 'loss', 'content': 0.17926344275474548, 'timestamp': '2025-10-02 00:27:09.523387', 'step': 9179, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:09.605277', 'step': 9179, 'epoch': 1}
{'type': 'loss', 'content': 0.030182024464011192, 'timestamp': '2025-10-02 00:27:09.617128', 'step': 9180, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:09.696846', 'step': 9180, 'epoch': 1}
{'type': 'loss', 'content': 0.048164404928684235, 'timestamp': '2025-10-02 00:27:09.707130', 'step': 9181, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:09.779106', 'step': 9181, 'epoch': 1}
{'type': 'loss', 'content': 0.09000556915998459, 'timestamp': '2025-10-02 00:27:09.790057', 'step': 9182, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:09.858404', 'step': 9182, 'epoch': 1}
{'type': 'loss', 'content': 0.08336681872606277, 'timestamp': '2025-10-02 00:27:09.861041', 'step': 9183, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:27:09.915241', 'step': 9183, 'epoch': 1}
{'type': 'loss', 'content': 0.09367778897285461, 'timestamp': '2025-10-02 00:27:09.921193', 'step': 9184, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:09.975605', 'step': 9184, 'epoch': 1}
{'type': 'loss', 'content': 0.021038826555013657, 'timestamp': '2025-10-02 00:27:09.984754', 'step': 9185, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:27:10.038806', 'step': 9185, 'epoch': 1}
{'type': 'loss', 'content': 0.13990117609500885, 'timestamp': '2025-10-02 00:27:10.040996', 'step': 9186, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:10.096021', 'step': 9186, 'epoch': 1}
{'type': 'loss', 'content': 0.10699090361595154, 'timestamp': '2025-10-02 00:27:10.098594', 'step': 9187, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:10.159838', 'step': 9187, 'epoch': 1}
{'type': 'loss', 'content': 0.04675440862774849, 'timestamp': '2025-10-02 00:27:10.171142', 'step': 9188, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:10.226173', 'step': 9188, 'epoch': 1}
{'type': 'loss', 'content': 0.13269464671611786, 'timestamp': '2025-10-02 00:27:10.228375', 'step': 9189, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:10.283621', 'step': 9189, 'epoch': 1}
{'type': 'loss', 'content': 0.08279475569725037, 'timestamp': '2025-10-02 00:27:10.289617', 'step': 9190, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:10.347413', 'step': 9190, 'epoch': 1}
{'type': 'loss', 'content': 0.017723431810736656, 'timestamp': '2025-10-02 00:27:10.354812', 'step': 9191, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:10.409345', 'step': 9191, 'epoch': 1}
{'type': 'loss', 'content': 0.025403978303074837, 'timestamp': '2025-10-02 00:27:10.415351', 'step': 9192, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:10.469013', 'step': 9192, 'epoch': 1}
{'type': 'loss', 'content': 0.20948748290538788, 'timestamp': '2025-10-02 00:27:10.471303', 'step': 9193, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:10.525642', 'step': 9193, 'epoch': 1}
{'type': 'loss', 'content': 0.07936982065439224, 'timestamp': '2025-10-02 00:27:10.528946', 'step': 9194, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:10.585072', 'step': 9194, 'epoch': 1}
{'type': 'loss', 'content': 0.07343421876430511, 'timestamp': '2025-10-02 00:27:10.587574', 'step': 9195, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:27:10.642055', 'step': 9195, 'epoch': 1}
{'type': 'loss', 'content': 0.1665288805961609, 'timestamp': '2025-10-02 00:27:10.650623', 'step': 9196, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:10.712816', 'step': 9196, 'epoch': 1}
{'type': 'loss', 'content': 0.22042936086654663, 'timestamp': '2025-10-02 00:27:10.715085', 'step': 9197, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:10.773935', 'step': 9197, 'epoch': 1}
{'type': 'loss', 'content': 0.058465637266635895, 'timestamp': '2025-10-02 00:27:10.784132', 'step': 9198, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:10.841019', 'step': 9198, 'epoch': 1}
{'type': 'loss', 'content': 0.11778337508440018, 'timestamp': '2025-10-02 00:27:10.845683', 'step': 9199, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:10.907352', 'step': 9199, 'epoch': 1}
{'type': 'loss', 'content': 0.2226054072380066, 'timestamp': '2025-10-02 00:27:10.915176', 'step': 9200, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:10.970634', 'step': 9200, 'epoch': 1}
{'type': 'loss', 'content': 0.031164903193712234, 'timestamp': '2025-10-02 00:27:10.978084', 'step': 9201, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:11.034481', 'step': 9201, 'epoch': 1}
{'type': 'loss', 'content': 0.05889346823096275, 'timestamp': '2025-10-02 00:27:11.036828', 'step': 9202, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:11.092106', 'step': 9202, 'epoch': 1}
{'type': 'loss', 'content': 0.04471462219953537, 'timestamp': '2025-10-02 00:27:11.101682', 'step': 9203, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:11.156721', 'step': 9203, 'epoch': 1}
{'type': 'loss', 'content': 0.11877483874559402, 'timestamp': '2025-10-02 00:27:11.162979', 'step': 9204, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:11.216982', 'step': 9204, 'epoch': 1}
{'type': 'loss', 'content': 0.07519929856061935, 'timestamp': '2025-10-02 00:27:11.219607', 'step': 9205, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:11.281752', 'step': 9205, 'epoch': 1}
{'type': 'loss', 'content': 0.03888691961765289, 'timestamp': '2025-10-02 00:27:11.292242', 'step': 9206, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:11.347541', 'step': 9206, 'epoch': 1}
{'type': 'loss', 'content': 0.03826405853033066, 'timestamp': '2025-10-02 00:27:11.349911', 'step': 9207, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:11.404319', 'step': 9207, 'epoch': 1}
{'type': 'loss', 'content': 0.07106233388185501, 'timestamp': '2025-10-02 00:27:11.410865', 'step': 9208, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:11.473859', 'step': 9208, 'epoch': 1}
{'type': 'loss', 'content': 0.01365544181317091, 'timestamp': '2025-10-02 00:27:11.485194', 'step': 9209, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:11.550019', 'step': 9209, 'epoch': 1}
{'type': 'loss', 'content': 0.051409415900707245, 'timestamp': '2025-10-02 00:27:11.553067', 'step': 9210, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:11.607221', 'step': 9210, 'epoch': 1}
{'type': 'loss', 'content': 0.08036121726036072, 'timestamp': '2025-10-02 00:27:11.609717', 'step': 9211, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:11.663697', 'step': 9211, 'epoch': 1}
{'type': 'loss', 'content': 0.2471400946378708, 'timestamp': '2025-10-02 00:27:11.669809', 'step': 9212, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:11.723243', 'step': 9212, 'epoch': 1}
{'type': 'loss', 'content': 0.02962356247007847, 'timestamp': '2025-10-02 00:27:11.729182', 'step': 9213, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:11.783808', 'step': 9213, 'epoch': 1}
{'type': 'loss', 'content': 0.060366544872522354, 'timestamp': '2025-10-02 00:27:11.789703', 'step': 9214, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:11.844203', 'step': 9214, 'epoch': 1}
{'type': 'loss', 'content': 0.04660487920045853, 'timestamp': '2025-10-02 00:27:11.851658', 'step': 9215, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:11.909209', 'step': 9215, 'epoch': 1}
{'type': 'loss', 'content': 0.04705562815070152, 'timestamp': '2025-10-02 00:27:11.917460', 'step': 9216, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:11.974277', 'step': 9216, 'epoch': 1}
{'type': 'loss', 'content': 0.20802095532417297, 'timestamp': '2025-10-02 00:27:11.977589', 'step': 9217, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:12.040934', 'step': 9217, 'epoch': 1}
{'type': 'loss', 'content': 0.0818805918097496, 'timestamp': '2025-10-02 00:27:12.044113', 'step': 9218, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:12.101406', 'step': 9218, 'epoch': 1}
{'type': 'loss', 'content': 0.05906229838728905, 'timestamp': '2025-10-02 00:27:12.108856', 'step': 9219, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:12.166853', 'step': 9219, 'epoch': 1}
{'type': 'loss', 'content': 0.13283197581768036, 'timestamp': '2025-10-02 00:27:12.173362', 'step': 9220, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:12.231217', 'step': 9220, 'epoch': 1}
{'type': 'loss', 'content': 0.04831484332680702, 'timestamp': '2025-10-02 00:27:12.233926', 'step': 9221, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:12.289279', 'step': 9221, 'epoch': 1}
{'type': 'loss', 'content': 0.07713869214057922, 'timestamp': '2025-10-02 00:27:12.292657', 'step': 9222, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:12.352393', 'step': 9222, 'epoch': 1}
{'type': 'loss', 'content': 0.09529630094766617, 'timestamp': '2025-10-02 00:27:12.357473', 'step': 9223, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:12.421664', 'step': 9223, 'epoch': 1}
{'type': 'loss', 'content': 0.1342555731534958, 'timestamp': '2025-10-02 00:27:12.431739', 'step': 9224, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:27:12.487577', 'step': 9224, 'epoch': 1}
{'type': 'loss', 'content': 0.06724053621292114, 'timestamp': '2025-10-02 00:27:12.490953', 'step': 9225, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:27:12.560951', 'step': 9225, 'epoch': 1}
{'type': 'loss', 'content': 0.06558770686388016, 'timestamp': '2025-10-02 00:27:12.573296', 'step': 9226, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:12.634941', 'step': 9226, 'epoch': 1}
{'type': 'loss', 'content': 0.04835420101881027, 'timestamp': '2025-10-02 00:27:12.645157', 'step': 9227, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:12.701855', 'step': 9227, 'epoch': 1}
{'type': 'loss', 'content': 0.20199401676654816, 'timestamp': '2025-10-02 00:27:12.708663', 'step': 9228, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:12.764676', 'step': 9228, 'epoch': 1}
{'type': 'loss', 'content': 0.11615489423274994, 'timestamp': '2025-10-02 00:27:12.768421', 'step': 9229, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:12.824850', 'step': 9229, 'epoch': 1}
{'type': 'loss', 'content': 0.11100265383720398, 'timestamp': '2025-10-02 00:27:12.828171', 'step': 9230, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:12.884359', 'step': 9230, 'epoch': 1}
{'type': 'loss', 'content': 0.1121506467461586, 'timestamp': '2025-10-02 00:27:12.887237', 'step': 9231, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:12.944184', 'step': 9231, 'epoch': 1}
{'type': 'loss', 'content': 0.032965436577796936, 'timestamp': '2025-10-02 00:27:12.951426', 'step': 9232, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:13.008577', 'step': 9232, 'epoch': 1}
{'type': 'loss', 'content': 0.14545470476150513, 'timestamp': '2025-10-02 00:27:13.012036', 'step': 9233, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:13.070993', 'step': 9233, 'epoch': 1}
{'type': 'loss', 'content': 0.02331124246120453, 'timestamp': '2025-10-02 00:27:13.080363', 'step': 9234, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:13.137308', 'step': 9234, 'epoch': 1}
{'type': 'loss', 'content': 0.07356249541044235, 'timestamp': '2025-10-02 00:27:13.141833', 'step': 9235, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:13.197351', 'step': 9235, 'epoch': 1}
{'type': 'loss', 'content': 0.1348854899406433, 'timestamp': '2025-10-02 00:27:13.204365', 'step': 9236, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:13.260606', 'step': 9236, 'epoch': 1}
{'type': 'loss', 'content': 0.07364318519830704, 'timestamp': '2025-10-02 00:27:13.266770', 'step': 9237, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:13.323574', 'step': 9237, 'epoch': 1}
{'type': 'loss', 'content': 0.1843041181564331, 'timestamp': '2025-10-02 00:27:13.326039', 'step': 9238, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:13.382192', 'step': 9238, 'epoch': 1}
{'type': 'loss', 'content': 0.045920778065919876, 'timestamp': '2025-10-02 00:27:13.389660', 'step': 9239, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:13.456930', 'step': 9239, 'epoch': 1}
{'type': 'loss', 'content': 0.010952597483992577, 'timestamp': '2025-10-02 00:27:13.468269', 'step': 9240, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:13.532270', 'step': 9240, 'epoch': 1}
{'type': 'loss', 'content': 0.07997722178697586, 'timestamp': '2025-10-02 00:27:13.543271', 'step': 9241, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:13.598735', 'step': 9241, 'epoch': 1}
{'type': 'loss', 'content': 0.05081209912896156, 'timestamp': '2025-10-02 00:27:13.602988', 'step': 9242, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:13.660378', 'step': 9242, 'epoch': 1}
{'type': 'loss', 'content': 0.06677300482988358, 'timestamp': '2025-10-02 00:27:13.669963', 'step': 9243, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:13.727810', 'step': 9243, 'epoch': 1}
{'type': 'loss', 'content': 0.10954979062080383, 'timestamp': '2025-10-02 00:27:13.734493', 'step': 9244, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:13.790867', 'step': 9244, 'epoch': 1}
{'type': 'loss', 'content': 0.0885547623038292, 'timestamp': '2025-10-02 00:27:13.798391', 'step': 9245, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:13.854352', 'step': 9245, 'epoch': 1}
{'type': 'loss', 'content': 0.09473514556884766, 'timestamp': '2025-10-02 00:27:13.856904', 'step': 9246, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:13.911705', 'step': 9246, 'epoch': 1}
{'type': 'loss', 'content': 0.21498124301433563, 'timestamp': '2025-10-02 00:27:13.914994', 'step': 9247, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:27:13.994392', 'step': 9247, 'epoch': 1}
{'type': 'loss', 'content': 0.00594206340610981, 'timestamp': '2025-10-02 00:27:14.007457', 'step': 9248, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:14.063711', 'step': 9248, 'epoch': 1}
{'type': 'loss', 'content': 0.10676026344299316, 'timestamp': '2025-10-02 00:27:14.067056', 'step': 9249, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:14.123642', 'step': 9249, 'epoch': 1}
{'type': 'loss', 'content': 0.013572298921644688, 'timestamp': '2025-10-02 00:27:14.126448', 'step': 9250, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:27:14.193606', 'step': 9250, 'epoch': 1}
{'type': 'loss', 'content': 0.07217366248369217, 'timestamp': '2025-10-02 00:27:14.204216', 'step': 9251, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:14.261186', 'step': 9251, 'epoch': 1}
{'type': 'loss', 'content': 0.06774402409791946, 'timestamp': '2025-10-02 00:27:14.267458', 'step': 9252, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:14.325217', 'step': 9252, 'epoch': 1}
{'type': 'loss', 'content': 0.02769465744495392, 'timestamp': '2025-10-02 00:27:14.336219', 'step': 9253, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:14.390382', 'step': 9253, 'epoch': 1}
{'type': 'loss', 'content': 0.09245546907186508, 'timestamp': '2025-10-02 00:27:14.393322', 'step': 9254, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:14.448718', 'step': 9254, 'epoch': 1}
{'type': 'loss', 'content': 0.09783679991960526, 'timestamp': '2025-10-02 00:27:14.456298', 'step': 9255, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:14.511026', 'step': 9255, 'epoch': 1}
{'type': 'loss', 'content': 0.09867800772190094, 'timestamp': '2025-10-02 00:27:14.519226', 'step': 9256, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:27:14.580568', 'step': 9256, 'epoch': 1}
{'type': 'loss', 'content': 0.025991788133978844, 'timestamp': '2025-10-02 00:27:14.592083', 'step': 9257, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:14.646912', 'step': 9257, 'epoch': 1}
{'type': 'loss', 'content': 0.07039853185415268, 'timestamp': '2025-10-02 00:27:14.649617', 'step': 9258, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:14.705431', 'step': 9258, 'epoch': 1}
{'type': 'loss', 'content': 0.050503380596637726, 'timestamp': '2025-10-02 00:27:14.712894', 'step': 9259, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:14.767641', 'step': 9259, 'epoch': 1}
{'type': 'loss', 'content': 0.06730329245328903, 'timestamp': '2025-10-02 00:27:14.773811', 'step': 9260, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:14.828337', 'step': 9260, 'epoch': 1}
{'type': 'loss', 'content': 0.17325928807258606, 'timestamp': '2025-10-02 00:27:14.830697', 'step': 9261, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:14.887048', 'step': 9261, 'epoch': 1}
{'type': 'loss', 'content': 0.12618780136108398, 'timestamp': '2025-10-02 00:27:14.889789', 'step': 9262, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:27:14.945340', 'step': 9262, 'epoch': 1}
{'type': 'loss', 'content': 0.07882017642259598, 'timestamp': '2025-10-02 00:27:14.947720', 'step': 9263, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:15.001977', 'step': 9263, 'epoch': 1}
{'type': 'loss', 'content': 0.10657845437526703, 'timestamp': '2025-10-02 00:27:15.008110', 'step': 9264, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:15.061793', 'step': 9264, 'epoch': 1}
{'type': 'loss', 'content': 0.08306597173213959, 'timestamp': '2025-10-02 00:27:15.064240', 'step': 9265, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:15.125119', 'step': 9265, 'epoch': 1}
{'type': 'loss', 'content': 0.1912086457014084, 'timestamp': '2025-10-02 00:27:15.128576', 'step': 9266, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:15.182806', 'step': 9266, 'epoch': 1}
{'type': 'loss', 'content': 0.11974349617958069, 'timestamp': '2025-10-02 00:27:15.185069', 'step': 9267, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:15.239308', 'step': 9267, 'epoch': 1}
{'type': 'loss', 'content': 0.10052856802940369, 'timestamp': '2025-10-02 00:27:15.245460', 'step': 9268, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:15.299316', 'step': 9268, 'epoch': 1}
{'type': 'loss', 'content': 0.034523822367191315, 'timestamp': '2025-10-02 00:27:15.301667', 'step': 9269, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:15.355496', 'step': 9269, 'epoch': 1}
{'type': 'loss', 'content': 0.07009895890951157, 'timestamp': '2025-10-02 00:27:15.363074', 'step': 9270, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:27:15.417586', 'step': 9270, 'epoch': 1}
{'type': 'loss', 'content': 0.07642441987991333, 'timestamp': '2025-10-02 00:27:15.420072', 'step': 9271, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:27:15.475124', 'step': 9271, 'epoch': 1}
{'type': 'loss', 'content': 0.10464118421077728, 'timestamp': '2025-10-02 00:27:15.481355', 'step': 9272, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:15.535043', 'step': 9272, 'epoch': 1}
{'type': 'loss', 'content': 0.15396098792552948, 'timestamp': '2025-10-02 00:27:15.538323', 'step': 9273, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:15.592367', 'step': 9273, 'epoch': 1}
{'type': 'loss', 'content': 0.08823372423648834, 'timestamp': '2025-10-02 00:27:15.599992', 'step': 9274, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:15.653825', 'step': 9274, 'epoch': 1}
{'type': 'loss', 'content': 0.063712477684021, 'timestamp': '2025-10-02 00:27:15.656137', 'step': 9275, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:27:15.718775', 'step': 9275, 'epoch': 1}
{'type': 'loss', 'content': 0.05224486440420151, 'timestamp': '2025-10-02 00:27:15.730234', 'step': 9276, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:15.783245', 'step': 9276, 'epoch': 1}
{'type': 'loss', 'content': 0.11825988441705704, 'timestamp': '2025-10-02 00:27:15.785653', 'step': 9277, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:15.840269', 'step': 9277, 'epoch': 1}
{'type': 'loss', 'content': 0.14263863861560822, 'timestamp': '2025-10-02 00:27:15.846186', 'step': 9278, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:15.900565', 'step': 9278, 'epoch': 1}
{'type': 'loss', 'content': 0.17662924528121948, 'timestamp': '2025-10-02 00:27:15.903198', 'step': 9279, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:15.957042', 'step': 9279, 'epoch': 1}
{'type': 'loss', 'content': 0.17371760308742523, 'timestamp': '2025-10-02 00:27:15.963043', 'step': 9280, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:16.018736', 'step': 9280, 'epoch': 1}
{'type': 'loss', 'content': 0.09944096952676773, 'timestamp': '2025-10-02 00:27:16.024682', 'step': 9281, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:16.079293', 'step': 9281, 'epoch': 1}
{'type': 'loss', 'content': 0.1579204499721527, 'timestamp': '2025-10-02 00:27:16.081490', 'step': 9282, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:16.135278', 'step': 9282, 'epoch': 1}
{'type': 'loss', 'content': 0.1045060083270073, 'timestamp': '2025-10-02 00:27:16.137683', 'step': 9283, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:16.192327', 'step': 9283, 'epoch': 1}
{'type': 'loss', 'content': 0.08849336206912994, 'timestamp': '2025-10-02 00:27:16.202507', 'step': 9284, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:16.256668', 'step': 9284, 'epoch': 1}
{'type': 'loss', 'content': 0.1279013603925705, 'timestamp': '2025-10-02 00:27:16.259374', 'step': 9285, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:16.314265', 'step': 9285, 'epoch': 1}
{'type': 'loss', 'content': 0.043500419706106186, 'timestamp': '2025-10-02 00:27:16.316693', 'step': 9286, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:16.371007', 'step': 9286, 'epoch': 1}
{'type': 'loss', 'content': 0.029427405446767807, 'timestamp': '2025-10-02 00:27:16.373460', 'step': 9287, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:16.426947', 'step': 9287, 'epoch': 1}
{'type': 'loss', 'content': 0.15770700573921204, 'timestamp': '2025-10-02 00:27:16.434799', 'step': 9288, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:16.489683', 'step': 9288, 'epoch': 1}
{'type': 'loss', 'content': 0.223324716091156, 'timestamp': '2025-10-02 00:27:16.492051', 'step': 9289, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:16.546505', 'step': 9289, 'epoch': 1}
{'type': 'loss', 'content': 0.07200274616479874, 'timestamp': '2025-10-02 00:27:16.554050', 'step': 9290, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:16.609382', 'step': 9290, 'epoch': 1}
{'type': 'loss', 'content': 0.07830481976270676, 'timestamp': '2025-10-02 00:27:16.611975', 'step': 9291, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:16.667478', 'step': 9291, 'epoch': 1}
{'type': 'loss', 'content': 0.06364285200834274, 'timestamp': '2025-10-02 00:27:16.673736', 'step': 9292, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:16.729613', 'step': 9292, 'epoch': 1}
{'type': 'loss', 'content': 0.08224468678236008, 'timestamp': '2025-10-02 00:27:16.731826', 'step': 9293, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:16.786178', 'step': 9293, 'epoch': 1}
{'type': 'loss', 'content': 0.10816110670566559, 'timestamp': '2025-10-02 00:27:16.788329', 'step': 9294, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:16.842292', 'step': 9294, 'epoch': 1}
{'type': 'loss', 'content': 0.10240384936332703, 'timestamp': '2025-10-02 00:27:16.844393', 'step': 9295, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:16.898417', 'step': 9295, 'epoch': 1}
{'type': 'loss', 'content': 0.08021024614572525, 'timestamp': '2025-10-02 00:27:16.908548', 'step': 9296, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:16.961392', 'step': 9296, 'epoch': 1}
{'type': 'loss', 'content': 0.23093120753765106, 'timestamp': '2025-10-02 00:27:16.964783', 'step': 9297, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:17.019388', 'step': 9297, 'epoch': 1}
{'type': 'loss', 'content': 0.01094426866620779, 'timestamp': '2025-10-02 00:27:17.026941', 'step': 9298, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:17.081877', 'step': 9298, 'epoch': 1}
{'type': 'loss', 'content': 0.07516434788703918, 'timestamp': '2025-10-02 00:27:17.084466', 'step': 9299, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:17.145713', 'step': 9299, 'epoch': 1}
{'type': 'loss', 'content': 0.1406235545873642, 'timestamp': '2025-10-02 00:27:17.156990', 'step': 9300, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:17.212495', 'step': 9300, 'epoch': 1}
{'type': 'loss', 'content': 0.03656332939863205, 'timestamp': '2025-10-02 00:27:17.220059', 'step': 9301, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:17.279214', 'step': 9301, 'epoch': 1}
{'type': 'loss', 'content': 0.01955108530819416, 'timestamp': '2025-10-02 00:27:17.289434', 'step': 9302, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:17.344490', 'step': 9302, 'epoch': 1}
{'type': 'loss', 'content': 0.02434656396508217, 'timestamp': '2025-10-02 00:27:17.351533', 'step': 9303, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:17.405359', 'step': 9303, 'epoch': 1}
{'type': 'loss', 'content': 0.15317796170711517, 'timestamp': '2025-10-02 00:27:17.411616', 'step': 9304, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:17.466638', 'step': 9304, 'epoch': 1}
{'type': 'loss', 'content': 0.12977272272109985, 'timestamp': '2025-10-02 00:27:17.469143', 'step': 9305, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:17.524678', 'step': 9305, 'epoch': 1}
{'type': 'loss', 'content': 0.06505409628152847, 'timestamp': '2025-10-02 00:27:17.526988', 'step': 9306, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:17.581768', 'step': 9306, 'epoch': 1}
{'type': 'loss', 'content': 0.15694831311702728, 'timestamp': '2025-10-02 00:27:17.584508', 'step': 9307, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:17.639977', 'step': 9307, 'epoch': 1}
{'type': 'loss', 'content': 0.07783643901348114, 'timestamp': '2025-10-02 00:27:17.646307', 'step': 9308, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:17.701473', 'step': 9308, 'epoch': 1}
{'type': 'loss', 'content': 0.04848922789096832, 'timestamp': '2025-10-02 00:27:17.711596', 'step': 9309, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:17.767201', 'step': 9309, 'epoch': 1}
{'type': 'loss', 'content': 0.14457601308822632, 'timestamp': '2025-10-02 00:27:17.769399', 'step': 9310, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:17.826657', 'step': 9310, 'epoch': 1}
{'type': 'loss', 'content': 0.07372540980577469, 'timestamp': '2025-10-02 00:27:17.836135', 'step': 9311, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:17.890344', 'step': 9311, 'epoch': 1}
{'type': 'loss', 'content': 0.1479855328798294, 'timestamp': '2025-10-02 00:27:17.896434', 'step': 9312, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:17.950334', 'step': 9312, 'epoch': 1}
{'type': 'loss', 'content': 0.037397924810647964, 'timestamp': '2025-10-02 00:27:17.952756', 'step': 9313, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:18.007179', 'step': 9313, 'epoch': 1}
{'type': 'loss', 'content': 0.06629044562578201, 'timestamp': '2025-10-02 00:27:18.009653', 'step': 9314, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:18.064850', 'step': 9314, 'epoch': 1}
{'type': 'loss', 'content': 0.018668048083782196, 'timestamp': '2025-10-02 00:27:18.067568', 'step': 9315, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:27:18.121502', 'step': 9315, 'epoch': 1}
{'type': 'loss', 'content': 0.07983514666557312, 'timestamp': '2025-10-02 00:27:18.128179', 'step': 9316, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:27:18.194138', 'step': 9316, 'epoch': 1}
{'type': 'loss', 'content': 0.028941284865140915, 'timestamp': '2025-10-02 00:27:18.207100', 'step': 9317, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:18.262048', 'step': 9317, 'epoch': 1}
{'type': 'loss', 'content': 0.05545877292752266, 'timestamp': '2025-10-02 00:27:18.269509', 'step': 9318, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:18.324606', 'step': 9318, 'epoch': 1}
{'type': 'loss', 'content': 0.0452323779463768, 'timestamp': '2025-10-02 00:27:18.326934', 'step': 9319, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:18.381599', 'step': 9319, 'epoch': 1}
{'type': 'loss', 'content': 0.1025492325425148, 'timestamp': '2025-10-02 00:27:18.387364', 'step': 9320, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:18.440958', 'step': 9320, 'epoch': 1}
{'type': 'loss', 'content': 0.0855492427945137, 'timestamp': '2025-10-02 00:27:18.443600', 'step': 9321, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:18.498911', 'step': 9321, 'epoch': 1}
{'type': 'loss', 'content': 0.0486832819879055, 'timestamp': '2025-10-02 00:27:18.506496', 'step': 9322, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:18.561430', 'step': 9322, 'epoch': 1}
{'type': 'loss', 'content': 0.030886145308613777, 'timestamp': '2025-10-02 00:27:18.568895', 'step': 9323, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:18.624507', 'step': 9323, 'epoch': 1}
{'type': 'loss', 'content': 0.1088038980960846, 'timestamp': '2025-10-02 00:27:18.634351', 'step': 9324, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:18.688923', 'step': 9324, 'epoch': 1}
{'type': 'loss', 'content': 0.02806057780981064, 'timestamp': '2025-10-02 00:27:18.699201', 'step': 9325, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:27:18.763946', 'step': 9325, 'epoch': 1}
{'type': 'loss', 'content': 0.010321851819753647, 'timestamp': '2025-10-02 00:27:18.774645', 'step': 9326, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:18.832374', 'step': 9326, 'epoch': 1}
{'type': 'loss', 'content': 0.06733258068561554, 'timestamp': '2025-10-02 00:27:18.834822', 'step': 9327, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:18.888892', 'step': 9327, 'epoch': 1}
{'type': 'loss', 'content': 0.09585071355104446, 'timestamp': '2025-10-02 00:27:18.895003', 'step': 9328, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:18.949062', 'step': 9328, 'epoch': 1}
{'type': 'loss', 'content': 0.07110258936882019, 'timestamp': '2025-10-02 00:27:18.954975', 'step': 9329, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:27:19.010923', 'step': 9329, 'epoch': 1}
{'type': 'loss', 'content': 0.098948173224926, 'timestamp': '2025-10-02 00:27:19.014331', 'step': 9330, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:19.070033', 'step': 9330, 'epoch': 1}
{'type': 'loss', 'content': 0.07448817044496536, 'timestamp': '2025-10-02 00:27:19.079596', 'step': 9331, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:19.133450', 'step': 9331, 'epoch': 1}
{'type': 'loss', 'content': 0.1209845021367073, 'timestamp': '2025-10-02 00:27:19.139351', 'step': 9332, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:19.193465', 'step': 9332, 'epoch': 1}
{'type': 'loss', 'content': 0.14269602298736572, 'timestamp': '2025-10-02 00:27:19.196241', 'step': 9333, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:19.251560', 'step': 9333, 'epoch': 1}
{'type': 'loss', 'content': 0.06187824532389641, 'timestamp': '2025-10-02 00:27:19.259176', 'step': 9334, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:19.313607', 'step': 9334, 'epoch': 1}
{'type': 'loss', 'content': 0.13455747067928314, 'timestamp': '2025-10-02 00:27:19.316294', 'step': 9335, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:19.370181', 'step': 9335, 'epoch': 1}
{'type': 'loss', 'content': 0.07404689490795135, 'timestamp': '2025-10-02 00:27:19.376173', 'step': 9336, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:19.429699', 'step': 9336, 'epoch': 1}
{'type': 'loss', 'content': 0.11181695759296417, 'timestamp': '2025-10-02 00:27:19.432221', 'step': 9337, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:19.486438', 'step': 9337, 'epoch': 1}
{'type': 'loss', 'content': 0.1974591314792633, 'timestamp': '2025-10-02 00:27:19.488833', 'step': 9338, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:19.543500', 'step': 9338, 'epoch': 1}
{'type': 'loss', 'content': 0.06269223988056183, 'timestamp': '2025-10-02 00:27:19.545855', 'step': 9339, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:19.599782', 'step': 9339, 'epoch': 1}
{'type': 'loss', 'content': 0.09119072556495667, 'timestamp': '2025-10-02 00:27:19.605683', 'step': 9340, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:19.660332', 'step': 9340, 'epoch': 1}
{'type': 'loss', 'content': 0.13370908796787262, 'timestamp': '2025-10-02 00:27:19.662715', 'step': 9341, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:19.716961', 'step': 9341, 'epoch': 1}
{'type': 'loss', 'content': 0.1996494084596634, 'timestamp': '2025-10-02 00:27:19.719480', 'step': 9342, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:19.774597', 'step': 9342, 'epoch': 1}
{'type': 'loss', 'content': 0.024372193962335587, 'timestamp': '2025-10-02 00:27:19.782196', 'step': 9343, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:19.838515', 'step': 9343, 'epoch': 1}
{'type': 'loss', 'content': 0.030702536925673485, 'timestamp': '2025-10-02 00:27:19.844586', 'step': 9344, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:27:19.906946', 'step': 9344, 'epoch': 1}
{'type': 'loss', 'content': 0.03840414807200432, 'timestamp': '2025-10-02 00:27:19.918476', 'step': 9345, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:19.972740', 'step': 9345, 'epoch': 1}
{'type': 'loss', 'content': 0.15515394508838654, 'timestamp': '2025-10-02 00:27:19.976340', 'step': 9346, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:20.030502', 'step': 9346, 'epoch': 1}
{'type': 'loss', 'content': 0.042230039834976196, 'timestamp': '2025-10-02 00:27:20.033027', 'step': 9347, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:20.087337', 'step': 9347, 'epoch': 1}
{'type': 'loss', 'content': 0.24482369422912598, 'timestamp': '2025-10-02 00:27:20.093382', 'step': 9348, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:20.147191', 'step': 9348, 'epoch': 1}
{'type': 'loss', 'content': 0.021133607253432274, 'timestamp': '2025-10-02 00:27:20.156738', 'step': 9349, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:20.211025', 'step': 9349, 'epoch': 1}
{'type': 'loss', 'content': 0.05818319693207741, 'timestamp': '2025-10-02 00:27:20.213276', 'step': 9350, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:27:20.284411', 'step': 9350, 'epoch': 1}
{'type': 'loss', 'content': 0.019042624160647392, 'timestamp': '2025-10-02 00:27:20.297064', 'step': 9351, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:20.351777', 'step': 9351, 'epoch': 1}
{'type': 'loss', 'content': 0.07110566645860672, 'timestamp': '2025-10-02 00:27:20.360149', 'step': 9352, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:20.414193', 'step': 9352, 'epoch': 1}
{'type': 'loss', 'content': 0.12077262252569199, 'timestamp': '2025-10-02 00:27:20.423419', 'step': 9353, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:20.477425', 'step': 9353, 'epoch': 1}
{'type': 'loss', 'content': 0.18423090875148773, 'timestamp': '2025-10-02 00:27:20.479829', 'step': 9354, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:20.534705', 'step': 9354, 'epoch': 1}
{'type': 'loss', 'content': 0.15413716435432434, 'timestamp': '2025-10-02 00:27:20.536943', 'step': 9355, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:20.590945', 'step': 9355, 'epoch': 1}
{'type': 'loss', 'content': 0.07471607625484467, 'timestamp': '2025-10-02 00:27:20.596919', 'step': 9356, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:20.651446', 'step': 9356, 'epoch': 1}
{'type': 'loss', 'content': 0.13370972871780396, 'timestamp': '2025-10-02 00:27:20.653800', 'step': 9357, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:20.707910', 'step': 9357, 'epoch': 1}
{'type': 'loss', 'content': 0.14132285118103027, 'timestamp': '2025-10-02 00:27:20.710240', 'step': 9358, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:20.764639', 'step': 9358, 'epoch': 1}
{'type': 'loss', 'content': 0.14202044904232025, 'timestamp': '2025-10-02 00:27:20.766874', 'step': 9359, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:20.820597', 'step': 9359, 'epoch': 1}
{'type': 'loss', 'content': 0.10752834379673004, 'timestamp': '2025-10-02 00:27:20.826589', 'step': 9360, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:20.881706', 'step': 9360, 'epoch': 1}
{'type': 'loss', 'content': 0.03684520721435547, 'timestamp': '2025-10-02 00:27:20.887691', 'step': 9361, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:20.943028', 'step': 9361, 'epoch': 1}
{'type': 'loss', 'content': 0.15632814168930054, 'timestamp': '2025-10-02 00:27:20.945253', 'step': 9362, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:21.003220', 'step': 9362, 'epoch': 1}
{'type': 'loss', 'content': 0.025552894920110703, 'timestamp': '2025-10-02 00:27:21.005732', 'step': 9363, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:21.060861', 'step': 9363, 'epoch': 1}
{'type': 'loss', 'content': 0.1047147735953331, 'timestamp': '2025-10-02 00:27:21.068072', 'step': 9364, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:21.122892', 'step': 9364, 'epoch': 1}
{'type': 'loss', 'content': 0.11430931091308594, 'timestamp': '2025-10-02 00:27:21.140956', 'step': 9365, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:21.244124', 'step': 9365, 'epoch': 1}
{'type': 'loss', 'content': 0.17793972790241241, 'timestamp': '2025-10-02 00:27:21.247906', 'step': 9366, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:21.327460', 'step': 9366, 'epoch': 1}
{'type': 'loss', 'content': 0.03777328506112099, 'timestamp': '2025-10-02 00:27:21.337003', 'step': 9367, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:21.413212', 'step': 9367, 'epoch': 1}
{'type': 'loss', 'content': 0.0907059907913208, 'timestamp': '2025-10-02 00:27:21.422551', 'step': 9368, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:27:21.485773', 'step': 9368, 'epoch': 1}
{'type': 'loss', 'content': 0.06458604335784912, 'timestamp': '2025-10-02 00:27:21.497518', 'step': 9369, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:21.562505', 'step': 9369, 'epoch': 1}
{'type': 'loss', 'content': 0.038663070648908615, 'timestamp': '2025-10-02 00:27:21.572069', 'step': 9370, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:21.627944', 'step': 9370, 'epoch': 1}
{'type': 'loss', 'content': 0.09588268399238586, 'timestamp': '2025-10-02 00:27:21.630522', 'step': 9371, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:21.690669', 'step': 9371, 'epoch': 1}
{'type': 'loss', 'content': 0.08790474385023117, 'timestamp': '2025-10-02 00:27:21.698138', 'step': 9372, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:21.765582', 'step': 9372, 'epoch': 1}
{'type': 'loss', 'content': 0.07033215463161469, 'timestamp': '2025-10-02 00:27:21.778870', 'step': 9373, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:21.837168', 'step': 9373, 'epoch': 1}
{'type': 'loss', 'content': 0.09725669771432877, 'timestamp': '2025-10-02 00:27:21.842976', 'step': 9374, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:21.901789', 'step': 9374, 'epoch': 1}
{'type': 'loss', 'content': 0.04758019000291824, 'timestamp': '2025-10-02 00:27:21.907606', 'step': 9375, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:21.965172', 'step': 9375, 'epoch': 1}
{'type': 'loss', 'content': 0.04463833197951317, 'timestamp': '2025-10-02 00:27:21.973469', 'step': 9376, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:22.028793', 'step': 9376, 'epoch': 1}
{'type': 'loss', 'content': 0.17350390553474426, 'timestamp': '2025-10-02 00:27:22.032289', 'step': 9377, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:22.089275', 'step': 9377, 'epoch': 1}
{'type': 'loss', 'content': 0.06725601851940155, 'timestamp': '2025-10-02 00:27:22.091925', 'step': 9378, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:22.148427', 'step': 9378, 'epoch': 1}
{'type': 'loss', 'content': 0.1807362586259842, 'timestamp': '2025-10-02 00:27:22.152542', 'step': 9379, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:22.218304', 'step': 9379, 'epoch': 1}
{'type': 'loss', 'content': 0.057647187262773514, 'timestamp': '2025-10-02 00:27:22.228630', 'step': 9380, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:22.284117', 'step': 9380, 'epoch': 1}
{'type': 'loss', 'content': 0.10074888914823532, 'timestamp': '2025-10-02 00:27:22.286811', 'step': 9381, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:22.342344', 'step': 9381, 'epoch': 1}
{'type': 'loss', 'content': 0.08179829269647598, 'timestamp': '2025-10-02 00:27:22.349001', 'step': 9382, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:22.415041', 'step': 9382, 'epoch': 1}
{'type': 'loss', 'content': 0.04568438231945038, 'timestamp': '2025-10-02 00:27:22.420847', 'step': 9383, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:27:22.512760', 'step': 9383, 'epoch': 1}
{'type': 'loss', 'content': 0.04007478430867195, 'timestamp': '2025-10-02 00:27:22.526230', 'step': 9384, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:22.581272', 'step': 9384, 'epoch': 1}
{'type': 'loss', 'content': 0.12879766523838043, 'timestamp': '2025-10-02 00:27:22.584936', 'step': 9385, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:22.640037', 'step': 9385, 'epoch': 1}
{'type': 'loss', 'content': 0.18530656397342682, 'timestamp': '2025-10-02 00:27:22.643681', 'step': 9386, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:22.703193', 'step': 9386, 'epoch': 1}
{'type': 'loss', 'content': 0.06349321454763412, 'timestamp': '2025-10-02 00:27:22.708444', 'step': 9387, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:22.783346', 'step': 9387, 'epoch': 1}
{'type': 'loss', 'content': 0.11771406978368759, 'timestamp': '2025-10-02 00:27:22.794223', 'step': 9388, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:22.854243', 'step': 9388, 'epoch': 1}
{'type': 'loss', 'content': 0.1019924208521843, 'timestamp': '2025-10-02 00:27:22.856756', 'step': 9389, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:22.913174', 'step': 9389, 'epoch': 1}
{'type': 'loss', 'content': 0.06859616935253143, 'timestamp': '2025-10-02 00:27:22.919142', 'step': 9390, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:22.980591', 'step': 9390, 'epoch': 1}
{'type': 'loss', 'content': 0.06520615518093109, 'timestamp': '2025-10-02 00:27:22.983750', 'step': 9391, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:23.040079', 'step': 9391, 'epoch': 1}
{'type': 'loss', 'content': 0.1333555430173874, 'timestamp': '2025-10-02 00:27:23.046901', 'step': 9392, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:23.116854', 'step': 9392, 'epoch': 1}
{'type': 'loss', 'content': 0.1378348469734192, 'timestamp': '2025-10-02 00:27:23.125398', 'step': 9393, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:23.200112', 'step': 9393, 'epoch': 1}
{'type': 'loss', 'content': 0.07985422760248184, 'timestamp': '2025-10-02 00:27:23.203115', 'step': 9394, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:23.279121', 'step': 9394, 'epoch': 1}
{'type': 'loss', 'content': 0.10180164128541946, 'timestamp': '2025-10-02 00:27:23.282782', 'step': 9395, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:23.348541', 'step': 9395, 'epoch': 1}
{'type': 'loss', 'content': 0.1046183705329895, 'timestamp': '2025-10-02 00:27:23.356833', 'step': 9396, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:23.415081', 'step': 9396, 'epoch': 1}
{'type': 'loss', 'content': 0.1252664178609848, 'timestamp': '2025-10-02 00:27:23.418414', 'step': 9397, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:23.479766', 'step': 9397, 'epoch': 1}
{'type': 'loss', 'content': 0.1723446547985077, 'timestamp': '2025-10-02 00:27:23.482835', 'step': 9398, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:23.544330', 'step': 9398, 'epoch': 1}
{'type': 'loss', 'content': 0.10961051285266876, 'timestamp': '2025-10-02 00:27:23.547745', 'step': 9399, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:23.620499', 'step': 9399, 'epoch': 1}
{'type': 'loss', 'content': 0.056412648409605026, 'timestamp': '2025-10-02 00:27:23.630675', 'step': 9400, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:23.687861', 'step': 9400, 'epoch': 1}
{'type': 'loss', 'content': 0.1345164030790329, 'timestamp': '2025-10-02 00:27:23.690949', 'step': 9401, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:23.754214', 'step': 9401, 'epoch': 1}
{'type': 'loss', 'content': 0.08146154880523682, 'timestamp': '2025-10-02 00:27:23.763590', 'step': 9402, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:23.832398', 'step': 9402, 'epoch': 1}
{'type': 'loss', 'content': 0.07606814801692963, 'timestamp': '2025-10-02 00:27:23.841229', 'step': 9403, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:27:23.920897', 'step': 9403, 'epoch': 1}
{'type': 'loss', 'content': 0.053074032068252563, 'timestamp': '2025-10-02 00:27:23.934794', 'step': 9404, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:23.997423', 'step': 9404, 'epoch': 1}
{'type': 'loss', 'content': 0.20826278626918793, 'timestamp': '2025-10-02 00:27:24.004250', 'step': 9405, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:24.079614', 'step': 9405, 'epoch': 1}
{'type': 'loss', 'content': 0.04514347016811371, 'timestamp': '2025-10-02 00:27:24.089142', 'step': 9406, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:24.145503', 'step': 9406, 'epoch': 1}
{'type': 'loss', 'content': 0.09094366431236267, 'timestamp': '2025-10-02 00:27:24.154563', 'step': 9407, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:27:24.228038', 'step': 9407, 'epoch': 1}
{'type': 'loss', 'content': 0.30664846301078796, 'timestamp': '2025-10-02 00:27:24.238855', 'step': 9408, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:24.302480', 'step': 9408, 'epoch': 1}
{'type': 'loss', 'content': 0.09147230535745621, 'timestamp': '2025-10-02 00:27:24.308265', 'step': 9409, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:27:24.364939', 'step': 9409, 'epoch': 1}
{'type': 'loss', 'content': 0.19821959733963013, 'timestamp': '2025-10-02 00:27:24.367836', 'step': 9410, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:24.435385', 'step': 9410, 'epoch': 1}
{'type': 'loss', 'content': 0.10275501012802124, 'timestamp': '2025-10-02 00:27:24.442753', 'step': 9411, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:24.505851', 'step': 9411, 'epoch': 1}
{'type': 'loss', 'content': 0.08717519789934158, 'timestamp': '2025-10-02 00:27:24.516822', 'step': 9412, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:24.572282', 'step': 9412, 'epoch': 1}
{'type': 'loss', 'content': 0.0341932475566864, 'timestamp': '2025-10-02 00:27:24.578114', 'step': 9413, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:24.634682', 'step': 9413, 'epoch': 1}
{'type': 'loss', 'content': 0.04034465178847313, 'timestamp': '2025-10-02 00:27:24.637539', 'step': 9414, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:24.694211', 'step': 9414, 'epoch': 1}
{'type': 'loss', 'content': 0.0471661314368248, 'timestamp': '2025-10-02 00:27:24.697221', 'step': 9415, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:24.755910', 'step': 9415, 'epoch': 1}
{'type': 'loss', 'content': 0.0285677257925272, 'timestamp': '2025-10-02 00:27:24.766068', 'step': 9416, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:24.825502', 'step': 9416, 'epoch': 1}
{'type': 'loss', 'content': 0.09452985227108002, 'timestamp': '2025-10-02 00:27:24.834466', 'step': 9417, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:24.895156', 'step': 9417, 'epoch': 1}
{'type': 'loss', 'content': 0.1657850593328476, 'timestamp': '2025-10-02 00:27:24.902669', 'step': 9418, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:24.970181', 'step': 9418, 'epoch': 1}
{'type': 'loss', 'content': 0.044629476964473724, 'timestamp': '2025-10-02 00:27:24.976108', 'step': 9419, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:25.033377', 'step': 9419, 'epoch': 1}
{'type': 'loss', 'content': 0.04734138771891594, 'timestamp': '2025-10-02 00:27:25.043735', 'step': 9420, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:27:25.105615', 'step': 9420, 'epoch': 1}
{'type': 'loss', 'content': 0.03533690795302391, 'timestamp': '2025-10-02 00:27:25.113039', 'step': 9421, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:25.176279', 'step': 9421, 'epoch': 1}
{'type': 'loss', 'content': 0.03804831951856613, 'timestamp': '2025-10-02 00:27:25.185810', 'step': 9422, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:27:25.257306', 'step': 9422, 'epoch': 1}
{'type': 'loss', 'content': 0.05559391528367996, 'timestamp': '2025-10-02 00:27:25.268097', 'step': 9423, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:25.323793', 'step': 9423, 'epoch': 1}
{'type': 'loss', 'content': 0.06891121715307236, 'timestamp': '2025-10-02 00:27:25.329977', 'step': 9424, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:27:25.395784', 'step': 9424, 'epoch': 1}
{'type': 'loss', 'content': 0.0205707848072052, 'timestamp': '2025-10-02 00:27:25.407352', 'step': 9425, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:25.464010', 'step': 9425, 'epoch': 1}
{'type': 'loss', 'content': 0.07658515125513077, 'timestamp': '2025-10-02 00:27:25.467841', 'step': 9426, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:27:25.522856', 'step': 9426, 'epoch': 1}
{'type': 'loss', 'content': 0.14066264033317566, 'timestamp': '2025-10-02 00:27:25.526427', 'step': 9427, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:25.582915', 'step': 9427, 'epoch': 1}
{'type': 'loss', 'content': 0.06221015378832817, 'timestamp': '2025-10-02 00:27:25.589611', 'step': 9428, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:27:25.645839', 'step': 9428, 'epoch': 1}
{'type': 'loss', 'content': 0.1063062995672226, 'timestamp': '2025-10-02 00:27:25.649403', 'step': 9429, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:25.707495', 'step': 9429, 'epoch': 1}
{'type': 'loss', 'content': 0.12944048643112183, 'timestamp': '2025-10-02 00:27:25.711091', 'step': 9430, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:25.767662', 'step': 9430, 'epoch': 1}
{'type': 'loss', 'content': 0.1418853998184204, 'timestamp': '2025-10-02 00:27:25.775464', 'step': 9431, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:25.838874', 'step': 9431, 'epoch': 1}
{'type': 'loss', 'content': 0.10317765176296234, 'timestamp': '2025-10-02 00:27:25.848492', 'step': 9432, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:25.912581', 'step': 9432, 'epoch': 1}
{'type': 'loss', 'content': 0.06389366090297699, 'timestamp': '2025-10-02 00:27:25.918514', 'step': 9433, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:25.974893', 'step': 9433, 'epoch': 1}
{'type': 'loss', 'content': 0.1899404376745224, 'timestamp': '2025-10-02 00:27:25.978009', 'step': 9434, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:26.034303', 'step': 9434, 'epoch': 1}
{'type': 'loss', 'content': 0.1203407272696495, 'timestamp': '2025-10-02 00:27:26.040074', 'step': 9435, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:26.105549', 'step': 9435, 'epoch': 1}
{'type': 'loss', 'content': 0.016867032274603844, 'timestamp': '2025-10-02 00:27:26.116518', 'step': 9436, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:27:26.186039', 'step': 9436, 'epoch': 1}
{'type': 'loss', 'content': 0.010942903347313404, 'timestamp': '2025-10-02 00:27:26.199436', 'step': 9437, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:26.268336', 'step': 9437, 'epoch': 1}
{'type': 'loss', 'content': 0.0782518982887268, 'timestamp': '2025-10-02 00:27:26.271025', 'step': 9438, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:26.328334', 'step': 9438, 'epoch': 1}
{'type': 'loss', 'content': 0.015548277646303177, 'timestamp': '2025-10-02 00:27:26.331022', 'step': 9439, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:26.412600', 'step': 9439, 'epoch': 1}
{'type': 'loss', 'content': 0.06019933149218559, 'timestamp': '2025-10-02 00:27:26.423875', 'step': 9440, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:26.479139', 'step': 9440, 'epoch': 1}
{'type': 'loss', 'content': 0.09651284664869308, 'timestamp': '2025-10-02 00:27:26.482518', 'step': 9441, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:26.538978', 'step': 9441, 'epoch': 1}
{'type': 'loss', 'content': 0.17783817648887634, 'timestamp': '2025-10-02 00:27:26.542653', 'step': 9442, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:27:26.602179', 'step': 9442, 'epoch': 1}
{'type': 'loss', 'content': 0.10026570409536362, 'timestamp': '2025-10-02 00:27:26.605163', 'step': 9443, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:26.668296', 'step': 9443, 'epoch': 1}
{'type': 'loss', 'content': 0.053076550364494324, 'timestamp': '2025-10-02 00:27:26.676076', 'step': 9444, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:26.733275', 'step': 9444, 'epoch': 1}
{'type': 'loss', 'content': 0.17135055363178253, 'timestamp': '2025-10-02 00:27:26.737274', 'step': 9445, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:26.792845', 'step': 9445, 'epoch': 1}
{'type': 'loss', 'content': 0.166863813996315, 'timestamp': '2025-10-02 00:27:26.795846', 'step': 9446, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:26.854695', 'step': 9446, 'epoch': 1}
{'type': 'loss', 'content': 0.025875085964798927, 'timestamp': '2025-10-02 00:27:26.862097', 'step': 9447, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:26.928645', 'step': 9447, 'epoch': 1}
{'type': 'loss', 'content': 0.025799447670578957, 'timestamp': '2025-10-02 00:27:26.946224', 'step': 9448, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:27.005683', 'step': 9448, 'epoch': 1}
{'type': 'loss', 'content': 0.034023456275463104, 'timestamp': '2025-10-02 00:27:27.015152', 'step': 9449, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:27.083001', 'step': 9449, 'epoch': 1}
{'type': 'loss', 'content': 0.019718388095498085, 'timestamp': '2025-10-02 00:27:27.088945', 'step': 9450, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:27.149763', 'step': 9450, 'epoch': 1}
{'type': 'loss', 'content': 0.04217281565070152, 'timestamp': '2025-10-02 00:27:27.153479', 'step': 9451, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:27.224465', 'step': 9451, 'epoch': 1}
{'type': 'loss', 'content': 0.13412915170192719, 'timestamp': '2025-10-02 00:27:27.231123', 'step': 9452, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:27.297769', 'step': 9452, 'epoch': 1}
{'type': 'loss', 'content': 0.051545850932598114, 'timestamp': '2025-10-02 00:27:27.301210', 'step': 9453, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:27.357351', 'step': 9453, 'epoch': 1}
{'type': 'loss', 'content': 0.2634071111679077, 'timestamp': '2025-10-02 00:27:27.361080', 'step': 9454, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:27:27.427008', 'step': 9454, 'epoch': 1}
{'type': 'loss', 'content': 0.06218449026346207, 'timestamp': '2025-10-02 00:27:27.437667', 'step': 9455, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:27.493467', 'step': 9455, 'epoch': 1}
{'type': 'loss', 'content': 0.08849333971738815, 'timestamp': '2025-10-02 00:27:27.500446', 'step': 9456, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:27.555888', 'step': 9456, 'epoch': 1}
{'type': 'loss', 'content': 0.033439915627241135, 'timestamp': '2025-10-02 00:27:27.558857', 'step': 9457, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:27:27.615363', 'step': 9457, 'epoch': 1}
{'type': 'loss', 'content': 0.3642009496688843, 'timestamp': '2025-10-02 00:27:27.620435', 'step': 9458, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:27.685276', 'step': 9458, 'epoch': 1}
{'type': 'loss', 'content': 0.1653248816728592, 'timestamp': '2025-10-02 00:27:27.691127', 'step': 9459, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:27.766652', 'step': 9459, 'epoch': 1}
{'type': 'loss', 'content': 0.19930602610111237, 'timestamp': '2025-10-02 00:27:27.773290', 'step': 9460, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:27:27.860355', 'step': 9460, 'epoch': 1}
{'type': 'loss', 'content': 0.129564106464386, 'timestamp': '2025-10-02 00:27:27.863852', 'step': 9461, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:27.920401', 'step': 9461, 'epoch': 1}
{'type': 'loss', 'content': 0.02021373249590397, 'timestamp': '2025-10-02 00:27:27.929909', 'step': 9462, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:27.992109', 'step': 9462, 'epoch': 1}
{'type': 'loss', 'content': 0.04992758110165596, 'timestamp': '2025-10-02 00:27:28.002618', 'step': 9463, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:28.064611', 'step': 9463, 'epoch': 1}
{'type': 'loss', 'content': 0.025127237662672997, 'timestamp': '2025-10-02 00:27:28.075912', 'step': 9464, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:28.137889', 'step': 9464, 'epoch': 1}
{'type': 'loss', 'content': 0.022732721641659737, 'timestamp': '2025-10-02 00:27:28.149256', 'step': 9465, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:28.204360', 'step': 9465, 'epoch': 1}
{'type': 'loss', 'content': 0.027873193845152855, 'timestamp': '2025-10-02 00:27:28.207020', 'step': 9466, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:28.262239', 'step': 9466, 'epoch': 1}
{'type': 'loss', 'content': 0.15204383432865143, 'timestamp': '2025-10-02 00:27:28.265060', 'step': 9467, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:27:28.319458', 'step': 9467, 'epoch': 1}
{'type': 'loss', 'content': 0.0993943065404892, 'timestamp': '2025-10-02 00:27:28.325821', 'step': 9468, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:28.387912', 'step': 9468, 'epoch': 1}
{'type': 'loss', 'content': 0.08768776059150696, 'timestamp': '2025-10-02 00:27:28.394367', 'step': 9469, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:28.458825', 'step': 9469, 'epoch': 1}
{'type': 'loss', 'content': 0.13624510169029236, 'timestamp': '2025-10-02 00:27:28.463134', 'step': 9470, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:28.527676', 'step': 9470, 'epoch': 1}
{'type': 'loss', 'content': 0.08010410517454147, 'timestamp': '2025-10-02 00:27:28.535135', 'step': 9471, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:28.603867', 'step': 9471, 'epoch': 1}
{'type': 'loss', 'content': 0.07449357956647873, 'timestamp': '2025-10-02 00:27:28.618510', 'step': 9472, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:28.684514', 'step': 9472, 'epoch': 1}
{'type': 'loss', 'content': 0.1290651112794876, 'timestamp': '2025-10-02 00:27:28.688438', 'step': 9473, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:28.758942', 'step': 9473, 'epoch': 1}
{'type': 'loss', 'content': 0.17943665385246277, 'timestamp': '2025-10-02 00:27:28.763868', 'step': 9474, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:28.845118', 'step': 9474, 'epoch': 1}
{'type': 'loss', 'content': 0.04226934164762497, 'timestamp': '2025-10-02 00:27:28.863802', 'step': 9475, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:28.931147', 'step': 9475, 'epoch': 1}
{'type': 'loss', 'content': 0.05213165283203125, 'timestamp': '2025-10-02 00:27:28.946797', 'step': 9476, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:29.021813', 'step': 9476, 'epoch': 1}
{'type': 'loss', 'content': 0.2069113850593567, 'timestamp': '2025-10-02 00:27:29.025150', 'step': 9477, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:27:29.106665', 'step': 9477, 'epoch': 1}
{'type': 'loss', 'content': 0.08475980162620544, 'timestamp': '2025-10-02 00:27:29.117520', 'step': 9478, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:29.188926', 'step': 9478, 'epoch': 1}
{'type': 'loss', 'content': 0.025162477046251297, 'timestamp': '2025-10-02 00:27:29.198513', 'step': 9479, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:29.267669', 'step': 9479, 'epoch': 1}
{'type': 'loss', 'content': 0.05224696546792984, 'timestamp': '2025-10-02 00:27:29.279521', 'step': 9480, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:29.343042', 'step': 9480, 'epoch': 1}
{'type': 'loss', 'content': 0.06851664930582047, 'timestamp': '2025-10-02 00:27:29.350262', 'step': 9481, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:29.429873', 'step': 9481, 'epoch': 1}
{'type': 'loss', 'content': 0.02731155790388584, 'timestamp': '2025-10-02 00:27:29.440408', 'step': 9482, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:29.499056', 'step': 9482, 'epoch': 1}
{'type': 'loss', 'content': 0.0786445215344429, 'timestamp': '2025-10-02 00:27:29.502384', 'step': 9483, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:27:29.562194', 'step': 9483, 'epoch': 1}
{'type': 'loss', 'content': 0.09851469844579697, 'timestamp': '2025-10-02 00:27:29.569047', 'step': 9484, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:29.626262', 'step': 9484, 'epoch': 1}
{'type': 'loss', 'content': 0.049757808446884155, 'timestamp': '2025-10-02 00:27:29.629975', 'step': 9485, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:29.687639', 'step': 9485, 'epoch': 1}
{'type': 'loss', 'content': 0.06459793448448181, 'timestamp': '2025-10-02 00:27:29.691219', 'step': 9486, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:29.747805', 'step': 9486, 'epoch': 1}
{'type': 'loss', 'content': 0.09474819153547287, 'timestamp': '2025-10-02 00:27:29.750887', 'step': 9487, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:29.808666', 'step': 9487, 'epoch': 1}
{'type': 'loss', 'content': 0.09361245483160019, 'timestamp': '2025-10-02 00:27:29.821484', 'step': 9488, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:29.877344', 'step': 9488, 'epoch': 1}
{'type': 'loss', 'content': 0.0447743758559227, 'timestamp': '2025-10-02 00:27:29.886537', 'step': 9489, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:29.941256', 'step': 9489, 'epoch': 1}
{'type': 'loss', 'content': 0.030014673247933388, 'timestamp': '2025-10-02 00:27:29.943680', 'step': 9490, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:29.999117', 'step': 9490, 'epoch': 1}
{'type': 'loss', 'content': 0.02942473255097866, 'timestamp': '2025-10-02 00:27:30.001681', 'step': 9491, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:30.056344', 'step': 9491, 'epoch': 1}
{'type': 'loss', 'content': 0.04953421652317047, 'timestamp': '2025-10-02 00:27:30.068079', 'step': 9492, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:30.135330', 'step': 9492, 'epoch': 1}
{'type': 'loss', 'content': 0.13996222615242004, 'timestamp': '2025-10-02 00:27:30.138384', 'step': 9493, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:30.197319', 'step': 9493, 'epoch': 1}
{'type': 'loss', 'content': 0.14310574531555176, 'timestamp': '2025-10-02 00:27:30.201077', 'step': 9494, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:30.322365', 'step': 9494, 'epoch': 1}
{'type': 'loss', 'content': 0.13669584691524506, 'timestamp': '2025-10-02 00:27:30.326548', 'step': 9495, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:30.407901', 'step': 9495, 'epoch': 1}
{'type': 'loss', 'content': 0.04598574712872505, 'timestamp': '2025-10-02 00:27:30.418809', 'step': 9496, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:30.483572', 'step': 9496, 'epoch': 1}
{'type': 'loss', 'content': 0.058443088084459305, 'timestamp': '2025-10-02 00:27:30.505271', 'step': 9497, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:30.597746', 'step': 9497, 'epoch': 1}
{'type': 'loss', 'content': 0.08150284737348557, 'timestamp': '2025-10-02 00:27:30.603609', 'step': 9498, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:30.681053', 'step': 9498, 'epoch': 1}
{'type': 'loss', 'content': 0.11954416334629059, 'timestamp': '2025-10-02 00:27:30.688032', 'step': 9499, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:30.761538', 'step': 9499, 'epoch': 1}
{'type': 'loss', 'content': 0.05630473420023918, 'timestamp': '2025-10-02 00:27:30.771887', 'step': 9500, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 9500', 'timestamp': '2025-10-02 00:27:31.352780', 'step': 9500, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:27:31.437998', 'step': 9500, 'epoch': 1}
{'type': 'loss', 'content': 0.16378812491893768, 'timestamp': '2025-10-02 00:27:31.442464', 'step': 9501, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:31.501565', 'step': 9501, 'epoch': 1}
{'type': 'loss', 'content': 0.13312366604804993, 'timestamp': '2025-10-02 00:27:31.506746', 'step': 9502, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:31.570527', 'step': 9502, 'epoch': 1}
{'type': 'loss', 'content': 0.02147117629647255, 'timestamp': '2025-10-02 00:27:31.580670', 'step': 9503, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:27:31.647934', 'step': 9503, 'epoch': 1}
{'type': 'loss', 'content': 0.014884646981954575, 'timestamp': '2025-10-02 00:27:31.659490', 'step': 9504, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:31.717645', 'step': 9504, 'epoch': 1}
{'type': 'loss', 'content': 0.1343848705291748, 'timestamp': '2025-10-02 00:27:31.721225', 'step': 9505, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:31.783581', 'step': 9505, 'epoch': 1}
{'type': 'loss', 'content': 0.049062490463256836, 'timestamp': '2025-10-02 00:27:31.793690', 'step': 9506, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:31.859303', 'step': 9506, 'epoch': 1}
{'type': 'loss', 'content': 0.017799239605665207, 'timestamp': '2025-10-02 00:27:31.869674', 'step': 9507, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:31.930023', 'step': 9507, 'epoch': 1}
{'type': 'loss', 'content': 0.06994828581809998, 'timestamp': '2025-10-02 00:27:31.945235', 'step': 9508, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:32.003928', 'step': 9508, 'epoch': 1}
{'type': 'loss', 'content': 0.09041482210159302, 'timestamp': '2025-10-02 00:27:32.007164', 'step': 9509, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:32.065458', 'step': 9509, 'epoch': 1}
{'type': 'loss', 'content': 0.05892793461680412, 'timestamp': '2025-10-02 00:27:32.069959', 'step': 9510, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:32.135096', 'step': 9510, 'epoch': 1}
{'type': 'loss', 'content': 0.09773683547973633, 'timestamp': '2025-10-02 00:27:32.139116', 'step': 9511, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:32.196318', 'step': 9511, 'epoch': 1}
{'type': 'loss', 'content': 0.1922166496515274, 'timestamp': '2025-10-02 00:27:32.202527', 'step': 9512, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:32.258475', 'step': 9512, 'epoch': 1}
{'type': 'loss', 'content': 0.023452628403902054, 'timestamp': '2025-10-02 00:27:32.263037', 'step': 9513, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:32.328106', 'step': 9513, 'epoch': 1}
{'type': 'loss', 'content': 0.05626688152551651, 'timestamp': '2025-10-02 00:27:32.332361', 'step': 9514, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:32.399274', 'step': 9514, 'epoch': 1}
{'type': 'loss', 'content': 0.13464346528053284, 'timestamp': '2025-10-02 00:27:32.402786', 'step': 9515, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:32.480272', 'step': 9515, 'epoch': 1}
{'type': 'loss', 'content': 0.03879014775156975, 'timestamp': '2025-10-02 00:27:32.488957', 'step': 9516, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:27:32.548355', 'step': 9516, 'epoch': 1}
{'type': 'loss', 'content': 0.09957177191972733, 'timestamp': '2025-10-02 00:27:32.551383', 'step': 9517, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:32.609101', 'step': 9517, 'epoch': 1}
{'type': 'loss', 'content': 0.04376978427171707, 'timestamp': '2025-10-02 00:27:32.618309', 'step': 9518, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:32.687793', 'step': 9518, 'epoch': 1}
{'type': 'loss', 'content': 0.08487118035554886, 'timestamp': '2025-10-02 00:27:32.691588', 'step': 9519, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:27:32.757410', 'step': 9519, 'epoch': 1}
{'type': 'loss', 'content': 0.031058037653565407, 'timestamp': '2025-10-02 00:27:32.768827', 'step': 9520, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:32.827357', 'step': 9520, 'epoch': 1}
{'type': 'loss', 'content': 0.2051973193883896, 'timestamp': '2025-10-02 00:27:32.831838', 'step': 9521, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:32.898735', 'step': 9521, 'epoch': 1}
{'type': 'loss', 'content': 0.09880141913890839, 'timestamp': '2025-10-02 00:27:32.901942', 'step': 9522, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:32.973348', 'step': 9522, 'epoch': 1}
{'type': 'loss', 'content': 0.058664027601480484, 'timestamp': '2025-10-02 00:27:32.977475', 'step': 9523, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:33.044513', 'step': 9523, 'epoch': 1}
{'type': 'loss', 'content': 0.1377483606338501, 'timestamp': '2025-10-02 00:27:33.052508', 'step': 9524, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:33.123175', 'step': 9524, 'epoch': 1}
{'type': 'loss', 'content': 0.16668249666690826, 'timestamp': '2025-10-02 00:27:33.129121', 'step': 9525, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:33.196788', 'step': 9525, 'epoch': 1}
{'type': 'loss', 'content': 0.03298350051045418, 'timestamp': '2025-10-02 00:27:33.204204', 'step': 9526, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:33.278962', 'step': 9526, 'epoch': 1}
{'type': 'loss', 'content': 0.08818946033716202, 'timestamp': '2025-10-02 00:27:33.284750', 'step': 9527, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:33.343347', 'step': 9527, 'epoch': 1}
{'type': 'loss', 'content': 0.02689756266772747, 'timestamp': '2025-10-02 00:27:33.351569', 'step': 9528, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:33.410018', 'step': 9528, 'epoch': 1}
{'type': 'loss', 'content': 0.11250276863574982, 'timestamp': '2025-10-02 00:27:33.413642', 'step': 9529, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:33.484203', 'step': 9529, 'epoch': 1}
{'type': 'loss', 'content': 0.0427081473171711, 'timestamp': '2025-10-02 00:27:33.488459', 'step': 9530, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:33.560019', 'step': 9530, 'epoch': 1}
{'type': 'loss', 'content': 0.02325047180056572, 'timestamp': '2025-10-02 00:27:33.577798', 'step': 9531, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:33.661306', 'step': 9531, 'epoch': 1}
{'type': 'loss', 'content': 0.14274707436561584, 'timestamp': '2025-10-02 00:27:33.678903', 'step': 9532, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:33.748010', 'step': 9532, 'epoch': 1}
{'type': 'loss', 'content': 0.03804188221693039, 'timestamp': '2025-10-02 00:27:33.763267', 'step': 9533, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:33.851752', 'step': 9533, 'epoch': 1}
{'type': 'loss', 'content': 0.1029500737786293, 'timestamp': '2025-10-02 00:27:33.869046', 'step': 9534, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:33.950385', 'step': 9534, 'epoch': 1}
{'type': 'loss', 'content': 0.015321195125579834, 'timestamp': '2025-10-02 00:27:33.955203', 'step': 9535, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:34.046290', 'step': 9535, 'epoch': 1}
{'type': 'loss', 'content': 0.06873970478773117, 'timestamp': '2025-10-02 00:27:34.053025', 'step': 9536, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:34.148320', 'step': 9536, 'epoch': 1}
{'type': 'loss', 'content': 0.10717575997114182, 'timestamp': '2025-10-02 00:27:34.161979', 'step': 9537, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:34.234675', 'step': 9537, 'epoch': 1}
{'type': 'loss', 'content': 0.09028713405132294, 'timestamp': '2025-10-02 00:27:34.240432', 'step': 9538, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:34.311734', 'step': 9538, 'epoch': 1}
{'type': 'loss', 'content': 0.05176716670393944, 'timestamp': '2025-10-02 00:27:34.332987', 'step': 9539, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:34.424470', 'step': 9539, 'epoch': 1}
{'type': 'loss', 'content': 0.043618787080049515, 'timestamp': '2025-10-02 00:27:34.435451', 'step': 9540, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:34.493758', 'step': 9540, 'epoch': 1}
{'type': 'loss', 'content': 0.19797569513320923, 'timestamp': '2025-10-02 00:27:34.496685', 'step': 9541, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:34.584605', 'step': 9541, 'epoch': 1}
{'type': 'loss', 'content': 0.058433178812265396, 'timestamp': '2025-10-02 00:27:34.601280', 'step': 9542, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:27:34.682158', 'step': 9542, 'epoch': 1}
{'type': 'loss', 'content': 0.12936672568321228, 'timestamp': '2025-10-02 00:27:34.698150', 'step': 9543, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:34.787815', 'step': 9543, 'epoch': 1}
{'type': 'loss', 'content': 0.07438987493515015, 'timestamp': '2025-10-02 00:27:34.795601', 'step': 9544, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:34.852640', 'step': 9544, 'epoch': 1}
{'type': 'loss', 'content': 0.06834519654512405, 'timestamp': '2025-10-02 00:27:34.856865', 'step': 9545, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:34.915116', 'step': 9545, 'epoch': 1}
{'type': 'loss', 'content': 0.10489203035831451, 'timestamp': '2025-10-02 00:27:34.918805', 'step': 9546, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:34.989348', 'step': 9546, 'epoch': 1}
{'type': 'loss', 'content': 0.08326926082372665, 'timestamp': '2025-10-02 00:27:34.993254', 'step': 9547, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:35.073585', 'step': 9547, 'epoch': 1}
{'type': 'loss', 'content': 0.07654508203268051, 'timestamp': '2025-10-02 00:27:35.090130', 'step': 9548, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:27:35.158219', 'step': 9548, 'epoch': 1}
{'type': 'loss', 'content': 0.121731698513031, 'timestamp': '2025-10-02 00:27:35.172771', 'step': 9549, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:35.262011', 'step': 9549, 'epoch': 1}
{'type': 'loss', 'content': 0.10814082622528076, 'timestamp': '2025-10-02 00:27:35.278411', 'step': 9550, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:35.366491', 'step': 9550, 'epoch': 1}
{'type': 'loss', 'content': 0.032495349645614624, 'timestamp': '2025-10-02 00:27:35.370008', 'step': 9551, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:35.430106', 'step': 9551, 'epoch': 1}
{'type': 'loss', 'content': 0.02796665020287037, 'timestamp': '2025-10-02 00:27:35.439728', 'step': 9552, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:27:35.522148', 'step': 9552, 'epoch': 1}
{'type': 'loss', 'content': 0.03102993592619896, 'timestamp': '2025-10-02 00:27:35.535851', 'step': 9553, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:35.612976', 'step': 9553, 'epoch': 1}
{'type': 'loss', 'content': 0.16068264842033386, 'timestamp': '2025-10-02 00:27:35.625551', 'step': 9554, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:35.694330', 'step': 9554, 'epoch': 1}
{'type': 'loss', 'content': 0.12741340696811676, 'timestamp': '2025-10-02 00:27:35.706528', 'step': 9555, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:27:35.777236', 'step': 9555, 'epoch': 1}
{'type': 'loss', 'content': 0.22251293063163757, 'timestamp': '2025-10-02 00:27:35.784983', 'step': 9556, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:35.866706', 'step': 9556, 'epoch': 1}
{'type': 'loss', 'content': 0.08036923408508301, 'timestamp': '2025-10-02 00:27:35.870156', 'step': 9557, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:35.949256', 'step': 9557, 'epoch': 1}
{'type': 'loss', 'content': 0.009723825380206108, 'timestamp': '2025-10-02 00:27:35.959481', 'step': 9558, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:36.021789', 'step': 9558, 'epoch': 1}
{'type': 'loss', 'content': 0.09321355074644089, 'timestamp': '2025-10-02 00:27:36.038036', 'step': 9559, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:36.145675', 'step': 9559, 'epoch': 1}
{'type': 'loss', 'content': 0.06736823916435242, 'timestamp': '2025-10-02 00:27:36.165307', 'step': 9560, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:36.250686', 'step': 9560, 'epoch': 1}
{'type': 'loss', 'content': 0.1049734503030777, 'timestamp': '2025-10-02 00:27:36.264984', 'step': 9561, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:36.351839', 'step': 9561, 'epoch': 1}
{'type': 'loss', 'content': 0.06385527551174164, 'timestamp': '2025-10-02 00:27:36.361174', 'step': 9562, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:36.451265', 'step': 9562, 'epoch': 1}
{'type': 'loss', 'content': 0.011632108129560947, 'timestamp': '2025-10-02 00:27:36.465537', 'step': 9563, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:36.546170', 'step': 9563, 'epoch': 1}
{'type': 'loss', 'content': 0.14225293695926666, 'timestamp': '2025-10-02 00:27:36.553949', 'step': 9564, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:36.612799', 'step': 9564, 'epoch': 1}
{'type': 'loss', 'content': 0.020485427230596542, 'timestamp': '2025-10-02 00:27:36.616639', 'step': 9565, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:36.703046', 'step': 9565, 'epoch': 1}
{'type': 'loss', 'content': 0.1486034095287323, 'timestamp': '2025-10-02 00:27:36.707751', 'step': 9566, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:36.785079', 'step': 9566, 'epoch': 1}
{'type': 'loss', 'content': 0.08680419623851776, 'timestamp': '2025-10-02 00:27:36.798466', 'step': 9567, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:36.890484', 'step': 9567, 'epoch': 1}
{'type': 'loss', 'content': 0.04348539561033249, 'timestamp': '2025-10-02 00:27:36.910495', 'step': 9568, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:37.005854', 'step': 9568, 'epoch': 1}
{'type': 'loss', 'content': 0.09343886375427246, 'timestamp': '2025-10-02 00:27:37.016781', 'step': 9569, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:37.109408', 'step': 9569, 'epoch': 1}
{'type': 'loss', 'content': 0.056189827620983124, 'timestamp': '2025-10-02 00:27:37.119630', 'step': 9570, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:37.200604', 'step': 9570, 'epoch': 1}
{'type': 'loss', 'content': 0.08112333714962006, 'timestamp': '2025-10-02 00:27:37.205607', 'step': 9571, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:37.274642', 'step': 9571, 'epoch': 1}
{'type': 'loss', 'content': 0.09965411573648453, 'timestamp': '2025-10-02 00:27:37.281678', 'step': 9572, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:27:37.381258', 'step': 9572, 'epoch': 1}
{'type': 'loss', 'content': 0.022396670654416084, 'timestamp': '2025-10-02 00:27:37.395958', 'step': 9573, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:37.492593', 'step': 9573, 'epoch': 1}
{'type': 'loss', 'content': 0.012630374170839787, 'timestamp': '2025-10-02 00:27:37.508006', 'step': 9574, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:37.602336', 'step': 9574, 'epoch': 1}
{'type': 'loss', 'content': 0.13072511553764343, 'timestamp': '2025-10-02 00:27:37.619870', 'step': 9575, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:37.688128', 'step': 9575, 'epoch': 1}
{'type': 'loss', 'content': 0.14614702761173248, 'timestamp': '2025-10-02 00:27:37.695364', 'step': 9576, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:27:37.784085', 'step': 9576, 'epoch': 1}
{'type': 'loss', 'content': 0.0720176175236702, 'timestamp': '2025-10-02 00:27:37.788224', 'step': 9577, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:37.874949', 'step': 9577, 'epoch': 1}
{'type': 'loss', 'content': 0.11929582059383392, 'timestamp': '2025-10-02 00:27:37.888726', 'step': 9578, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:37.967048', 'step': 9578, 'epoch': 1}
{'type': 'loss', 'content': 0.08486836403608322, 'timestamp': '2025-10-02 00:27:37.971628', 'step': 9579, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:38.041531', 'step': 9579, 'epoch': 1}
{'type': 'loss', 'content': 0.12788310647010803, 'timestamp': '2025-10-02 00:27:38.049172', 'step': 9580, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:38.137009', 'step': 9580, 'epoch': 1}
{'type': 'loss', 'content': 0.04704568535089493, 'timestamp': '2025-10-02 00:27:38.141062', 'step': 9581, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:38.213253', 'step': 9581, 'epoch': 1}
{'type': 'loss', 'content': 0.054149508476257324, 'timestamp': '2025-10-02 00:27:38.225465', 'step': 9582, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:38.316712', 'step': 9582, 'epoch': 1}
{'type': 'loss', 'content': 0.03038904443383217, 'timestamp': '2025-10-02 00:27:38.323847', 'step': 9583, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:38.398151', 'step': 9583, 'epoch': 1}
{'type': 'loss', 'content': 0.15871647000312805, 'timestamp': '2025-10-02 00:27:38.407194', 'step': 9584, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:38.463709', 'step': 9584, 'epoch': 1}
{'type': 'loss', 'content': 0.048251260071992874, 'timestamp': '2025-10-02 00:27:38.481659', 'step': 9585, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:38.561648', 'step': 9585, 'epoch': 1}
{'type': 'loss', 'content': 0.1393597573041916, 'timestamp': '2025-10-02 00:27:38.566817', 'step': 9586, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:38.636131', 'step': 9586, 'epoch': 1}
{'type': 'loss', 'content': 0.17977696657180786, 'timestamp': '2025-10-02 00:27:38.640493', 'step': 9587, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:38.712863', 'step': 9587, 'epoch': 1}
{'type': 'loss', 'content': 0.07366450130939484, 'timestamp': '2025-10-02 00:27:38.726893', 'step': 9588, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:38.785980', 'step': 9588, 'epoch': 1}
{'type': 'loss', 'content': 0.086239755153656, 'timestamp': '2025-10-02 00:27:38.791021', 'step': 9589, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:38.871879', 'step': 9589, 'epoch': 1}
{'type': 'loss', 'content': 0.04859716445207596, 'timestamp': '2025-10-02 00:27:38.876026', 'step': 9590, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:38.962820', 'step': 9590, 'epoch': 1}
{'type': 'loss', 'content': 0.21746480464935303, 'timestamp': '2025-10-02 00:27:38.966391', 'step': 9591, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:39.024005', 'step': 9591, 'epoch': 1}
{'type': 'loss', 'content': 0.09551462531089783, 'timestamp': '2025-10-02 00:27:39.031533', 'step': 9592, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:39.088932', 'step': 9592, 'epoch': 1}
{'type': 'loss', 'content': 0.15455210208892822, 'timestamp': '2025-10-02 00:27:39.091903', 'step': 9593, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:39.168033', 'step': 9593, 'epoch': 1}
{'type': 'loss', 'content': 0.09799594432115555, 'timestamp': '2025-10-02 00:27:39.171753', 'step': 9594, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:39.232831', 'step': 9594, 'epoch': 1}
{'type': 'loss', 'content': 0.010316719301044941, 'timestamp': '2025-10-02 00:27:39.241818', 'step': 9595, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:39.302389', 'step': 9595, 'epoch': 1}
{'type': 'loss', 'content': 0.06088871508836746, 'timestamp': '2025-10-02 00:27:39.312705', 'step': 9596, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:39.379650', 'step': 9596, 'epoch': 1}
{'type': 'loss', 'content': 0.07751841843128204, 'timestamp': '2025-10-02 00:27:39.399859', 'step': 9597, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:39.501106', 'step': 9597, 'epoch': 1}
{'type': 'loss', 'content': 0.05813344568014145, 'timestamp': '2025-10-02 00:27:39.518054', 'step': 9598, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:39.587774', 'step': 9598, 'epoch': 1}
{'type': 'loss', 'content': 0.03379939869046211, 'timestamp': '2025-10-02 00:27:39.597119', 'step': 9599, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:39.686571', 'step': 9599, 'epoch': 1}
{'type': 'loss', 'content': 0.03182356804609299, 'timestamp': '2025-10-02 00:27:39.693372', 'step': 9600, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:39.759073', 'step': 9600, 'epoch': 1}
{'type': 'loss', 'content': 0.12108447402715683, 'timestamp': '2025-10-02 00:27:39.762797', 'step': 9601, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:39.834868', 'step': 9601, 'epoch': 1}
{'type': 'loss', 'content': 0.1673254817724228, 'timestamp': '2025-10-02 00:27:39.839760', 'step': 9602, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:39.900785', 'step': 9602, 'epoch': 1}
{'type': 'loss', 'content': 0.1327543556690216, 'timestamp': '2025-10-02 00:27:39.904924', 'step': 9603, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:39.987654', 'step': 9603, 'epoch': 1}
{'type': 'loss', 'content': 0.05143050104379654, 'timestamp': '2025-10-02 00:27:39.997857', 'step': 9604, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:40.068069', 'step': 9604, 'epoch': 1}
{'type': 'loss', 'content': 0.14986254274845123, 'timestamp': '2025-10-02 00:27:40.072041', 'step': 9605, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:40.139900', 'step': 9605, 'epoch': 1}
{'type': 'loss', 'content': 0.02351400814950466, 'timestamp': '2025-10-02 00:27:40.157995', 'step': 9606, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:40.216894', 'step': 9606, 'epoch': 1}
{'type': 'loss', 'content': 0.12852898240089417, 'timestamp': '2025-10-02 00:27:40.219706', 'step': 9607, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:27:40.294744', 'step': 9607, 'epoch': 1}
{'type': 'loss', 'content': 0.189226433634758, 'timestamp': '2025-10-02 00:27:40.313264', 'step': 9608, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:40.380476', 'step': 9608, 'epoch': 1}
{'type': 'loss', 'content': 0.02645629271864891, 'timestamp': '2025-10-02 00:27:40.386051', 'step': 9609, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:40.464750', 'step': 9609, 'epoch': 1}
{'type': 'loss', 'content': 0.036150239408016205, 'timestamp': '2025-10-02 00:27:40.478288', 'step': 9610, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:40.548914', 'step': 9610, 'epoch': 1}
{'type': 'loss', 'content': 0.17224092781543732, 'timestamp': '2025-10-02 00:27:40.552252', 'step': 9611, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:40.625091', 'step': 9611, 'epoch': 1}
{'type': 'loss', 'content': 0.07244430482387543, 'timestamp': '2025-10-02 00:27:40.631528', 'step': 9612, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:40.701698', 'step': 9612, 'epoch': 1}
{'type': 'loss', 'content': 0.15030574798583984, 'timestamp': '2025-10-02 00:27:40.716482', 'step': 9613, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:27:40.797775', 'step': 9613, 'epoch': 1}
{'type': 'loss', 'content': 0.06278035789728165, 'timestamp': '2025-10-02 00:27:40.811179', 'step': 9614, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:40.873675', 'step': 9614, 'epoch': 1}
{'type': 'loss', 'content': 0.11552608758211136, 'timestamp': '2025-10-02 00:27:40.876876', 'step': 9615, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:40.946354', 'step': 9615, 'epoch': 1}
{'type': 'loss', 'content': 0.20788653194904327, 'timestamp': '2025-10-02 00:27:40.954088', 'step': 9616, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:41.012660', 'step': 9616, 'epoch': 1}
{'type': 'loss', 'content': 0.1133500412106514, 'timestamp': '2025-10-02 00:27:41.017387', 'step': 9617, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:41.086496', 'step': 9617, 'epoch': 1}
{'type': 'loss', 'content': 0.09244643151760101, 'timestamp': '2025-10-02 00:27:41.093930', 'step': 9618, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:41.151497', 'step': 9618, 'epoch': 1}
{'type': 'loss', 'content': 0.12473809719085693, 'timestamp': '2025-10-02 00:27:41.154940', 'step': 9619, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:41.213282', 'step': 9619, 'epoch': 1}
{'type': 'loss', 'content': 0.11168473958969116, 'timestamp': '2025-10-02 00:27:41.220555', 'step': 9620, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:41.287707', 'step': 9620, 'epoch': 1}
{'type': 'loss', 'content': 0.0257955901324749, 'timestamp': '2025-10-02 00:27:41.292025', 'step': 9621, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:41.350222', 'step': 9621, 'epoch': 1}
{'type': 'loss', 'content': 0.09343475103378296, 'timestamp': '2025-10-02 00:27:41.365930', 'step': 9622, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:41.457776', 'step': 9622, 'epoch': 1}
{'type': 'loss', 'content': 0.14262212812900543, 'timestamp': '2025-10-02 00:27:41.460718', 'step': 9623, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:27:41.560219', 'step': 9623, 'epoch': 1}
{'type': 'loss', 'content': 0.047535937279462814, 'timestamp': '2025-10-02 00:27:41.572953', 'step': 9624, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:41.644508', 'step': 9624, 'epoch': 1}
{'type': 'loss', 'content': 0.23016737401485443, 'timestamp': '2025-10-02 00:27:41.648417', 'step': 9625, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:27:41.740392', 'step': 9625, 'epoch': 1}
{'type': 'loss', 'content': 0.03121917136013508, 'timestamp': '2025-10-02 00:27:41.751033', 'step': 9626, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:41.848897', 'step': 9626, 'epoch': 1}
{'type': 'loss', 'content': 0.20120075345039368, 'timestamp': '2025-10-02 00:27:41.863079', 'step': 9627, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:41.921511', 'step': 9627, 'epoch': 1}
{'type': 'loss', 'content': 0.10320387780666351, 'timestamp': '2025-10-02 00:27:41.931594', 'step': 9628, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:42.003031', 'step': 9628, 'epoch': 1}
{'type': 'loss', 'content': 0.022017652168869972, 'timestamp': '2025-10-02 00:27:42.013230', 'step': 9629, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:27:42.101599', 'step': 9629, 'epoch': 1}
{'type': 'loss', 'content': 0.10959597676992416, 'timestamp': '2025-10-02 00:27:42.104487', 'step': 9630, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:27:42.182849', 'step': 9630, 'epoch': 1}
{'type': 'loss', 'content': 0.059431225061416626, 'timestamp': '2025-10-02 00:27:42.186123', 'step': 9631, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:42.247874', 'step': 9631, 'epoch': 1}
{'type': 'loss', 'content': 0.0292317233979702, 'timestamp': '2025-10-02 00:27:42.267305', 'step': 9632, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:42.336640', 'step': 9632, 'epoch': 1}
{'type': 'loss', 'content': 0.06141353026032448, 'timestamp': '2025-10-02 00:27:42.340326', 'step': 9633, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:42.398179', 'step': 9633, 'epoch': 1}
{'type': 'loss', 'content': 0.09160491079092026, 'timestamp': '2025-10-02 00:27:42.414109', 'step': 9634, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:42.488653', 'step': 9634, 'epoch': 1}
{'type': 'loss', 'content': 0.06098279356956482, 'timestamp': '2025-10-02 00:27:42.500913', 'step': 9635, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:42.581662', 'step': 9635, 'epoch': 1}
{'type': 'loss', 'content': 0.027179572731256485, 'timestamp': '2025-10-02 00:27:42.589869', 'step': 9636, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:42.677432', 'step': 9636, 'epoch': 1}
{'type': 'loss', 'content': 0.11820786446332932, 'timestamp': '2025-10-02 00:27:42.692339', 'step': 9637, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:42.778976', 'step': 9637, 'epoch': 1}
{'type': 'loss', 'content': 0.02300357073545456, 'timestamp': '2025-10-02 00:27:42.790624', 'step': 9638, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:42.884877', 'step': 9638, 'epoch': 1}
{'type': 'loss', 'content': 0.08112889528274536, 'timestamp': '2025-10-02 00:27:42.894444', 'step': 9639, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:42.966401', 'step': 9639, 'epoch': 1}
{'type': 'loss', 'content': 0.1943632960319519, 'timestamp': '2025-10-02 00:27:42.973307', 'step': 9640, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:43.065415', 'step': 9640, 'epoch': 1}
{'type': 'loss', 'content': 0.16328899562358856, 'timestamp': '2025-10-02 00:27:43.070140', 'step': 9641, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:27:43.165950', 'step': 9641, 'epoch': 1}
{'type': 'loss', 'content': 0.07685240358114243, 'timestamp': '2025-10-02 00:27:43.188247', 'step': 9642, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:43.245812', 'step': 9642, 'epoch': 1}
{'type': 'loss', 'content': 0.08154671639204025, 'timestamp': '2025-10-02 00:27:43.251486', 'step': 9643, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:43.324901', 'step': 9643, 'epoch': 1}
{'type': 'loss', 'content': 0.07475807517766953, 'timestamp': '2025-10-02 00:27:43.333269', 'step': 9644, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:43.423881', 'step': 9644, 'epoch': 1}
{'type': 'loss', 'content': 0.11272032558917999, 'timestamp': '2025-10-02 00:27:43.428883', 'step': 9645, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:43.492765', 'step': 9645, 'epoch': 1}
{'type': 'loss', 'content': 0.04919284209609032, 'timestamp': '2025-10-02 00:27:43.496971', 'step': 9646, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:43.557203', 'step': 9646, 'epoch': 1}
{'type': 'loss', 'content': 0.047874659299850464, 'timestamp': '2025-10-02 00:27:43.561904', 'step': 9647, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:43.638761', 'step': 9647, 'epoch': 1}
{'type': 'loss', 'content': 0.25952136516571045, 'timestamp': '2025-10-02 00:27:43.645906', 'step': 9648, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:43.703179', 'step': 9648, 'epoch': 1}
{'type': 'loss', 'content': 0.1721249371767044, 'timestamp': '2025-10-02 00:27:43.706272', 'step': 9649, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:43.769798', 'step': 9649, 'epoch': 1}
{'type': 'loss', 'content': 0.022776180878281593, 'timestamp': '2025-10-02 00:27:43.774445', 'step': 9650, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:43.834417', 'step': 9650, 'epoch': 1}
{'type': 'loss', 'content': 0.04199885576963425, 'timestamp': '2025-10-02 00:27:43.838510', 'step': 9651, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:43.898391', 'step': 9651, 'epoch': 1}
{'type': 'loss', 'content': 0.054949451237916946, 'timestamp': '2025-10-02 00:27:43.905060', 'step': 9652, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:43.967854', 'step': 9652, 'epoch': 1}
{'type': 'loss', 'content': 0.07533670961856842, 'timestamp': '2025-10-02 00:27:43.971392', 'step': 9653, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:44.040671', 'step': 9653, 'epoch': 1}
{'type': 'loss', 'content': 0.10432086884975433, 'timestamp': '2025-10-02 00:27:44.046107', 'step': 9654, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:44.104655', 'step': 9654, 'epoch': 1}
{'type': 'loss', 'content': 0.13223977386951447, 'timestamp': '2025-10-02 00:27:44.113113', 'step': 9655, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:44.173308', 'step': 9655, 'epoch': 1}
{'type': 'loss', 'content': 0.03700058162212372, 'timestamp': '2025-10-02 00:27:44.189599', 'step': 9656, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:44.255043', 'step': 9656, 'epoch': 1}
{'type': 'loss', 'content': 0.08844748139381409, 'timestamp': '2025-10-02 00:27:44.269547', 'step': 9657, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:44.371589', 'step': 9657, 'epoch': 1}
{'type': 'loss', 'content': 0.027577340602874756, 'timestamp': '2025-10-02 00:27:44.381738', 'step': 9658, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:44.475880', 'step': 9658, 'epoch': 1}
{'type': 'loss', 'content': 0.048986513167619705, 'timestamp': '2025-10-02 00:27:44.493406', 'step': 9659, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:44.589649', 'step': 9659, 'epoch': 1}
{'type': 'loss', 'content': 0.04635036364197731, 'timestamp': '2025-10-02 00:27:44.597628', 'step': 9660, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:44.675248', 'step': 9660, 'epoch': 1}
{'type': 'loss', 'content': 0.1668369174003601, 'timestamp': '2025-10-02 00:27:44.680462', 'step': 9661, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:44.759283', 'step': 9661, 'epoch': 1}
{'type': 'loss', 'content': 0.05138883367180824, 'timestamp': '2025-10-02 00:27:44.772341', 'step': 9662, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:44.848978', 'step': 9662, 'epoch': 1}
{'type': 'loss', 'content': 0.07750353962182999, 'timestamp': '2025-10-02 00:27:44.856314', 'step': 9663, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:44.928470', 'step': 9663, 'epoch': 1}
{'type': 'loss', 'content': 0.2015039622783661, 'timestamp': '2025-10-02 00:27:44.940214', 'step': 9664, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:44.998588', 'step': 9664, 'epoch': 1}
{'type': 'loss', 'content': 0.02630634233355522, 'timestamp': '2025-10-02 00:27:45.009919', 'step': 9665, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:45.078611', 'step': 9665, 'epoch': 1}
{'type': 'loss', 'content': 0.08056909590959549, 'timestamp': '2025-10-02 00:27:45.087011', 'step': 9666, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:45.151793', 'step': 9666, 'epoch': 1}
{'type': 'loss', 'content': 0.12796512246131897, 'timestamp': '2025-10-02 00:27:45.160808', 'step': 9667, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:45.229787', 'step': 9667, 'epoch': 1}
{'type': 'loss', 'content': 0.06451059877872467, 'timestamp': '2025-10-02 00:27:45.236573', 'step': 9668, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:45.305989', 'step': 9668, 'epoch': 1}
{'type': 'loss', 'content': 0.163025364279747, 'timestamp': '2025-10-02 00:27:45.313405', 'step': 9669, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:27:45.382320', 'step': 9669, 'epoch': 1}
{'type': 'loss', 'content': 0.05289889872074127, 'timestamp': '2025-10-02 00:27:45.385405', 'step': 9670, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:45.464959', 'step': 9670, 'epoch': 1}
{'type': 'loss', 'content': 0.07040291279554367, 'timestamp': '2025-10-02 00:27:45.467860', 'step': 9671, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:45.524109', 'step': 9671, 'epoch': 1}
{'type': 'loss', 'content': 0.10853707045316696, 'timestamp': '2025-10-02 00:27:45.545859', 'step': 9672, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:45.608822', 'step': 9672, 'epoch': 1}
{'type': 'loss', 'content': 0.05015266314148903, 'timestamp': '2025-10-02 00:27:45.612520', 'step': 9673, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:45.671937', 'step': 9673, 'epoch': 1}
{'type': 'loss', 'content': 0.031111225485801697, 'timestamp': '2025-10-02 00:27:45.681515', 'step': 9674, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:45.740181', 'step': 9674, 'epoch': 1}
{'type': 'loss', 'content': 0.07592109590768814, 'timestamp': '2025-10-02 00:27:45.757712', 'step': 9675, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:27:45.818871', 'step': 9675, 'epoch': 1}
{'type': 'loss', 'content': 0.13509641587734222, 'timestamp': '2025-10-02 00:27:45.825744', 'step': 9676, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:45.888019', 'step': 9676, 'epoch': 1}
{'type': 'loss', 'content': 0.11080685257911682, 'timestamp': '2025-10-02 00:27:45.891118', 'step': 9677, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:45.953698', 'step': 9677, 'epoch': 1}
{'type': 'loss', 'content': 0.1720406413078308, 'timestamp': '2025-10-02 00:27:45.959719', 'step': 9678, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:46.026878', 'step': 9678, 'epoch': 1}
{'type': 'loss', 'content': 0.040503133088350296, 'timestamp': '2025-10-02 00:27:46.036382', 'step': 9679, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:46.095491', 'step': 9679, 'epoch': 1}
{'type': 'loss', 'content': 0.10515028238296509, 'timestamp': '2025-10-02 00:27:46.101998', 'step': 9680, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:46.156624', 'step': 9680, 'epoch': 1}
{'type': 'loss', 'content': 0.1638355404138565, 'timestamp': '2025-10-02 00:27:46.158985', 'step': 9681, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:46.216077', 'step': 9681, 'epoch': 1}
{'type': 'loss', 'content': 0.09292032569646835, 'timestamp': '2025-10-02 00:27:46.219056', 'step': 9682, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:46.280576', 'step': 9682, 'epoch': 1}
{'type': 'loss', 'content': 0.06587467342615128, 'timestamp': '2025-10-02 00:27:46.283532', 'step': 9683, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:46.340413', 'step': 9683, 'epoch': 1}
{'type': 'loss', 'content': 0.09215562045574188, 'timestamp': '2025-10-02 00:27:46.346798', 'step': 9684, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:27:46.419965', 'step': 9684, 'epoch': 1}
{'type': 'loss', 'content': 0.025325536727905273, 'timestamp': '2025-10-02 00:27:46.432955', 'step': 9685, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:46.490983', 'step': 9685, 'epoch': 1}
{'type': 'loss', 'content': 0.07647678256034851, 'timestamp': '2025-10-02 00:27:46.500519', 'step': 9686, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:46.555659', 'step': 9686, 'epoch': 1}
{'type': 'loss', 'content': 0.1246902346611023, 'timestamp': '2025-10-02 00:27:46.562085', 'step': 9687, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:46.626077', 'step': 9687, 'epoch': 1}
{'type': 'loss', 'content': 0.28151935338974, 'timestamp': '2025-10-02 00:27:46.638148', 'step': 9688, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:27:46.704991', 'step': 9688, 'epoch': 1}
{'type': 'loss', 'content': 0.09537487477064133, 'timestamp': '2025-10-02 00:27:46.716622', 'step': 9689, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:27:46.786961', 'step': 9689, 'epoch': 1}
{'type': 'loss', 'content': 0.13997091352939606, 'timestamp': '2025-10-02 00:27:46.795209', 'step': 9690, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:46.858608', 'step': 9690, 'epoch': 1}
{'type': 'loss', 'content': 0.04531358554959297, 'timestamp': '2025-10-02 00:27:46.864909', 'step': 9691, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:46.934312', 'step': 9691, 'epoch': 1}
{'type': 'loss', 'content': 0.10626833140850067, 'timestamp': '2025-10-02 00:27:46.945255', 'step': 9692, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:47.016802', 'step': 9692, 'epoch': 1}
{'type': 'loss', 'content': 0.010611935518682003, 'timestamp': '2025-10-02 00:27:47.024070', 'step': 9693, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:47.087930', 'step': 9693, 'epoch': 1}
{'type': 'loss', 'content': 0.10052531957626343, 'timestamp': '2025-10-02 00:27:47.096918', 'step': 9694, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:47.176360', 'step': 9694, 'epoch': 1}
{'type': 'loss', 'content': 0.08723528683185577, 'timestamp': '2025-10-02 00:27:47.182223', 'step': 9695, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:47.251221', 'step': 9695, 'epoch': 1}
{'type': 'loss', 'content': 0.02869109809398651, 'timestamp': '2025-10-02 00:27:47.262445', 'step': 9696, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:47.322923', 'step': 9696, 'epoch': 1}
{'type': 'loss', 'content': 0.16647076606750488, 'timestamp': '2025-10-02 00:27:47.325806', 'step': 9697, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:47.383213', 'step': 9697, 'epoch': 1}
{'type': 'loss', 'content': 0.036026839166879654, 'timestamp': '2025-10-02 00:27:47.392755', 'step': 9698, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:47.461061', 'step': 9698, 'epoch': 1}
{'type': 'loss', 'content': 0.0236562080681324, 'timestamp': '2025-10-02 00:27:47.470806', 'step': 9699, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:47.543106', 'step': 9699, 'epoch': 1}
{'type': 'loss', 'content': 0.032329779118299484, 'timestamp': '2025-10-02 00:27:47.552634', 'step': 9700, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:47.613304', 'step': 9700, 'epoch': 1}
{'type': 'loss', 'content': 0.04066698998212814, 'timestamp': '2025-10-02 00:27:47.616642', 'step': 9701, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:47.678296', 'step': 9701, 'epoch': 1}
{'type': 'loss', 'content': 0.12149839848279953, 'timestamp': '2025-10-02 00:27:47.681471', 'step': 9702, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:47.743530', 'step': 9702, 'epoch': 1}
{'type': 'loss', 'content': 0.10235144197940826, 'timestamp': '2025-10-02 00:27:47.746407', 'step': 9703, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:47.806181', 'step': 9703, 'epoch': 1}
{'type': 'loss', 'content': 0.05196793004870415, 'timestamp': '2025-10-02 00:27:47.818468', 'step': 9704, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:27:47.891135', 'step': 9704, 'epoch': 1}
{'type': 'loss', 'content': 0.11110979318618774, 'timestamp': '2025-10-02 00:27:47.894253', 'step': 9705, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:27:47.957131', 'step': 9705, 'epoch': 1}
{'type': 'loss', 'content': 0.04518841579556465, 'timestamp': '2025-10-02 00:27:47.959879', 'step': 9706, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:48.023079', 'step': 9706, 'epoch': 1}
{'type': 'loss', 'content': 0.024999814108014107, 'timestamp': '2025-10-02 00:27:48.030387', 'step': 9707, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:27:48.086616', 'step': 9707, 'epoch': 1}
{'type': 'loss', 'content': 0.14721620082855225, 'timestamp': '2025-10-02 00:27:48.099420', 'step': 9708, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:27:48.174580', 'step': 9708, 'epoch': 1}
{'type': 'loss', 'content': 0.03910912945866585, 'timestamp': '2025-10-02 00:27:48.185920', 'step': 9709, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:48.241709', 'step': 9709, 'epoch': 1}
{'type': 'loss', 'content': 0.07625280320644379, 'timestamp': '2025-10-02 00:27:48.244669', 'step': 9710, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:48.302307', 'step': 9710, 'epoch': 1}
{'type': 'loss', 'content': 0.017733385786414146, 'timestamp': '2025-10-02 00:27:48.307965', 'step': 9711, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:27:48.364337', 'step': 9711, 'epoch': 1}
{'type': 'loss', 'content': 0.08399403095245361, 'timestamp': '2025-10-02 00:27:48.371090', 'step': 9712, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:27:48.428154', 'step': 9712, 'epoch': 1}
{'type': 'loss', 'content': 0.0238207895308733, 'timestamp': '2025-10-02 00:27:48.438426', 'step': 9713, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:48.494069', 'step': 9713, 'epoch': 1}
{'type': 'loss', 'content': 0.316494345664978, 'timestamp': '2025-10-02 00:27:48.497577', 'step': 9714, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:48.557249', 'step': 9714, 'epoch': 1}
{'type': 'loss', 'content': 0.19376108050346375, 'timestamp': '2025-10-02 00:27:48.564887', 'step': 9715, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:27:48.625808', 'step': 9715, 'epoch': 1}
{'type': 'loss', 'content': 0.09768148511648178, 'timestamp': '2025-10-02 00:27:48.631714', 'step': 9716, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:48.685372', 'step': 9716, 'epoch': 1}
{'type': 'loss', 'content': 0.12971435487270355, 'timestamp': '2025-10-02 00:27:48.691096', 'step': 9717, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:48.746445', 'step': 9717, 'epoch': 1}
{'type': 'loss', 'content': 0.19385766983032227, 'timestamp': '2025-10-02 00:27:48.749634', 'step': 9718, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:48.805061', 'step': 9718, 'epoch': 1}
{'type': 'loss', 'content': 0.023800048977136612, 'timestamp': '2025-10-02 00:27:48.807461', 'step': 9719, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:48.862506', 'step': 9719, 'epoch': 1}
{'type': 'loss', 'content': 0.15079547464847565, 'timestamp': '2025-10-02 00:27:48.868313', 'step': 9720, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:27:48.922653', 'step': 9720, 'epoch': 1}
{'type': 'loss', 'content': 0.19592197239398956, 'timestamp': '2025-10-02 00:27:48.926016', 'step': 9721, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:48.980562', 'step': 9721, 'epoch': 1}
{'type': 'loss', 'content': 0.1259710043668747, 'timestamp': '2025-10-02 00:27:48.983230', 'step': 9722, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:49.038224', 'step': 9722, 'epoch': 1}
{'type': 'loss', 'content': 0.23234602808952332, 'timestamp': '2025-10-02 00:27:49.040883', 'step': 9723, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:27:49.097492', 'step': 9723, 'epoch': 1}
{'type': 'loss', 'content': 0.03200807049870491, 'timestamp': '2025-10-02 00:27:49.105410', 'step': 9724, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:49.159324', 'step': 9724, 'epoch': 1}
{'type': 'loss', 'content': 0.06432731449604034, 'timestamp': '2025-10-02 00:27:49.168595', 'step': 9725, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:27:49.236247', 'step': 9725, 'epoch': 1}
{'type': 'loss', 'content': 0.012823659926652908, 'timestamp': '2025-10-02 00:27:49.248237', 'step': 9726, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:49.307960', 'step': 9726, 'epoch': 1}
{'type': 'loss', 'content': 0.03970862552523613, 'timestamp': '2025-10-02 00:27:49.318129', 'step': 9727, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:27:49.379005', 'step': 9727, 'epoch': 1}
{'type': 'loss', 'content': 0.01714549958705902, 'timestamp': '2025-10-02 00:27:49.389958', 'step': 9728, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:49.447639', 'step': 9728, 'epoch': 1}
{'type': 'loss', 'content': 0.04455924406647682, 'timestamp': '2025-10-02 00:27:49.449885', 'step': 9729, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:49.504493', 'step': 9729, 'epoch': 1}
{'type': 'loss', 'content': 0.09783623367547989, 'timestamp': '2025-10-02 00:27:49.510322', 'step': 9730, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:27:49.577698', 'step': 9730, 'epoch': 1}
{'type': 'loss', 'content': 0.059695396572351456, 'timestamp': '2025-10-02 00:27:49.589662', 'step': 9731, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:27:49.652824', 'step': 9731, 'epoch': 1}
{'type': 'loss', 'content': 0.02047950029373169, 'timestamp': '2025-10-02 00:27:49.664296', 'step': 9732, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:27:49.719121', 'step': 9732, 'epoch': 1}
{'type': 'loss', 'content': 0.11139803379774094, 'timestamp': '2025-10-02 00:27:49.721583', 'step': 9733, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:49.776203', 'step': 9733, 'epoch': 1}
{'type': 'loss', 'content': 0.15889689326286316, 'timestamp': '2025-10-02 00:27:49.778482', 'step': 9734, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:27:49.833900', 'step': 9734, 'epoch': 1}
{'type': 'loss', 'content': 0.07717861980199814, 'timestamp': '2025-10-02 00:27:49.836333', 'step': 9735, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:49.894302', 'step': 9735, 'epoch': 1}
{'type': 'loss', 'content': 0.035014353692531586, 'timestamp': '2025-10-02 00:27:49.901685', 'step': 9736, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:49.956771', 'step': 9736, 'epoch': 1}
{'type': 'loss', 'content': 0.20590274035930634, 'timestamp': '2025-10-02 00:27:49.960226', 'step': 9737, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:50.016882', 'step': 9737, 'epoch': 1}
{'type': 'loss', 'content': 0.04157354682683945, 'timestamp': '2025-10-02 00:27:50.020058', 'step': 9738, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:50.077048', 'step': 9738, 'epoch': 1}
{'type': 'loss', 'content': 0.041893843561410904, 'timestamp': '2025-10-02 00:27:50.082883', 'step': 9739, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:27:50.138033', 'step': 9739, 'epoch': 1}
{'type': 'loss', 'content': 0.12306962162256241, 'timestamp': '2025-10-02 00:27:50.144602', 'step': 9740, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:27:50.199310', 'step': 9740, 'epoch': 1}
{'type': 'loss', 'content': 0.035605091601610184, 'timestamp': '2025-10-02 00:27:50.205086', 'step': 9741, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:27:50.260331', 'step': 9741, 'epoch': 1}
{'type': 'loss', 'content': 0.08808331191539764, 'timestamp': '2025-10-02 00:27:50.263869', 'step': 9742, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:50.319710', 'step': 9742, 'epoch': 1}
{'type': 'loss', 'content': 0.022231820970773697, 'timestamp': '2025-10-02 00:27:50.329042', 'step': 9743, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:27:50.385856', 'step': 9743, 'epoch': 1}
{'type': 'loss', 'content': 0.052453603595495224, 'timestamp': '2025-10-02 00:27:50.396002', 'step': 9744, 'epoch': 1}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:28:17.595977', 'step': 9744, 'epoch': 1}
{'type': 'pplx', 'content': 103.48183126004407, 'timestamp': '2025-10-02 00:28:17.602360', 'step': 9744, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:17.667867', 'step': 9744, 'epoch': 1}
{'type': 'loss', 'content': 0.02527984417974949, 'timestamp': '2025-10-02 00:28:17.675050', 'step': 9745, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:17.743140', 'step': 9745, 'epoch': 1}
{'type': 'loss', 'content': 0.14571413397789001, 'timestamp': '2025-10-02 00:28:17.746181', 'step': 9746, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:28:17.810123', 'step': 9746, 'epoch': 1}
{'type': 'loss', 'content': 0.049766235053539276, 'timestamp': '2025-10-02 00:28:17.820786', 'step': 9747, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:17.876049', 'step': 9747, 'epoch': 1}
{'type': 'loss', 'content': 0.09446577727794647, 'timestamp': '2025-10-02 00:28:17.883456', 'step': 9748, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:17.940784', 'step': 9748, 'epoch': 1}
{'type': 'loss', 'content': 0.06323430687189102, 'timestamp': '2025-10-02 00:28:17.946384', 'step': 9749, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:18.010078', 'step': 9749, 'epoch': 1}
{'type': 'loss', 'content': 0.011729264631867409, 'timestamp': '2025-10-02 00:28:18.020223', 'step': 9750, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:28:18.074932', 'step': 9750, 'epoch': 1}
{'type': 'loss', 'content': 0.07199929654598236, 'timestamp': '2025-10-02 00:28:18.077438', 'step': 9751, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:18.131438', 'step': 9751, 'epoch': 1}
{'type': 'loss', 'content': 0.0940292477607727, 'timestamp': '2025-10-02 00:28:18.137415', 'step': 9752, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:18.194768', 'step': 9752, 'epoch': 1}
{'type': 'loss', 'content': 0.08277816325426102, 'timestamp': '2025-10-02 00:28:18.198957', 'step': 9753, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:28:18.293593', 'step': 9753, 'epoch': 1}
{'type': 'loss', 'content': 0.01572585664689541, 'timestamp': '2025-10-02 00:28:18.304077', 'step': 9754, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:28:18.390799', 'step': 9754, 'epoch': 1}
{'type': 'loss', 'content': 0.012825621291995049, 'timestamp': '2025-10-02 00:28:18.403253', 'step': 9755, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:18.474661', 'step': 9755, 'epoch': 1}
{'type': 'loss', 'content': 0.017104391008615494, 'timestamp': '2025-10-02 00:28:18.483473', 'step': 9756, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:18.556178', 'step': 9756, 'epoch': 1}
{'type': 'loss', 'content': 0.07339484244585037, 'timestamp': '2025-10-02 00:28:18.572291', 'step': 9757, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:18.666097', 'step': 9757, 'epoch': 1}
{'type': 'loss', 'content': 0.15551425516605377, 'timestamp': '2025-10-02 00:28:18.672017', 'step': 9758, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:18.734302', 'step': 9758, 'epoch': 1}
{'type': 'loss', 'content': 0.13306991755962372, 'timestamp': '2025-10-02 00:28:18.739865', 'step': 9759, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:18.811299', 'step': 9759, 'epoch': 1}
{'type': 'loss', 'content': 0.14100155234336853, 'timestamp': '2025-10-02 00:28:18.819533', 'step': 9760, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:18.889607', 'step': 9760, 'epoch': 1}
{'type': 'loss', 'content': 0.048510678112506866, 'timestamp': '2025-10-02 00:28:18.893354', 'step': 9761, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:28:18.962783', 'step': 9761, 'epoch': 1}
{'type': 'loss', 'content': 0.0307645071297884, 'timestamp': '2025-10-02 00:28:18.973441', 'step': 9762, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:28:19.041662', 'step': 9762, 'epoch': 1}
{'type': 'loss', 'content': 0.06231268495321274, 'timestamp': '2025-10-02 00:28:19.052192', 'step': 9763, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:19.129261', 'step': 9763, 'epoch': 1}
{'type': 'loss', 'content': 0.05148031935095787, 'timestamp': '2025-10-02 00:28:19.150404', 'step': 9764, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:19.231607', 'step': 9764, 'epoch': 1}
{'type': 'loss', 'content': 0.061294835060834885, 'timestamp': '2025-10-02 00:28:19.242625', 'step': 9765, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:19.318758', 'step': 9765, 'epoch': 1}
{'type': 'loss', 'content': 0.1044326201081276, 'timestamp': '2025-10-02 00:28:19.324737', 'step': 9766, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:19.404552', 'step': 9766, 'epoch': 1}
{'type': 'loss', 'content': 0.11723917722702026, 'timestamp': '2025-10-02 00:28:19.409094', 'step': 9767, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:19.502664', 'step': 9767, 'epoch': 1}
{'type': 'loss', 'content': 0.037090398371219635, 'timestamp': '2025-10-02 00:28:19.510936', 'step': 9768, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:19.627874', 'step': 9768, 'epoch': 1}
{'type': 'loss', 'content': 0.06364569067955017, 'timestamp': '2025-10-02 00:28:19.645827', 'step': 9769, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:19.731878', 'step': 9769, 'epoch': 1}
{'type': 'loss', 'content': 0.04580371826887131, 'timestamp': '2025-10-02 00:28:19.748898', 'step': 9770, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:19.830059', 'step': 9770, 'epoch': 1}
{'type': 'loss', 'content': 0.08690876513719559, 'timestamp': '2025-10-02 00:28:19.833643', 'step': 9771, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:19.895396', 'step': 9771, 'epoch': 1}
{'type': 'loss', 'content': 0.03700153902173042, 'timestamp': '2025-10-02 00:28:19.903668', 'step': 9772, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:19.966501', 'step': 9772, 'epoch': 1}
{'type': 'loss', 'content': 0.07805328071117401, 'timestamp': '2025-10-02 00:28:19.971283', 'step': 9773, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:20.045961', 'step': 9773, 'epoch': 1}
{'type': 'loss', 'content': 0.06656041741371155, 'timestamp': '2025-10-02 00:28:20.051164', 'step': 9774, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:20.112137', 'step': 9774, 'epoch': 1}
{'type': 'loss', 'content': 0.1219465360045433, 'timestamp': '2025-10-02 00:28:20.117937', 'step': 9775, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:20.197203', 'step': 9775, 'epoch': 1}
{'type': 'loss', 'content': 0.06884251534938812, 'timestamp': '2025-10-02 00:28:20.217453', 'step': 9776, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:28:20.291787', 'step': 9776, 'epoch': 1}
{'type': 'loss', 'content': 0.1121414452791214, 'timestamp': '2025-10-02 00:28:20.297433', 'step': 9777, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:20.358495', 'step': 9777, 'epoch': 1}
{'type': 'loss', 'content': 0.1690375804901123, 'timestamp': '2025-10-02 00:28:20.378612', 'step': 9778, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:20.468365', 'step': 9778, 'epoch': 1}
{'type': 'loss', 'content': 0.058753687888383865, 'timestamp': '2025-10-02 00:28:20.482866', 'step': 9779, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:20.584662', 'step': 9779, 'epoch': 1}
{'type': 'loss', 'content': 0.1256425380706787, 'timestamp': '2025-10-02 00:28:20.592529', 'step': 9780, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:28:20.700119', 'step': 9780, 'epoch': 1}
{'type': 'loss', 'content': 0.09880489110946655, 'timestamp': '2025-10-02 00:28:20.705474', 'step': 9781, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:20.781686', 'step': 9781, 'epoch': 1}
{'type': 'loss', 'content': 0.044720642268657684, 'timestamp': '2025-10-02 00:28:20.786826', 'step': 9782, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:20.848064', 'step': 9782, 'epoch': 1}
{'type': 'loss', 'content': 0.03851023688912392, 'timestamp': '2025-10-02 00:28:20.867999', 'step': 9783, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:20.952288', 'step': 9783, 'epoch': 1}
{'type': 'loss', 'content': 0.07057662308216095, 'timestamp': '2025-10-02 00:28:20.960057', 'step': 9784, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:21.036679', 'step': 9784, 'epoch': 1}
{'type': 'loss', 'content': 0.06411320716142654, 'timestamp': '2025-10-02 00:28:21.044131', 'step': 9785, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:21.106028', 'step': 9785, 'epoch': 1}
{'type': 'loss', 'content': 0.07003423571586609, 'timestamp': '2025-10-02 00:28:21.110628', 'step': 9786, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:21.182715', 'step': 9786, 'epoch': 1}
{'type': 'loss', 'content': 0.16677413880825043, 'timestamp': '2025-10-02 00:28:21.198689', 'step': 9787, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:21.272017', 'step': 9787, 'epoch': 1}
{'type': 'loss', 'content': 0.06332574039697647, 'timestamp': '2025-10-02 00:28:21.280109', 'step': 9788, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:21.366110', 'step': 9788, 'epoch': 1}
{'type': 'loss', 'content': 0.00772523321211338, 'timestamp': '2025-10-02 00:28:21.380645', 'step': 9789, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:21.463915', 'step': 9789, 'epoch': 1}
{'type': 'loss', 'content': 0.05554720386862755, 'timestamp': '2025-10-02 00:28:21.478984', 'step': 9790, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:21.565763', 'step': 9790, 'epoch': 1}
{'type': 'loss', 'content': 0.13221560418605804, 'timestamp': '2025-10-02 00:28:21.570069', 'step': 9791, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:21.641257', 'step': 9791, 'epoch': 1}
{'type': 'loss', 'content': 0.06516461819410324, 'timestamp': '2025-10-02 00:28:21.648783', 'step': 9792, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:21.712920', 'step': 9792, 'epoch': 1}
{'type': 'loss', 'content': 0.13231319189071655, 'timestamp': '2025-10-02 00:28:21.716255', 'step': 9793, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:21.788605', 'step': 9793, 'epoch': 1}
{'type': 'loss', 'content': 0.041117846965789795, 'timestamp': '2025-10-02 00:28:21.791828', 'step': 9794, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:21.878887', 'step': 9794, 'epoch': 1}
{'type': 'loss', 'content': 0.035674694925546646, 'timestamp': '2025-10-02 00:28:21.889033', 'step': 9795, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:21.964205', 'step': 9795, 'epoch': 1}
{'type': 'loss', 'content': 0.08349550515413284, 'timestamp': '2025-10-02 00:28:21.985679', 'step': 9796, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:28:22.078083', 'step': 9796, 'epoch': 1}
{'type': 'loss', 'content': 0.03181978687644005, 'timestamp': '2025-10-02 00:28:22.089645', 'step': 9797, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:22.163333', 'step': 9797, 'epoch': 1}
{'type': 'loss', 'content': 0.0734897255897522, 'timestamp': '2025-10-02 00:28:22.167522', 'step': 9798, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:28:22.271745', 'step': 9798, 'epoch': 1}
{'type': 'loss', 'content': 0.06218097358942032, 'timestamp': '2025-10-02 00:28:22.284005', 'step': 9799, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:22.345755', 'step': 9799, 'epoch': 1}
{'type': 'loss', 'content': 0.10372152924537659, 'timestamp': '2025-10-02 00:28:22.353298', 'step': 9800, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:22.443664', 'step': 9800, 'epoch': 1}
{'type': 'loss', 'content': 0.04128347337245941, 'timestamp': '2025-10-02 00:28:22.447343', 'step': 9801, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:22.516045', 'step': 9801, 'epoch': 1}
{'type': 'loss', 'content': 0.10716517269611359, 'timestamp': '2025-10-02 00:28:22.531014', 'step': 9802, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:22.641270', 'step': 9802, 'epoch': 1}
{'type': 'loss', 'content': 0.2687418460845947, 'timestamp': '2025-10-02 00:28:22.658665', 'step': 9803, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:22.721512', 'step': 9803, 'epoch': 1}
{'type': 'loss', 'content': 0.17750690877437592, 'timestamp': '2025-10-02 00:28:22.732311', 'step': 9804, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:22.838161', 'step': 9804, 'epoch': 1}
{'type': 'loss', 'content': 0.18221473693847656, 'timestamp': '2025-10-02 00:28:22.842332', 'step': 9805, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:22.944952', 'step': 9805, 'epoch': 1}
{'type': 'loss', 'content': 0.05928153544664383, 'timestamp': '2025-10-02 00:28:22.961385', 'step': 9806, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:23.060670', 'step': 9806, 'epoch': 1}
{'type': 'loss', 'content': 0.0536002553999424, 'timestamp': '2025-10-02 00:28:23.064812', 'step': 9807, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:23.145855', 'step': 9807, 'epoch': 1}
{'type': 'loss', 'content': 0.013063955120742321, 'timestamp': '2025-10-02 00:28:23.156162', 'step': 9808, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:23.216801', 'step': 9808, 'epoch': 1}
{'type': 'loss', 'content': 0.18143649399280548, 'timestamp': '2025-10-02 00:28:23.220556', 'step': 9809, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:23.285415', 'step': 9809, 'epoch': 1}
{'type': 'loss', 'content': 0.09319660067558289, 'timestamp': '2025-10-02 00:28:23.292629', 'step': 9810, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:28:23.367828', 'step': 9810, 'epoch': 1}
{'type': 'loss', 'content': 0.1127660796046257, 'timestamp': '2025-10-02 00:28:23.372416', 'step': 9811, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:28:23.462348', 'step': 9811, 'epoch': 1}
{'type': 'loss', 'content': 0.04481145739555359, 'timestamp': '2025-10-02 00:28:23.473554', 'step': 9812, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:23.531157', 'step': 9812, 'epoch': 1}
{'type': 'loss', 'content': 0.04324500262737274, 'timestamp': '2025-10-02 00:28:23.535085', 'step': 9813, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:23.595599', 'step': 9813, 'epoch': 1}
{'type': 'loss', 'content': 0.08189383894205093, 'timestamp': '2025-10-02 00:28:23.602578', 'step': 9814, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:23.673897', 'step': 9814, 'epoch': 1}
{'type': 'loss', 'content': 0.03939563408493996, 'timestamp': '2025-10-02 00:28:23.688387', 'step': 9815, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:23.769040', 'step': 9815, 'epoch': 1}
{'type': 'loss', 'content': 0.22513040900230408, 'timestamp': '2025-10-02 00:28:23.778021', 'step': 9816, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:23.850027', 'step': 9816, 'epoch': 1}
{'type': 'loss', 'content': 0.05471910536289215, 'timestamp': '2025-10-02 00:28:23.855872', 'step': 9817, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:23.939401', 'step': 9817, 'epoch': 1}
{'type': 'loss', 'content': 0.05779192969202995, 'timestamp': '2025-10-02 00:28:23.953521', 'step': 9818, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:24.031201', 'step': 9818, 'epoch': 1}
{'type': 'loss', 'content': 0.04655363783240318, 'timestamp': '2025-10-02 00:28:24.035595', 'step': 9819, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:28:24.094497', 'step': 9819, 'epoch': 1}
{'type': 'loss', 'content': 0.15870268642902374, 'timestamp': '2025-10-02 00:28:24.101502', 'step': 9820, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:28:24.162636', 'step': 9820, 'epoch': 1}
{'type': 'loss', 'content': 0.10307440906763077, 'timestamp': '2025-10-02 00:28:24.165847', 'step': 9821, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:24.226305', 'step': 9821, 'epoch': 1}
{'type': 'loss', 'content': 0.024707060307264328, 'timestamp': '2025-10-02 00:28:24.235697', 'step': 9822, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:24.298097', 'step': 9822, 'epoch': 1}
{'type': 'loss', 'content': 0.05848328769207001, 'timestamp': '2025-10-02 00:28:24.303381', 'step': 9823, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:24.375638', 'step': 9823, 'epoch': 1}
{'type': 'loss', 'content': 0.06918758898973465, 'timestamp': '2025-10-02 00:28:24.383874', 'step': 9824, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:24.467726', 'step': 9824, 'epoch': 1}
{'type': 'loss', 'content': 0.10778020322322845, 'timestamp': '2025-10-02 00:28:24.482038', 'step': 9825, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:24.552635', 'step': 9825, 'epoch': 1}
{'type': 'loss', 'content': 0.16620445251464844, 'timestamp': '2025-10-02 00:28:24.556911', 'step': 9826, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:24.638286', 'step': 9826, 'epoch': 1}
{'type': 'loss', 'content': 0.05917296186089516, 'timestamp': '2025-10-02 00:28:24.642140', 'step': 9827, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:28:24.704373', 'step': 9827, 'epoch': 1}
{'type': 'loss', 'content': 0.07960468530654907, 'timestamp': '2025-10-02 00:28:24.711954', 'step': 9828, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:24.771775', 'step': 9828, 'epoch': 1}
{'type': 'loss', 'content': 0.15000741183757782, 'timestamp': '2025-10-02 00:28:24.774941', 'step': 9829, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:24.834158', 'step': 9829, 'epoch': 1}
{'type': 'loss', 'content': 0.08930148929357529, 'timestamp': '2025-10-02 00:28:24.837345', 'step': 9830, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:24.899608', 'step': 9830, 'epoch': 1}
{'type': 'loss', 'content': 0.01821150816977024, 'timestamp': '2025-10-02 00:28:24.909160', 'step': 9831, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:24.968323', 'step': 9831, 'epoch': 1}
{'type': 'loss', 'content': 0.12563456594944, 'timestamp': '2025-10-02 00:28:24.976386', 'step': 9832, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:25.040961', 'step': 9832, 'epoch': 1}
{'type': 'loss', 'content': 0.07121409475803375, 'timestamp': '2025-10-02 00:28:25.051979', 'step': 9833, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:25.112787', 'step': 9833, 'epoch': 1}
{'type': 'loss', 'content': 0.06144709140062332, 'timestamp': '2025-10-02 00:28:25.116601', 'step': 9834, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:25.191413', 'step': 9834, 'epoch': 1}
{'type': 'loss', 'content': 0.03991850093007088, 'timestamp': '2025-10-02 00:28:25.198837', 'step': 9835, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:25.271861', 'step': 9835, 'epoch': 1}
{'type': 'loss', 'content': 0.060430824756622314, 'timestamp': '2025-10-02 00:28:25.279751', 'step': 9836, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:25.353701', 'step': 9836, 'epoch': 1}
{'type': 'loss', 'content': 0.22460101544857025, 'timestamp': '2025-10-02 00:28:25.359550', 'step': 9837, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:28:25.419978', 'step': 9837, 'epoch': 1}
{'type': 'loss', 'content': 0.1385861337184906, 'timestamp': '2025-10-02 00:28:25.424327', 'step': 9838, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:25.483925', 'step': 9838, 'epoch': 1}
{'type': 'loss', 'content': 0.03878448158502579, 'timestamp': '2025-10-02 00:28:25.487168', 'step': 9839, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:25.559794', 'step': 9839, 'epoch': 1}
{'type': 'loss', 'content': 0.07923606783151627, 'timestamp': '2025-10-02 00:28:25.569959', 'step': 9840, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:25.640012', 'step': 9840, 'epoch': 1}
{'type': 'loss', 'content': 0.07438100874423981, 'timestamp': '2025-10-02 00:28:25.647484', 'step': 9841, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:25.708629', 'step': 9841, 'epoch': 1}
{'type': 'loss', 'content': 0.17727942764759064, 'timestamp': '2025-10-02 00:28:25.712555', 'step': 9842, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:25.772016', 'step': 9842, 'epoch': 1}
{'type': 'loss', 'content': 0.044233016669750214, 'timestamp': '2025-10-02 00:28:25.781405', 'step': 9843, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:25.852385', 'step': 9843, 'epoch': 1}
{'type': 'loss', 'content': 0.10410701483488083, 'timestamp': '2025-10-02 00:28:25.859213', 'step': 9844, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:25.929016', 'step': 9844, 'epoch': 1}
{'type': 'loss', 'content': 0.14423085749149323, 'timestamp': '2025-10-02 00:28:25.934464', 'step': 9845, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:25.996808', 'step': 9845, 'epoch': 1}
{'type': 'loss', 'content': 0.16991662979125977, 'timestamp': '2025-10-02 00:28:26.002352', 'step': 9846, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:26.064104', 'step': 9846, 'epoch': 1}
{'type': 'loss', 'content': 0.10801491886377335, 'timestamp': '2025-10-02 00:28:26.068179', 'step': 9847, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:26.148201', 'step': 9847, 'epoch': 1}
{'type': 'loss', 'content': 0.16329391300678253, 'timestamp': '2025-10-02 00:28:26.155134', 'step': 9848, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:26.229366', 'step': 9848, 'epoch': 1}
{'type': 'loss', 'content': 0.046090830117464066, 'timestamp': '2025-10-02 00:28:26.232545', 'step': 9849, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:28:26.290147', 'step': 9849, 'epoch': 1}
{'type': 'loss', 'content': 0.1408204287290573, 'timestamp': '2025-10-02 00:28:26.293479', 'step': 9850, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:26.351431', 'step': 9850, 'epoch': 1}
{'type': 'loss', 'content': 0.030390029773116112, 'timestamp': '2025-10-02 00:28:26.360728', 'step': 9851, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:28:26.419220', 'step': 9851, 'epoch': 1}
{'type': 'loss', 'content': 0.07928002625703812, 'timestamp': '2025-10-02 00:28:26.427059', 'step': 9852, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:28:26.499008', 'step': 9852, 'epoch': 1}
{'type': 'loss', 'content': 0.02841212972998619, 'timestamp': '2025-10-02 00:28:26.512574', 'step': 9853, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:26.574401', 'step': 9853, 'epoch': 1}
{'type': 'loss', 'content': 0.039417386054992676, 'timestamp': '2025-10-02 00:28:26.581786', 'step': 9854, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:26.641943', 'step': 9854, 'epoch': 1}
{'type': 'loss', 'content': 0.036177124828100204, 'timestamp': '2025-10-02 00:28:26.646565', 'step': 9855, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:28:26.732599', 'step': 9855, 'epoch': 1}
{'type': 'loss', 'content': 0.16058801114559174, 'timestamp': '2025-10-02 00:28:26.740458', 'step': 9856, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:26.826341', 'step': 9856, 'epoch': 1}
{'type': 'loss', 'content': 0.10311070084571838, 'timestamp': '2025-10-02 00:28:26.842751', 'step': 9857, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:28:26.925597', 'step': 9857, 'epoch': 1}
{'type': 'loss', 'content': 0.021255262196063995, 'timestamp': '2025-10-02 00:28:26.936058', 'step': 9858, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:28:26.998053', 'step': 9858, 'epoch': 1}
{'type': 'loss', 'content': 0.040873002260923386, 'timestamp': '2025-10-02 00:28:27.002111', 'step': 9859, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:27.060881', 'step': 9859, 'epoch': 1}
{'type': 'loss', 'content': 0.11070291697978973, 'timestamp': '2025-10-02 00:28:27.068330', 'step': 9860, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:27.132121', 'step': 9860, 'epoch': 1}
{'type': 'loss', 'content': 0.0351174995303154, 'timestamp': '2025-10-02 00:28:27.136960', 'step': 9861, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:27.228982', 'step': 9861, 'epoch': 1}
{'type': 'loss', 'content': 0.05186393857002258, 'timestamp': '2025-10-02 00:28:27.233504', 'step': 9862, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:27.307400', 'step': 9862, 'epoch': 1}
{'type': 'loss', 'content': 0.1270444244146347, 'timestamp': '2025-10-02 00:28:27.310916', 'step': 9863, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:27.371013', 'step': 9863, 'epoch': 1}
{'type': 'loss', 'content': 0.04364088550209999, 'timestamp': '2025-10-02 00:28:27.388171', 'step': 9864, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:28:27.457644', 'step': 9864, 'epoch': 1}
{'type': 'loss', 'content': 0.016170969232916832, 'timestamp': '2025-10-02 00:28:27.469356', 'step': 9865, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:27.550006', 'step': 9865, 'epoch': 1}
{'type': 'loss', 'content': 0.19162333011627197, 'timestamp': '2025-10-02 00:28:27.553508', 'step': 9866, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:27.636945', 'step': 9866, 'epoch': 1}
{'type': 'loss', 'content': 0.13562817871570587, 'timestamp': '2025-10-02 00:28:27.641517', 'step': 9867, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:27.714777', 'step': 9867, 'epoch': 1}
{'type': 'loss', 'content': 0.2278476357460022, 'timestamp': '2025-10-02 00:28:27.722418', 'step': 9868, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:27.791388', 'step': 9868, 'epoch': 1}
{'type': 'loss', 'content': 0.11369027197360992, 'timestamp': '2025-10-02 00:28:27.795530', 'step': 9869, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:27.866798', 'step': 9869, 'epoch': 1}
{'type': 'loss', 'content': 0.055492568761110306, 'timestamp': '2025-10-02 00:28:27.876081', 'step': 9870, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:28:27.938396', 'step': 9870, 'epoch': 1}
{'type': 'loss', 'content': 0.11048734933137894, 'timestamp': '2025-10-02 00:28:27.952469', 'step': 9871, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:28.017820', 'step': 9871, 'epoch': 1}
{'type': 'loss', 'content': 0.0221016276627779, 'timestamp': '2025-10-02 00:28:28.036632', 'step': 9872, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:28.106817', 'step': 9872, 'epoch': 1}
{'type': 'loss', 'content': 0.13408556580543518, 'timestamp': '2025-10-02 00:28:28.118683', 'step': 9873, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:28.199793', 'step': 9873, 'epoch': 1}
{'type': 'loss', 'content': 0.08092363178730011, 'timestamp': '2025-10-02 00:28:28.203867', 'step': 9874, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:28.260422', 'step': 9874, 'epoch': 1}
{'type': 'loss', 'content': 0.07126770168542862, 'timestamp': '2025-10-02 00:28:28.271698', 'step': 9875, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:28.330882', 'step': 9875, 'epoch': 1}
{'type': 'loss', 'content': 0.13861480355262756, 'timestamp': '2025-10-02 00:28:28.345417', 'step': 9876, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:28.403900', 'step': 9876, 'epoch': 1}
{'type': 'loss', 'content': 0.15235918760299683, 'timestamp': '2025-10-02 00:28:28.408366', 'step': 9877, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:28.473406', 'step': 9877, 'epoch': 1}
{'type': 'loss', 'content': 0.12392124533653259, 'timestamp': '2025-10-02 00:28:28.484465', 'step': 9878, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:28.559491', 'step': 9878, 'epoch': 1}
{'type': 'loss', 'content': 0.29907846450805664, 'timestamp': '2025-10-02 00:28:28.571588', 'step': 9879, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:28.637597', 'step': 9879, 'epoch': 1}
{'type': 'loss', 'content': 0.04503604397177696, 'timestamp': '2025-10-02 00:28:28.644589', 'step': 9880, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:28.727077', 'step': 9880, 'epoch': 1}
{'type': 'loss', 'content': 0.07676670700311661, 'timestamp': '2025-10-02 00:28:28.736709', 'step': 9881, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:28.803553', 'step': 9881, 'epoch': 1}
{'type': 'loss', 'content': 0.04310781881213188, 'timestamp': '2025-10-02 00:28:28.813122', 'step': 9882, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:28.882662', 'step': 9882, 'epoch': 1}
{'type': 'loss', 'content': 0.06339578330516815, 'timestamp': '2025-10-02 00:28:28.892875', 'step': 9883, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:28.951716', 'step': 9883, 'epoch': 1}
{'type': 'loss', 'content': 0.04641835018992424, 'timestamp': '2025-10-02 00:28:28.958216', 'step': 9884, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:29.023163', 'step': 9884, 'epoch': 1}
{'type': 'loss', 'content': 0.028021978214383125, 'timestamp': '2025-10-02 00:28:29.026208', 'step': 9885, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 592], 'flops': 11840071943488.0}, 'timestamp': '2025-10-02 00:28:29.128983', 'step': 9885, 'epoch': 1}
{'type': 'loss', 'content': 0.03628066927194595, 'timestamp': '2025-10-02 00:28:29.145388', 'step': 9886, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:29.208846', 'step': 9886, 'epoch': 1}
{'type': 'loss', 'content': 0.13261641561985016, 'timestamp': '2025-10-02 00:28:29.212366', 'step': 9887, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:29.283083', 'step': 9887, 'epoch': 1}
{'type': 'loss', 'content': 0.0720973089337349, 'timestamp': '2025-10-02 00:28:29.289872', 'step': 9888, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:29.351874', 'step': 9888, 'epoch': 1}
{'type': 'loss', 'content': 0.026295693591237068, 'timestamp': '2025-10-02 00:28:29.362114', 'step': 9889, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:29.426140', 'step': 9889, 'epoch': 1}
{'type': 'loss', 'content': 0.04199398308992386, 'timestamp': '2025-10-02 00:28:29.436325', 'step': 9890, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:29.496378', 'step': 9890, 'epoch': 1}
{'type': 'loss', 'content': 0.2133336216211319, 'timestamp': '2025-10-02 00:28:29.499847', 'step': 9891, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:29.564259', 'step': 9891, 'epoch': 1}
{'type': 'loss', 'content': 0.05541028827428818, 'timestamp': '2025-10-02 00:28:29.571350', 'step': 9892, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:28:29.627274', 'step': 9892, 'epoch': 1}
{'type': 'loss', 'content': 0.10304208099842072, 'timestamp': '2025-10-02 00:28:29.630707', 'step': 9893, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:29.687745', 'step': 9893, 'epoch': 1}
{'type': 'loss', 'content': 0.17517638206481934, 'timestamp': '2025-10-02 00:28:29.697435', 'step': 9894, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:29.758996', 'step': 9894, 'epoch': 1}
{'type': 'loss', 'content': 0.04614140838384628, 'timestamp': '2025-10-02 00:28:29.762420', 'step': 9895, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:29.822598', 'step': 9895, 'epoch': 1}
{'type': 'loss', 'content': 0.06387631595134735, 'timestamp': '2025-10-02 00:28:29.829720', 'step': 9896, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:29.905751', 'step': 9896, 'epoch': 1}
{'type': 'loss', 'content': 0.14691756665706635, 'timestamp': '2025-10-02 00:28:29.916278', 'step': 9897, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:29.973589', 'step': 9897, 'epoch': 1}
{'type': 'loss', 'content': 0.13035957515239716, 'timestamp': '2025-10-02 00:28:29.976432', 'step': 9898, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:30.041200', 'step': 9898, 'epoch': 1}
{'type': 'loss', 'content': 0.09825950115919113, 'timestamp': '2025-10-02 00:28:30.046493', 'step': 9899, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:30.110143', 'step': 9899, 'epoch': 1}
{'type': 'loss', 'content': 0.03435629606246948, 'timestamp': '2025-10-02 00:28:30.122949', 'step': 9900, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:30.178133', 'step': 9900, 'epoch': 1}
{'type': 'loss', 'content': 0.10759514570236206, 'timestamp': '2025-10-02 00:28:30.180924', 'step': 9901, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:30.237652', 'step': 9901, 'epoch': 1}
{'type': 'loss', 'content': 0.07608743757009506, 'timestamp': '2025-10-02 00:28:30.244658', 'step': 9902, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:30.303426', 'step': 9902, 'epoch': 1}
{'type': 'loss', 'content': 0.08498530089855194, 'timestamp': '2025-10-02 00:28:30.313010', 'step': 9903, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:30.369497', 'step': 9903, 'epoch': 1}
{'type': 'loss', 'content': 0.01887945830821991, 'timestamp': '2025-10-02 00:28:30.380238', 'step': 9904, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:28:30.451268', 'step': 9904, 'epoch': 1}
{'type': 'loss', 'content': 0.0994928702712059, 'timestamp': '2025-10-02 00:28:30.457702', 'step': 9905, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:30.524026', 'step': 9905, 'epoch': 1}
{'type': 'loss', 'content': 0.03858419135212898, 'timestamp': '2025-10-02 00:28:30.526791', 'step': 9906, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:30.594290', 'step': 9906, 'epoch': 1}
{'type': 'loss', 'content': 0.11377213895320892, 'timestamp': '2025-10-02 00:28:30.600892', 'step': 9907, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:30.667269', 'step': 9907, 'epoch': 1}
{'type': 'loss', 'content': 0.09761172533035278, 'timestamp': '2025-10-02 00:28:30.677834', 'step': 9908, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:30.747836', 'step': 9908, 'epoch': 1}
{'type': 'loss', 'content': 0.012298930436372757, 'timestamp': '2025-10-02 00:28:30.757289', 'step': 9909, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:30.829819', 'step': 9909, 'epoch': 1}
{'type': 'loss', 'content': 0.10982245951890945, 'timestamp': '2025-10-02 00:28:30.836467', 'step': 9910, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:30.901155', 'step': 9910, 'epoch': 1}
{'type': 'loss', 'content': 0.05312477424740791, 'timestamp': '2025-10-02 00:28:30.908106', 'step': 9911, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:30.982288', 'step': 9911, 'epoch': 1}
{'type': 'loss', 'content': 0.014229382388293743, 'timestamp': '2025-10-02 00:28:30.993050', 'step': 9912, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:28:31.076519', 'step': 9912, 'epoch': 1}
{'type': 'loss', 'content': 0.06140214949846268, 'timestamp': '2025-10-02 00:28:31.088297', 'step': 9913, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:31.178544', 'step': 9913, 'epoch': 1}
{'type': 'loss', 'content': 0.00912502408027649, 'timestamp': '2025-10-02 00:28:31.184690', 'step': 9914, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:31.281755', 'step': 9914, 'epoch': 1}
{'type': 'loss', 'content': 0.043651942163705826, 'timestamp': '2025-10-02 00:28:31.291228', 'step': 9915, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:31.361197', 'step': 9915, 'epoch': 1}
{'type': 'loss', 'content': 0.07197858393192291, 'timestamp': '2025-10-02 00:28:31.369237', 'step': 9916, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:31.430221', 'step': 9916, 'epoch': 1}
{'type': 'loss', 'content': 0.13069358468055725, 'timestamp': '2025-10-02 00:28:31.433479', 'step': 9917, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:28:31.502831', 'step': 9917, 'epoch': 1}
{'type': 'loss', 'content': 0.09481941163539886, 'timestamp': '2025-10-02 00:28:31.513809', 'step': 9918, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:31.572796', 'step': 9918, 'epoch': 1}
{'type': 'loss', 'content': 0.0594777911901474, 'timestamp': '2025-10-02 00:28:31.582377', 'step': 9919, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:28:31.639428', 'step': 9919, 'epoch': 1}
{'type': 'loss', 'content': 0.07247502356767654, 'timestamp': '2025-10-02 00:28:31.652066', 'step': 9920, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:31.708405', 'step': 9920, 'epoch': 1}
{'type': 'loss', 'content': 0.05391700565814972, 'timestamp': '2025-10-02 00:28:31.715895', 'step': 9921, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:31.791131', 'step': 9921, 'epoch': 1}
{'type': 'loss', 'content': 0.04065126180648804, 'timestamp': '2025-10-02 00:28:31.793559', 'step': 9922, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:31.855621', 'step': 9922, 'epoch': 1}
{'type': 'loss', 'content': 0.09777220338582993, 'timestamp': '2025-10-02 00:28:31.858363', 'step': 9923, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:31.922495', 'step': 9923, 'epoch': 1}
{'type': 'loss', 'content': 0.053054600954055786, 'timestamp': '2025-10-02 00:28:31.932860', 'step': 9924, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:31.987894', 'step': 9924, 'epoch': 1}
{'type': 'loss', 'content': 0.03648360073566437, 'timestamp': '2025-10-02 00:28:31.995301', 'step': 9925, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:32.062511', 'step': 9925, 'epoch': 1}
{'type': 'loss', 'content': 0.009782671928405762, 'timestamp': '2025-10-02 00:28:32.069887', 'step': 9926, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:32.136615', 'step': 9926, 'epoch': 1}
{'type': 'loss', 'content': 0.13073605298995972, 'timestamp': '2025-10-02 00:28:32.142906', 'step': 9927, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:32.203743', 'step': 9927, 'epoch': 1}
{'type': 'loss', 'content': 0.08894422650337219, 'timestamp': '2025-10-02 00:28:32.213286', 'step': 9928, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:32.278251', 'step': 9928, 'epoch': 1}
{'type': 'loss', 'content': 0.09811452776193619, 'timestamp': '2025-10-02 00:28:32.284074', 'step': 9929, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:32.339990', 'step': 9929, 'epoch': 1}
{'type': 'loss', 'content': 0.08675439655780792, 'timestamp': '2025-10-02 00:28:32.345880', 'step': 9930, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:28:32.405498', 'step': 9930, 'epoch': 1}
{'type': 'loss', 'content': 0.12909652292728424, 'timestamp': '2025-10-02 00:28:32.413527', 'step': 9931, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:32.470503', 'step': 9931, 'epoch': 1}
{'type': 'loss', 'content': 0.03704126924276352, 'timestamp': '2025-10-02 00:28:32.480603', 'step': 9932, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:32.539060', 'step': 9932, 'epoch': 1}
{'type': 'loss', 'content': 0.054720744490623474, 'timestamp': '2025-10-02 00:28:32.543700', 'step': 9933, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:32.613736', 'step': 9933, 'epoch': 1}
{'type': 'loss', 'content': 0.23570489883422852, 'timestamp': '2025-10-02 00:28:32.621981', 'step': 9934, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:32.688061', 'step': 9934, 'epoch': 1}
{'type': 'loss', 'content': 0.012252052314579487, 'timestamp': '2025-10-02 00:28:32.691511', 'step': 9935, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:32.751872', 'step': 9935, 'epoch': 1}
{'type': 'loss', 'content': 0.08670953661203384, 'timestamp': '2025-10-02 00:28:32.763633', 'step': 9936, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:32.826102', 'step': 9936, 'epoch': 1}
{'type': 'loss', 'content': 0.09838663786649704, 'timestamp': '2025-10-02 00:28:32.835448', 'step': 9937, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:28:32.907305', 'step': 9937, 'epoch': 1}
{'type': 'loss', 'content': 0.06525713950395584, 'timestamp': '2025-10-02 00:28:32.917833', 'step': 9938, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:32.977413', 'step': 9938, 'epoch': 1}
{'type': 'loss', 'content': 0.06760047376155853, 'timestamp': '2025-10-02 00:28:32.984330', 'step': 9939, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:33.051023', 'step': 9939, 'epoch': 1}
{'type': 'loss', 'content': 0.06675422936677933, 'timestamp': '2025-10-02 00:28:33.061201', 'step': 9940, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:33.131083', 'step': 9940, 'epoch': 1}
{'type': 'loss', 'content': 0.035879991948604584, 'timestamp': '2025-10-02 00:28:33.136583', 'step': 9941, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:33.204814', 'step': 9941, 'epoch': 1}
{'type': 'loss', 'content': 0.21946118772029877, 'timestamp': '2025-10-02 00:28:33.207350', 'step': 9942, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:28:33.267893', 'step': 9942, 'epoch': 1}
{'type': 'loss', 'content': 0.07762803137302399, 'timestamp': '2025-10-02 00:28:33.271251', 'step': 9943, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:28:33.339510', 'step': 9943, 'epoch': 1}
{'type': 'loss', 'content': 0.029985038563609123, 'timestamp': '2025-10-02 00:28:33.350946', 'step': 9944, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:33.407701', 'step': 9944, 'epoch': 1}
{'type': 'loss', 'content': 0.06105891987681389, 'timestamp': '2025-10-02 00:28:33.414916', 'step': 9945, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:28:33.484052', 'step': 9945, 'epoch': 1}
{'type': 'loss', 'content': 0.01055765151977539, 'timestamp': '2025-10-02 00:28:33.494744', 'step': 9946, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:33.555774', 'step': 9946, 'epoch': 1}
{'type': 'loss', 'content': 0.04263642802834511, 'timestamp': '2025-10-02 00:28:33.564091', 'step': 9947, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:33.634919', 'step': 9947, 'epoch': 1}
{'type': 'loss', 'content': 0.08658407628536224, 'timestamp': '2025-10-02 00:28:33.641713', 'step': 9948, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:28:33.711847', 'step': 9948, 'epoch': 1}
{'type': 'loss', 'content': 0.053726647049188614, 'timestamp': '2025-10-02 00:28:33.723353', 'step': 9949, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:33.783144', 'step': 9949, 'epoch': 1}
{'type': 'loss', 'content': 0.08048398792743683, 'timestamp': '2025-10-02 00:28:33.796745', 'step': 9950, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:28:33.880754', 'step': 9950, 'epoch': 1}
{'type': 'loss', 'content': 0.05126484856009483, 'timestamp': '2025-10-02 00:28:33.891193', 'step': 9951, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:33.947436', 'step': 9951, 'epoch': 1}
{'type': 'loss', 'content': 0.048365067690610886, 'timestamp': '2025-10-02 00:28:33.957384', 'step': 9952, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:34.014855', 'step': 9952, 'epoch': 1}
{'type': 'loss', 'content': 0.1185302883386612, 'timestamp': '2025-10-02 00:28:34.023419', 'step': 9953, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:34.094963', 'step': 9953, 'epoch': 1}
{'type': 'loss', 'content': 0.010773551650345325, 'timestamp': '2025-10-02 00:28:34.104383', 'step': 9954, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:28:34.175307', 'step': 9954, 'epoch': 1}
{'type': 'loss', 'content': 0.1504524052143097, 'timestamp': '2025-10-02 00:28:34.177975', 'step': 9955, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:34.238838', 'step': 9955, 'epoch': 1}
{'type': 'loss', 'content': 0.04193803295493126, 'timestamp': '2025-10-02 00:28:34.249812', 'step': 9956, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:34.314977', 'step': 9956, 'epoch': 1}
{'type': 'loss', 'content': 0.2379041612148285, 'timestamp': '2025-10-02 00:28:34.317642', 'step': 9957, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:34.372283', 'step': 9957, 'epoch': 1}
{'type': 'loss', 'content': 0.060304682701826096, 'timestamp': '2025-10-02 00:28:34.376053', 'step': 9958, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:34.433388', 'step': 9958, 'epoch': 1}
{'type': 'loss', 'content': 0.08297114819288254, 'timestamp': '2025-10-02 00:28:34.438034', 'step': 9959, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:34.498121', 'step': 9959, 'epoch': 1}
{'type': 'loss', 'content': 0.09419877082109451, 'timestamp': '2025-10-02 00:28:34.504770', 'step': 9960, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:34.562465', 'step': 9960, 'epoch': 1}
{'type': 'loss', 'content': 0.09834876656532288, 'timestamp': '2025-10-02 00:28:34.567058', 'step': 9961, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:34.627346', 'step': 9961, 'epoch': 1}
{'type': 'loss', 'content': 0.05868363752961159, 'timestamp': '2025-10-02 00:28:34.636897', 'step': 9962, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:34.695059', 'step': 9962, 'epoch': 1}
{'type': 'loss', 'content': 0.15216603875160217, 'timestamp': '2025-10-02 00:28:34.704594', 'step': 9963, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:34.764683', 'step': 9963, 'epoch': 1}
{'type': 'loss', 'content': 0.14627428352832794, 'timestamp': '2025-10-02 00:28:34.771037', 'step': 9964, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:34.826729', 'step': 9964, 'epoch': 1}
{'type': 'loss', 'content': 0.03715114668011665, 'timestamp': '2025-10-02 00:28:34.832669', 'step': 9965, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:34.889904', 'step': 9965, 'epoch': 1}
{'type': 'loss', 'content': 0.13079866766929626, 'timestamp': '2025-10-02 00:28:34.892722', 'step': 9966, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:34.949846', 'step': 9966, 'epoch': 1}
{'type': 'loss', 'content': 0.041486386209726334, 'timestamp': '2025-10-02 00:28:34.952247', 'step': 9967, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:35.008793', 'step': 9967, 'epoch': 1}
{'type': 'loss', 'content': 0.026452545076608658, 'timestamp': '2025-10-02 00:28:35.019168', 'step': 9968, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:28:35.074042', 'step': 9968, 'epoch': 1}
{'type': 'loss', 'content': 0.11493822187185287, 'timestamp': '2025-10-02 00:28:35.076551', 'step': 9969, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:35.135175', 'step': 9969, 'epoch': 1}
{'type': 'loss', 'content': 0.028933066874742508, 'timestamp': '2025-10-02 00:28:35.144661', 'step': 9970, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:35.199893', 'step': 9970, 'epoch': 1}
{'type': 'loss', 'content': 0.12891638278961182, 'timestamp': '2025-10-02 00:28:35.202112', 'step': 9971, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:28:35.264022', 'step': 9971, 'epoch': 1}
{'type': 'loss', 'content': 0.0424286425113678, 'timestamp': '2025-10-02 00:28:35.276788', 'step': 9972, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:35.346641', 'step': 9972, 'epoch': 1}
{'type': 'loss', 'content': 0.13694153726100922, 'timestamp': '2025-10-02 00:28:35.349289', 'step': 9973, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:28:35.405073', 'step': 9973, 'epoch': 1}
{'type': 'loss', 'content': 0.15279042720794678, 'timestamp': '2025-10-02 00:28:35.409835', 'step': 9974, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:35.470593', 'step': 9974, 'epoch': 1}
{'type': 'loss', 'content': 0.04288465902209282, 'timestamp': '2025-10-02 00:28:35.477980', 'step': 9975, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:35.534151', 'step': 9975, 'epoch': 1}
{'type': 'loss', 'content': 0.04855743795633316, 'timestamp': '2025-10-02 00:28:35.540379', 'step': 9976, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:35.595007', 'step': 9976, 'epoch': 1}
{'type': 'loss', 'content': 0.029181743040680885, 'timestamp': '2025-10-02 00:28:35.597580', 'step': 9977, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:35.653289', 'step': 9977, 'epoch': 1}
{'type': 'loss', 'content': 0.06705217063426971, 'timestamp': '2025-10-02 00:28:35.655796', 'step': 9978, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:35.710137', 'step': 9978, 'epoch': 1}
{'type': 'loss', 'content': 0.05715519189834595, 'timestamp': '2025-10-02 00:28:35.712978', 'step': 9979, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:35.771299', 'step': 9979, 'epoch': 1}
{'type': 'loss', 'content': 0.020037155598402023, 'timestamp': '2025-10-02 00:28:35.777249', 'step': 9980, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:35.831282', 'step': 9980, 'epoch': 1}
{'type': 'loss', 'content': 0.11986099183559418, 'timestamp': '2025-10-02 00:28:35.834113', 'step': 9981, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:35.889002', 'step': 9981, 'epoch': 1}
{'type': 'loss', 'content': 0.008168655447661877, 'timestamp': '2025-10-02 00:28:35.891213', 'step': 9982, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:35.946431', 'step': 9982, 'epoch': 1}
{'type': 'loss', 'content': 0.053951531648635864, 'timestamp': '2025-10-02 00:28:35.955793', 'step': 9983, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:36.010372', 'step': 9983, 'epoch': 1}
{'type': 'loss', 'content': 0.025760753080248833, 'timestamp': '2025-10-02 00:28:36.016761', 'step': 9984, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:36.069831', 'step': 9984, 'epoch': 1}
{'type': 'loss', 'content': 0.17346671223640442, 'timestamp': '2025-10-02 00:28:36.075736', 'step': 9985, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:36.129838', 'step': 9985, 'epoch': 1}
{'type': 'loss', 'content': 0.102955661714077, 'timestamp': '2025-10-02 00:28:36.132229', 'step': 9986, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:36.186606', 'step': 9986, 'epoch': 1}
{'type': 'loss', 'content': 0.07357513904571533, 'timestamp': '2025-10-02 00:28:36.192361', 'step': 9987, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:36.255777', 'step': 9987, 'epoch': 1}
{'type': 'loss', 'content': 0.0470900796353817, 'timestamp': '2025-10-02 00:28:36.261899', 'step': 9988, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:36.315479', 'step': 9988, 'epoch': 1}
{'type': 'loss', 'content': 0.048611968755722046, 'timestamp': '2025-10-02 00:28:36.325710', 'step': 9989, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:36.380138', 'step': 9989, 'epoch': 1}
{'type': 'loss', 'content': 0.10243427008390427, 'timestamp': '2025-10-02 00:28:36.386011', 'step': 9990, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:28:36.441891', 'step': 9990, 'epoch': 1}
{'type': 'loss', 'content': 0.07102004438638687, 'timestamp': '2025-10-02 00:28:36.447152', 'step': 9991, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:36.510121', 'step': 9991, 'epoch': 1}
{'type': 'loss', 'content': 0.04112059995532036, 'timestamp': '2025-10-02 00:28:36.521110', 'step': 9992, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:36.582394', 'step': 9992, 'epoch': 1}
{'type': 'loss', 'content': 0.009023810736835003, 'timestamp': '2025-10-02 00:28:36.587487', 'step': 9993, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:28:36.649049', 'step': 9993, 'epoch': 1}
{'type': 'loss', 'content': 0.05442668870091438, 'timestamp': '2025-10-02 00:28:36.651221', 'step': 9994, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:36.704776', 'step': 9994, 'epoch': 1}
{'type': 'loss', 'content': 0.13058803975582123, 'timestamp': '2025-10-02 00:28:36.709774', 'step': 9995, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:36.764480', 'step': 9995, 'epoch': 1}
{'type': 'loss', 'content': 0.13626356422901154, 'timestamp': '2025-10-02 00:28:36.770113', 'step': 9996, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:36.824322', 'step': 9996, 'epoch': 1}
{'type': 'loss', 'content': 0.10561052709817886, 'timestamp': '2025-10-02 00:28:36.830308', 'step': 9997, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:28:36.884182', 'step': 9997, 'epoch': 1}
{'type': 'loss', 'content': 0.12883523106575012, 'timestamp': '2025-10-02 00:28:36.886369', 'step': 9998, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:36.940940', 'step': 9998, 'epoch': 1}
{'type': 'loss', 'content': 0.16103070974349976, 'timestamp': '2025-10-02 00:28:36.943519', 'step': 9999, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:36.999663', 'step': 9999, 'epoch': 1}
{'type': 'loss', 'content': 0.14069244265556335, 'timestamp': '2025-10-02 00:28:37.006930', 'step': 10000, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 10000', 'timestamp': '2025-10-02 00:28:37.488582', 'step': 10000, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:37.545474', 'step': 10000, 'epoch': 1}
{'type': 'loss', 'content': 0.05389190465211868, 'timestamp': '2025-10-02 00:28:37.550348', 'step': 10001, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:28:37.621920', 'step': 10001, 'epoch': 1}
{'type': 'loss', 'content': 0.043903570622205734, 'timestamp': '2025-10-02 00:28:37.634229', 'step': 10002, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:28:37.698449', 'step': 10002, 'epoch': 1}
{'type': 'loss', 'content': 0.015751255676150322, 'timestamp': '2025-10-02 00:28:37.709343', 'step': 10003, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:37.776191', 'step': 10003, 'epoch': 1}
{'type': 'loss', 'content': 0.0899469330906868, 'timestamp': '2025-10-02 00:28:37.787300', 'step': 10004, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:37.850653', 'step': 10004, 'epoch': 1}
{'type': 'loss', 'content': 0.10109981894493103, 'timestamp': '2025-10-02 00:28:37.861638', 'step': 10005, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:37.917429', 'step': 10005, 'epoch': 1}
{'type': 'loss', 'content': 0.1554940640926361, 'timestamp': '2025-10-02 00:28:37.921444', 'step': 10006, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:37.977643', 'step': 10006, 'epoch': 1}
{'type': 'loss', 'content': 0.06948784738779068, 'timestamp': '2025-10-02 00:28:37.983418', 'step': 10007, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:38.039178', 'step': 10007, 'epoch': 1}
{'type': 'loss', 'content': 0.08493997156620026, 'timestamp': '2025-10-02 00:28:38.045936', 'step': 10008, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:28:38.113582', 'step': 10008, 'epoch': 1}
{'type': 'loss', 'content': 0.02593517117202282, 'timestamp': '2025-10-02 00:28:38.126933', 'step': 10009, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:38.186590', 'step': 10009, 'epoch': 1}
{'type': 'loss', 'content': 0.09300816804170609, 'timestamp': '2025-10-02 00:28:38.196738', 'step': 10010, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:38.258649', 'step': 10010, 'epoch': 1}
{'type': 'loss', 'content': 0.014924279414117336, 'timestamp': '2025-10-02 00:28:38.268909', 'step': 10011, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:38.329505', 'step': 10011, 'epoch': 1}
{'type': 'loss', 'content': 0.07115137577056885, 'timestamp': '2025-10-02 00:28:38.340488', 'step': 10012, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:38.396850', 'step': 10012, 'epoch': 1}
{'type': 'loss', 'content': 0.026514915749430656, 'timestamp': '2025-10-02 00:28:38.399520', 'step': 10013, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:38.455270', 'step': 10013, 'epoch': 1}
{'type': 'loss', 'content': 0.0330791249871254, 'timestamp': '2025-10-02 00:28:38.462774', 'step': 10014, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:38.516918', 'step': 10014, 'epoch': 1}
{'type': 'loss', 'content': 0.03878283128142357, 'timestamp': '2025-10-02 00:28:38.522994', 'step': 10015, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:38.582816', 'step': 10015, 'epoch': 1}
{'type': 'loss', 'content': 0.08439693599939346, 'timestamp': '2025-10-02 00:28:38.588896', 'step': 10016, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:38.643345', 'step': 10016, 'epoch': 1}
{'type': 'loss', 'content': 0.08509795367717743, 'timestamp': '2025-10-02 00:28:38.645595', 'step': 10017, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:38.698770', 'step': 10017, 'epoch': 1}
{'type': 'loss', 'content': 0.06571180373430252, 'timestamp': '2025-10-02 00:28:38.701678', 'step': 10018, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:38.755927', 'step': 10018, 'epoch': 1}
{'type': 'loss', 'content': 0.03929133713245392, 'timestamp': '2025-10-02 00:28:38.758400', 'step': 10019, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:38.812927', 'step': 10019, 'epoch': 1}
{'type': 'loss', 'content': 0.02935563586652279, 'timestamp': '2025-10-02 00:28:38.819598', 'step': 10020, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:38.873873', 'step': 10020, 'epoch': 1}
{'type': 'loss', 'content': 0.09975124895572662, 'timestamp': '2025-10-02 00:28:38.876344', 'step': 10021, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:38.930589', 'step': 10021, 'epoch': 1}
{'type': 'loss', 'content': 0.0495908185839653, 'timestamp': '2025-10-02 00:28:38.936852', 'step': 10022, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:28:38.998358', 'step': 10022, 'epoch': 1}
{'type': 'loss', 'content': 0.1336994618177414, 'timestamp': '2025-10-02 00:28:39.004887', 'step': 10023, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:39.067933', 'step': 10023, 'epoch': 1}
{'type': 'loss', 'content': 0.06747094541788101, 'timestamp': '2025-10-02 00:28:39.074407', 'step': 10024, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:28:39.143966', 'step': 10024, 'epoch': 1}
{'type': 'loss', 'content': 0.051784615963697433, 'timestamp': '2025-10-02 00:28:39.155276', 'step': 10025, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:39.222422', 'step': 10025, 'epoch': 1}
{'type': 'loss', 'content': 0.175576850771904, 'timestamp': '2025-10-02 00:28:39.229766', 'step': 10026, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:39.284608', 'step': 10026, 'epoch': 1}
{'type': 'loss', 'content': 0.06715432554483414, 'timestamp': '2025-10-02 00:28:39.294138', 'step': 10027, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:39.348381', 'step': 10027, 'epoch': 1}
{'type': 'loss', 'content': 0.041651394218206406, 'timestamp': '2025-10-02 00:28:39.354890', 'step': 10028, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:28:39.407784', 'step': 10028, 'epoch': 1}
{'type': 'loss', 'content': 0.14564062654972076, 'timestamp': '2025-10-02 00:28:39.410393', 'step': 10029, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:39.464689', 'step': 10029, 'epoch': 1}
{'type': 'loss', 'content': 0.17867396771907806, 'timestamp': '2025-10-02 00:28:39.466808', 'step': 10030, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:39.520786', 'step': 10030, 'epoch': 1}
{'type': 'loss', 'content': 0.08410923182964325, 'timestamp': '2025-10-02 00:28:39.528186', 'step': 10031, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:39.583999', 'step': 10031, 'epoch': 1}
{'type': 'loss', 'content': 0.03839736059308052, 'timestamp': '2025-10-02 00:28:39.589801', 'step': 10032, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:39.642719', 'step': 10032, 'epoch': 1}
{'type': 'loss', 'content': 0.12819160521030426, 'timestamp': '2025-10-02 00:28:39.645228', 'step': 10033, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:39.699227', 'step': 10033, 'epoch': 1}
{'type': 'loss', 'content': 0.05703909695148468, 'timestamp': '2025-10-02 00:28:39.706688', 'step': 10034, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:39.761358', 'step': 10034, 'epoch': 1}
{'type': 'loss', 'content': 0.043582700192928314, 'timestamp': '2025-10-02 00:28:39.768962', 'step': 10035, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:28:39.823501', 'step': 10035, 'epoch': 1}
{'type': 'loss', 'content': 0.1549515426158905, 'timestamp': '2025-10-02 00:28:39.829402', 'step': 10036, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:39.883280', 'step': 10036, 'epoch': 1}
{'type': 'loss', 'content': 0.09679526835680008, 'timestamp': '2025-10-02 00:28:39.885621', 'step': 10037, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:39.939417', 'step': 10037, 'epoch': 1}
{'type': 'loss', 'content': 0.03631874546408653, 'timestamp': '2025-10-02 00:28:39.941995', 'step': 10038, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:40.000301', 'step': 10038, 'epoch': 1}
{'type': 'loss', 'content': 0.022879265248775482, 'timestamp': '2025-10-02 00:28:40.009551', 'step': 10039, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:40.065389', 'step': 10039, 'epoch': 1}
{'type': 'loss', 'content': 0.04686415195465088, 'timestamp': '2025-10-02 00:28:40.071272', 'step': 10040, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:28:40.125733', 'step': 10040, 'epoch': 1}
{'type': 'loss', 'content': 0.09814296662807465, 'timestamp': '2025-10-02 00:28:40.129456', 'step': 10041, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:40.186191', 'step': 10041, 'epoch': 1}
{'type': 'loss', 'content': 0.12417706847190857, 'timestamp': '2025-10-02 00:28:40.188990', 'step': 10042, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:40.244494', 'step': 10042, 'epoch': 1}
{'type': 'loss', 'content': 0.09805846959352493, 'timestamp': '2025-10-02 00:28:40.247019', 'step': 10043, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:40.307553', 'step': 10043, 'epoch': 1}
{'type': 'loss', 'content': 0.09857209771871567, 'timestamp': '2025-10-02 00:28:40.316091', 'step': 10044, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:40.380060', 'step': 10044, 'epoch': 1}
{'type': 'loss', 'content': 0.09959390759468079, 'timestamp': '2025-10-02 00:28:40.384227', 'step': 10045, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:40.442646', 'step': 10045, 'epoch': 1}
{'type': 'loss', 'content': 0.21436864137649536, 'timestamp': '2025-10-02 00:28:40.445571', 'step': 10046, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:40.498499', 'step': 10046, 'epoch': 1}
{'type': 'loss', 'content': 0.1299876719713211, 'timestamp': '2025-10-02 00:28:40.501567', 'step': 10047, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:40.557161', 'step': 10047, 'epoch': 1}
{'type': 'loss', 'content': 0.07323963940143585, 'timestamp': '2025-10-02 00:28:40.563111', 'step': 10048, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:40.617860', 'step': 10048, 'epoch': 1}
{'type': 'loss', 'content': 0.04355083405971527, 'timestamp': '2025-10-02 00:28:40.620393', 'step': 10049, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:40.675023', 'step': 10049, 'epoch': 1}
{'type': 'loss', 'content': 0.04315876588225365, 'timestamp': '2025-10-02 00:28:40.680936', 'step': 10050, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:40.735347', 'step': 10050, 'epoch': 1}
{'type': 'loss', 'content': 0.07369169592857361, 'timestamp': '2025-10-02 00:28:40.737693', 'step': 10051, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:40.791981', 'step': 10051, 'epoch': 1}
{'type': 'loss', 'content': 0.02022189274430275, 'timestamp': '2025-10-02 00:28:40.797844', 'step': 10052, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:40.851645', 'step': 10052, 'epoch': 1}
{'type': 'loss', 'content': 0.10669569671154022, 'timestamp': '2025-10-02 00:28:40.853907', 'step': 10053, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:40.908603', 'step': 10053, 'epoch': 1}
{'type': 'loss', 'content': 0.03143654391169548, 'timestamp': '2025-10-02 00:28:40.914520', 'step': 10054, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:28:40.977418', 'step': 10054, 'epoch': 1}
{'type': 'loss', 'content': 0.07395581156015396, 'timestamp': '2025-10-02 00:28:40.987876', 'step': 10055, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:41.042284', 'step': 10055, 'epoch': 1}
{'type': 'loss', 'content': 0.11088164150714874, 'timestamp': '2025-10-02 00:28:41.048772', 'step': 10056, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:41.120260', 'step': 10056, 'epoch': 1}
{'type': 'loss', 'content': 0.02793095074594021, 'timestamp': '2025-10-02 00:28:41.131199', 'step': 10057, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:41.187212', 'step': 10057, 'epoch': 1}
{'type': 'loss', 'content': 0.050917454063892365, 'timestamp': '2025-10-02 00:28:41.194625', 'step': 10058, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:41.254278', 'step': 10058, 'epoch': 1}
{'type': 'loss', 'content': 0.026464030146598816, 'timestamp': '2025-10-02 00:28:41.258403', 'step': 10059, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:41.312789', 'step': 10059, 'epoch': 1}
{'type': 'loss', 'content': 0.050964925438165665, 'timestamp': '2025-10-02 00:28:41.318788', 'step': 10060, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:41.372657', 'step': 10060, 'epoch': 1}
{'type': 'loss', 'content': 0.08907633274793625, 'timestamp': '2025-10-02 00:28:41.375222', 'step': 10061, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:28:41.433641', 'step': 10061, 'epoch': 1}
{'type': 'loss', 'content': 0.03410780802369118, 'timestamp': '2025-10-02 00:28:41.438594', 'step': 10062, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:41.493618', 'step': 10062, 'epoch': 1}
{'type': 'loss', 'content': 0.3046788275241852, 'timestamp': '2025-10-02 00:28:41.497049', 'step': 10063, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:41.558646', 'step': 10063, 'epoch': 1}
{'type': 'loss', 'content': 0.1439581662416458, 'timestamp': '2025-10-02 00:28:41.564902', 'step': 10064, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:41.620196', 'step': 10064, 'epoch': 1}
{'type': 'loss', 'content': 0.13556823134422302, 'timestamp': '2025-10-02 00:28:41.622693', 'step': 10065, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:41.678886', 'step': 10065, 'epoch': 1}
{'type': 'loss', 'content': 0.3308477997779846, 'timestamp': '2025-10-02 00:28:41.681029', 'step': 10066, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:41.736258', 'step': 10066, 'epoch': 1}
{'type': 'loss', 'content': 0.03676248714327812, 'timestamp': '2025-10-02 00:28:41.741913', 'step': 10067, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:41.797890', 'step': 10067, 'epoch': 1}
{'type': 'loss', 'content': 0.14002124965190887, 'timestamp': '2025-10-02 00:28:41.803473', 'step': 10068, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:41.858771', 'step': 10068, 'epoch': 1}
{'type': 'loss', 'content': 0.07928712666034698, 'timestamp': '2025-10-02 00:28:41.868263', 'step': 10069, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:41.922830', 'step': 10069, 'epoch': 1}
{'type': 'loss', 'content': 0.04603112116456032, 'timestamp': '2025-10-02 00:28:41.925201', 'step': 10070, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:28:41.980127', 'step': 10070, 'epoch': 1}
{'type': 'loss', 'content': 0.05581787973642349, 'timestamp': '2025-10-02 00:28:41.982484', 'step': 10071, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:28:42.036907', 'step': 10071, 'epoch': 1}
{'type': 'loss', 'content': 0.07270972430706024, 'timestamp': '2025-10-02 00:28:42.042924', 'step': 10072, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:42.097414', 'step': 10072, 'epoch': 1}
{'type': 'loss', 'content': 0.02266686037182808, 'timestamp': '2025-10-02 00:28:42.107699', 'step': 10073, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:42.163144', 'step': 10073, 'epoch': 1}
{'type': 'loss', 'content': 0.04560457170009613, 'timestamp': '2025-10-02 00:28:42.172637', 'step': 10074, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:42.258313', 'step': 10074, 'epoch': 1}
{'type': 'loss', 'content': 0.031145736575126648, 'timestamp': '2025-10-02 00:28:42.276854', 'step': 10075, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:42.378014', 'step': 10075, 'epoch': 1}
{'type': 'loss', 'content': 0.0294051393866539, 'timestamp': '2025-10-02 00:28:42.402534', 'step': 10076, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:42.489714', 'step': 10076, 'epoch': 1}
{'type': 'loss', 'content': 0.07933392375707626, 'timestamp': '2025-10-02 00:28:42.500726', 'step': 10077, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:42.591755', 'step': 10077, 'epoch': 1}
{'type': 'loss', 'content': 0.0291301142424345, 'timestamp': '2025-10-02 00:28:42.601450', 'step': 10078, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:42.715879', 'step': 10078, 'epoch': 1}
{'type': 'loss', 'content': 0.04468872398138046, 'timestamp': '2025-10-02 00:28:42.743586', 'step': 10079, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:42.828872', 'step': 10079, 'epoch': 1}
{'type': 'loss', 'content': 0.08975202590227127, 'timestamp': '2025-10-02 00:28:42.839862', 'step': 10080, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:42.917936', 'step': 10080, 'epoch': 1}
{'type': 'loss', 'content': 0.0827711820602417, 'timestamp': '2025-10-02 00:28:42.931843', 'step': 10081, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:43.040791', 'step': 10081, 'epoch': 1}
{'type': 'loss', 'content': 0.030119480565190315, 'timestamp': '2025-10-02 00:28:43.050088', 'step': 10082, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:43.143443', 'step': 10082, 'epoch': 1}
{'type': 'loss', 'content': 0.04107574373483658, 'timestamp': '2025-10-02 00:28:43.170827', 'step': 10083, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:43.260873', 'step': 10083, 'epoch': 1}
{'type': 'loss', 'content': 0.06213578209280968, 'timestamp': '2025-10-02 00:28:43.271168', 'step': 10084, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:43.358992', 'step': 10084, 'epoch': 1}
{'type': 'loss', 'content': 0.008834553882479668, 'timestamp': '2025-10-02 00:28:43.375496', 'step': 10085, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:43.474137', 'step': 10085, 'epoch': 1}
{'type': 'loss', 'content': 0.05652504414319992, 'timestamp': '2025-10-02 00:28:43.489631', 'step': 10086, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:43.585010', 'step': 10086, 'epoch': 1}
{'type': 'loss', 'content': 0.026369208469986916, 'timestamp': '2025-10-02 00:28:43.599475', 'step': 10087, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:43.659791', 'step': 10087, 'epoch': 1}
{'type': 'loss', 'content': 0.03730827942490578, 'timestamp': '2025-10-02 00:28:43.667824', 'step': 10088, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:28:43.722961', 'step': 10088, 'epoch': 1}
{'type': 'loss', 'content': 0.13549724221229553, 'timestamp': '2025-10-02 00:28:43.725046', 'step': 10089, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:43.780602', 'step': 10089, 'epoch': 1}
{'type': 'loss', 'content': 0.09465860575437546, 'timestamp': '2025-10-02 00:28:43.783142', 'step': 10090, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:43.838881', 'step': 10090, 'epoch': 1}
{'type': 'loss', 'content': 0.08471685647964478, 'timestamp': '2025-10-02 00:28:43.841574', 'step': 10091, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:43.905299', 'step': 10091, 'epoch': 1}
{'type': 'loss', 'content': 0.04411226511001587, 'timestamp': '2025-10-02 00:28:43.912010', 'step': 10092, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:43.980356', 'step': 10092, 'epoch': 1}
{'type': 'loss', 'content': 0.07840495556592941, 'timestamp': '2025-10-02 00:28:43.982635', 'step': 10093, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:28:44.038161', 'step': 10093, 'epoch': 1}
{'type': 'loss', 'content': 0.06897749751806259, 'timestamp': '2025-10-02 00:28:44.040274', 'step': 10094, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:44.094640', 'step': 10094, 'epoch': 1}
{'type': 'loss', 'content': 0.05501202121376991, 'timestamp': '2025-10-02 00:28:44.096400', 'step': 10095, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:44.150337', 'step': 10095, 'epoch': 1}
{'type': 'loss', 'content': 0.1197073757648468, 'timestamp': '2025-10-02 00:28:44.155889', 'step': 10096, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:44.210271', 'step': 10096, 'epoch': 1}
{'type': 'loss', 'content': 0.029107030481100082, 'timestamp': '2025-10-02 00:28:44.216105', 'step': 10097, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:44.271591', 'step': 10097, 'epoch': 1}
{'type': 'loss', 'content': 0.044295065104961395, 'timestamp': '2025-10-02 00:28:44.274702', 'step': 10098, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:44.329732', 'step': 10098, 'epoch': 1}
{'type': 'loss', 'content': 0.13339971005916595, 'timestamp': '2025-10-02 00:28:44.332578', 'step': 10099, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:44.389114', 'step': 10099, 'epoch': 1}
{'type': 'loss', 'content': 0.056943245232105255, 'timestamp': '2025-10-02 00:28:44.399440', 'step': 10100, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:28:44.453076', 'step': 10100, 'epoch': 1}
{'type': 'loss', 'content': 0.1852366328239441, 'timestamp': '2025-10-02 00:28:44.455234', 'step': 10101, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:44.509397', 'step': 10101, 'epoch': 1}
{'type': 'loss', 'content': 0.18125639855861664, 'timestamp': '2025-10-02 00:28:44.515338', 'step': 10102, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:44.573738', 'step': 10102, 'epoch': 1}
{'type': 'loss', 'content': 0.025310944765806198, 'timestamp': '2025-10-02 00:28:44.576108', 'step': 10103, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:44.631135', 'step': 10103, 'epoch': 1}
{'type': 'loss', 'content': 0.09432633966207504, 'timestamp': '2025-10-02 00:28:44.637209', 'step': 10104, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:44.693717', 'step': 10104, 'epoch': 1}
{'type': 'loss', 'content': 0.13749626278877258, 'timestamp': '2025-10-02 00:28:44.702219', 'step': 10105, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:44.759701', 'step': 10105, 'epoch': 1}
{'type': 'loss', 'content': 0.1695777177810669, 'timestamp': '2025-10-02 00:28:44.762478', 'step': 10106, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:44.826327', 'step': 10106, 'epoch': 1}
{'type': 'loss', 'content': 0.16764797270298004, 'timestamp': '2025-10-02 00:28:44.833475', 'step': 10107, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:44.892209', 'step': 10107, 'epoch': 1}
{'type': 'loss', 'content': 0.02734752558171749, 'timestamp': '2025-10-02 00:28:44.900524', 'step': 10108, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:44.956271', 'step': 10108, 'epoch': 1}
{'type': 'loss', 'content': 0.16018274426460266, 'timestamp': '2025-10-02 00:28:44.958526', 'step': 10109, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:45.022057', 'step': 10109, 'epoch': 1}
{'type': 'loss', 'content': 0.09147509932518005, 'timestamp': '2025-10-02 00:28:45.024426', 'step': 10110, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:45.088594', 'step': 10110, 'epoch': 1}
{'type': 'loss', 'content': 0.03650011122226715, 'timestamp': '2025-10-02 00:28:45.097907', 'step': 10111, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:45.156120', 'step': 10111, 'epoch': 1}
{'type': 'loss', 'content': 0.044614728540182114, 'timestamp': '2025-10-02 00:28:45.166489', 'step': 10112, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:45.220925', 'step': 10112, 'epoch': 1}
{'type': 'loss', 'content': 0.06928557902574539, 'timestamp': '2025-10-02 00:28:45.223527', 'step': 10113, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:28:45.279445', 'step': 10113, 'epoch': 1}
{'type': 'loss', 'content': 0.2291494607925415, 'timestamp': '2025-10-02 00:28:45.285127', 'step': 10114, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:28:45.348262', 'step': 10114, 'epoch': 1}
{'type': 'loss', 'content': 0.07822082936763763, 'timestamp': '2025-10-02 00:28:45.358756', 'step': 10115, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:45.415226', 'step': 10115, 'epoch': 1}
{'type': 'loss', 'content': 0.08607850223779678, 'timestamp': '2025-10-02 00:28:45.423393', 'step': 10116, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:45.477836', 'step': 10116, 'epoch': 1}
{'type': 'loss', 'content': 0.09353900700807571, 'timestamp': '2025-10-02 00:28:45.480458', 'step': 10117, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:45.538420', 'step': 10117, 'epoch': 1}
{'type': 'loss', 'content': 0.04536833241581917, 'timestamp': '2025-10-02 00:28:45.543978', 'step': 10118, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:45.600221', 'step': 10118, 'epoch': 1}
{'type': 'loss', 'content': 0.06485158950090408, 'timestamp': '2025-10-02 00:28:45.609419', 'step': 10119, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:28:45.682355', 'step': 10119, 'epoch': 1}
{'type': 'loss', 'content': 0.029255319386720657, 'timestamp': '2025-10-02 00:28:45.695706', 'step': 10120, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:45.753378', 'step': 10120, 'epoch': 1}
{'type': 'loss', 'content': 0.12007033079862595, 'timestamp': '2025-10-02 00:28:45.755619', 'step': 10121, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:28:45.812221', 'step': 10121, 'epoch': 1}
{'type': 'loss', 'content': 0.09013532847166061, 'timestamp': '2025-10-02 00:28:45.814652', 'step': 10122, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:45.873359', 'step': 10122, 'epoch': 1}
{'type': 'loss', 'content': 0.023251689970493317, 'timestamp': '2025-10-02 00:28:45.882574', 'step': 10123, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:28:45.939609', 'step': 10123, 'epoch': 1}
{'type': 'loss', 'content': 0.07078427821397781, 'timestamp': '2025-10-02 00:28:45.948097', 'step': 10124, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:46.003689', 'step': 10124, 'epoch': 1}
{'type': 'loss', 'content': 0.06702661514282227, 'timestamp': '2025-10-02 00:28:46.011193', 'step': 10125, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:46.068927', 'step': 10125, 'epoch': 1}
{'type': 'loss', 'content': 0.1483033299446106, 'timestamp': '2025-10-02 00:28:46.073744', 'step': 10126, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:28:46.136824', 'step': 10126, 'epoch': 1}
{'type': 'loss', 'content': 0.1583525836467743, 'timestamp': '2025-10-02 00:28:46.140173', 'step': 10127, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:46.200927', 'step': 10127, 'epoch': 1}
{'type': 'loss', 'content': 0.018818123266100883, 'timestamp': '2025-10-02 00:28:46.207544', 'step': 10128, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:46.263004', 'step': 10128, 'epoch': 1}
{'type': 'loss', 'content': 0.24321036040782928, 'timestamp': '2025-10-02 00:28:46.265980', 'step': 10129, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:46.322408', 'step': 10129, 'epoch': 1}
{'type': 'loss', 'content': 0.04797493293881416, 'timestamp': '2025-10-02 00:28:46.329676', 'step': 10130, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:28:46.405331', 'step': 10130, 'epoch': 1}
{'type': 'loss', 'content': 0.049297135323286057, 'timestamp': '2025-10-02 00:28:46.418895', 'step': 10131, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:46.476333', 'step': 10131, 'epoch': 1}
{'type': 'loss', 'content': 0.11674695461988449, 'timestamp': '2025-10-02 00:28:46.482571', 'step': 10132, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:46.549657', 'step': 10132, 'epoch': 1}
{'type': 'loss', 'content': 0.14670415222644806, 'timestamp': '2025-10-02 00:28:46.551973', 'step': 10133, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:46.608085', 'step': 10133, 'epoch': 1}
{'type': 'loss', 'content': 0.047414928674697876, 'timestamp': '2025-10-02 00:28:46.610709', 'step': 10134, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:46.667625', 'step': 10134, 'epoch': 1}
{'type': 'loss', 'content': 0.038746219128370285, 'timestamp': '2025-10-02 00:28:46.673323', 'step': 10135, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:28:46.730060', 'step': 10135, 'epoch': 1}
{'type': 'loss', 'content': 0.09682217240333557, 'timestamp': '2025-10-02 00:28:46.737339', 'step': 10136, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:46.796150', 'step': 10136, 'epoch': 1}
{'type': 'loss', 'content': 0.13139817118644714, 'timestamp': '2025-10-02 00:28:46.799425', 'step': 10137, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:28:46.858222', 'step': 10137, 'epoch': 1}
{'type': 'loss', 'content': 0.2024148553609848, 'timestamp': '2025-10-02 00:28:46.861346', 'step': 10138, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:28:46.917510', 'step': 10138, 'epoch': 1}
{'type': 'loss', 'content': 0.05980664864182472, 'timestamp': '2025-10-02 00:28:46.919814', 'step': 10139, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:28:46.987990', 'step': 10139, 'epoch': 1}
{'type': 'loss', 'content': 0.045885056257247925, 'timestamp': '2025-10-02 00:28:47.000731', 'step': 10140, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:47.059015', 'step': 10140, 'epoch': 1}
{'type': 'loss', 'content': 0.16401082277297974, 'timestamp': '2025-10-02 00:28:47.062227', 'step': 10141, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:47.117994', 'step': 10141, 'epoch': 1}
{'type': 'loss', 'content': 0.01280885748565197, 'timestamp': '2025-10-02 00:28:47.125303', 'step': 10142, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:47.188721', 'step': 10142, 'epoch': 1}
{'type': 'loss', 'content': 0.1399139165878296, 'timestamp': '2025-10-02 00:28:47.194100', 'step': 10143, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:47.254495', 'step': 10143, 'epoch': 1}
{'type': 'loss', 'content': 0.02983444184064865, 'timestamp': '2025-10-02 00:28:47.265463', 'step': 10144, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:47.321264', 'step': 10144, 'epoch': 1}
{'type': 'loss', 'content': 0.02461480163037777, 'timestamp': '2025-10-02 00:28:47.324115', 'step': 10145, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:28:47.378502', 'step': 10145, 'epoch': 1}
{'type': 'loss', 'content': 0.05183282494544983, 'timestamp': '2025-10-02 00:28:47.380767', 'step': 10146, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:47.435451', 'step': 10146, 'epoch': 1}
{'type': 'loss', 'content': 0.08586101979017258, 'timestamp': '2025-10-02 00:28:47.441008', 'step': 10147, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:47.495735', 'step': 10147, 'epoch': 1}
{'type': 'loss', 'content': 0.09337174147367477, 'timestamp': '2025-10-02 00:28:47.503895', 'step': 10148, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:47.557758', 'step': 10148, 'epoch': 1}
{'type': 'loss', 'content': 0.10688057541847229, 'timestamp': '2025-10-02 00:28:47.560098', 'step': 10149, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:47.614932', 'step': 10149, 'epoch': 1}
{'type': 'loss', 'content': 0.04841666668653488, 'timestamp': '2025-10-02 00:28:47.624300', 'step': 10150, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:47.679063', 'step': 10150, 'epoch': 1}
{'type': 'loss', 'content': 0.13017834722995758, 'timestamp': '2025-10-02 00:28:47.681296', 'step': 10151, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:47.735649', 'step': 10151, 'epoch': 1}
{'type': 'loss', 'content': 0.16972842812538147, 'timestamp': '2025-10-02 00:28:47.741625', 'step': 10152, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:47.795263', 'step': 10152, 'epoch': 1}
{'type': 'loss', 'content': 0.08717625588178635, 'timestamp': '2025-10-02 00:28:47.801123', 'step': 10153, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:28:47.862225', 'step': 10153, 'epoch': 1}
{'type': 'loss', 'content': 0.04503142833709717, 'timestamp': '2025-10-02 00:28:47.872700', 'step': 10154, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:47.928829', 'step': 10154, 'epoch': 1}
{'type': 'loss', 'content': 0.048792991787195206, 'timestamp': '2025-10-02 00:28:47.938364', 'step': 10155, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:28:48.000701', 'step': 10155, 'epoch': 1}
{'type': 'loss', 'content': 0.03804533928632736, 'timestamp': '2025-10-02 00:28:48.012175', 'step': 10156, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:28:48.065814', 'step': 10156, 'epoch': 1}
{'type': 'loss', 'content': 0.13108406960964203, 'timestamp': '2025-10-02 00:28:48.068307', 'step': 10157, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:48.121939', 'step': 10157, 'epoch': 1}
{'type': 'loss', 'content': 0.057877231389284134, 'timestamp': '2025-10-02 00:28:48.125605', 'step': 10158, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:48.184956', 'step': 10158, 'epoch': 1}
{'type': 'loss', 'content': 0.07769216597080231, 'timestamp': '2025-10-02 00:28:48.187265', 'step': 10159, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:48.242950', 'step': 10159, 'epoch': 1}
{'type': 'loss', 'content': 0.08334731310606003, 'timestamp': '2025-10-02 00:28:48.248794', 'step': 10160, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:48.302362', 'step': 10160, 'epoch': 1}
{'type': 'loss', 'content': 0.1360851228237152, 'timestamp': '2025-10-02 00:28:48.304632', 'step': 10161, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:48.359044', 'step': 10161, 'epoch': 1}
{'type': 'loss', 'content': 0.05433197319507599, 'timestamp': '2025-10-02 00:28:48.368354', 'step': 10162, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:48.423733', 'step': 10162, 'epoch': 1}
{'type': 'loss', 'content': 0.1975671947002411, 'timestamp': '2025-10-02 00:28:48.426670', 'step': 10163, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:48.481641', 'step': 10163, 'epoch': 1}
{'type': 'loss', 'content': 0.015807850286364555, 'timestamp': '2025-10-02 00:28:48.488268', 'step': 10164, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:48.542044', 'step': 10164, 'epoch': 1}
{'type': 'loss', 'content': 0.04614317789673805, 'timestamp': '2025-10-02 00:28:48.547866', 'step': 10165, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:48.604353', 'step': 10165, 'epoch': 1}
{'type': 'loss', 'content': 0.06655971705913544, 'timestamp': '2025-10-02 00:28:48.606839', 'step': 10166, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:48.661504', 'step': 10166, 'epoch': 1}
{'type': 'loss', 'content': 0.06895139068365097, 'timestamp': '2025-10-02 00:28:48.667484', 'step': 10167, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:48.721757', 'step': 10167, 'epoch': 1}
{'type': 'loss', 'content': 0.07155150175094604, 'timestamp': '2025-10-02 00:28:48.728300', 'step': 10168, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:48.782857', 'step': 10168, 'epoch': 1}
{'type': 'loss', 'content': 0.041302867233753204, 'timestamp': '2025-10-02 00:28:48.788661', 'step': 10169, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:48.843995', 'step': 10169, 'epoch': 1}
{'type': 'loss', 'content': 0.006454919930547476, 'timestamp': '2025-10-02 00:28:48.851415', 'step': 10170, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:48.906733', 'step': 10170, 'epoch': 1}
{'type': 'loss', 'content': 0.030304666608572006, 'timestamp': '2025-10-02 00:28:48.909575', 'step': 10171, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:28:48.972105', 'step': 10171, 'epoch': 1}
{'type': 'loss', 'content': 0.044698502868413925, 'timestamp': '2025-10-02 00:28:48.983373', 'step': 10172, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:49.038759', 'step': 10172, 'epoch': 1}
{'type': 'loss', 'content': 0.08823127299547195, 'timestamp': '2025-10-02 00:28:49.041571', 'step': 10173, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-10-02 00:28:49.122920', 'step': 10173, 'epoch': 1}
{'type': 'loss', 'content': 0.032428041100502014, 'timestamp': '2025-10-02 00:28:49.137721', 'step': 10174, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:28:49.193201', 'step': 10174, 'epoch': 1}
{'type': 'loss', 'content': 0.1394231915473938, 'timestamp': '2025-10-02 00:28:49.196236', 'step': 10175, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:28:49.250894', 'step': 10175, 'epoch': 1}
{'type': 'loss', 'content': 0.08924496918916702, 'timestamp': '2025-10-02 00:28:49.257207', 'step': 10176, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:49.311012', 'step': 10176, 'epoch': 1}
{'type': 'loss', 'content': 0.1139412522315979, 'timestamp': '2025-10-02 00:28:49.313703', 'step': 10177, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:49.368229', 'step': 10177, 'epoch': 1}
{'type': 'loss', 'content': 0.09650924801826477, 'timestamp': '2025-10-02 00:28:49.370562', 'step': 10178, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:49.425653', 'step': 10178, 'epoch': 1}
{'type': 'loss', 'content': 0.10609636455774307, 'timestamp': '2025-10-02 00:28:49.428652', 'step': 10179, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:49.483131', 'step': 10179, 'epoch': 1}
{'type': 'loss', 'content': 0.09569645673036575, 'timestamp': '2025-10-02 00:28:49.489987', 'step': 10180, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:49.545082', 'step': 10180, 'epoch': 1}
{'type': 'loss', 'content': 0.05502084642648697, 'timestamp': '2025-10-02 00:28:49.547739', 'step': 10181, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:49.603018', 'step': 10181, 'epoch': 1}
{'type': 'loss', 'content': 0.03091365098953247, 'timestamp': '2025-10-02 00:28:49.609104', 'step': 10182, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:28:49.664036', 'step': 10182, 'epoch': 1}
{'type': 'loss', 'content': 0.13424591720104218, 'timestamp': '2025-10-02 00:28:49.666651', 'step': 10183, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:49.721587', 'step': 10183, 'epoch': 1}
{'type': 'loss', 'content': 0.046769145876169205, 'timestamp': '2025-10-02 00:28:49.727703', 'step': 10184, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:49.781719', 'step': 10184, 'epoch': 1}
{'type': 'loss', 'content': 0.017892902716994286, 'timestamp': '2025-10-02 00:28:49.784795', 'step': 10185, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:49.839731', 'step': 10185, 'epoch': 1}
{'type': 'loss', 'content': 0.04198244586586952, 'timestamp': '2025-10-02 00:28:49.842054', 'step': 10186, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:49.896100', 'step': 10186, 'epoch': 1}
{'type': 'loss', 'content': 0.10942541807889938, 'timestamp': '2025-10-02 00:28:49.898251', 'step': 10187, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:49.951984', 'step': 10187, 'epoch': 1}
{'type': 'loss', 'content': 0.06098098307847977, 'timestamp': '2025-10-02 00:28:49.958737', 'step': 10188, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:50.013162', 'step': 10188, 'epoch': 1}
{'type': 'loss', 'content': 0.052867960184812546, 'timestamp': '2025-10-02 00:28:50.019096', 'step': 10189, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:50.074737', 'step': 10189, 'epoch': 1}
{'type': 'loss', 'content': 0.10928058624267578, 'timestamp': '2025-10-02 00:28:50.077382', 'step': 10190, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:50.132003', 'step': 10190, 'epoch': 1}
{'type': 'loss', 'content': 0.06508469581604004, 'timestamp': '2025-10-02 00:28:50.136778', 'step': 10191, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:50.194768', 'step': 10191, 'epoch': 1}
{'type': 'loss', 'content': 0.05392620712518692, 'timestamp': '2025-10-02 00:28:50.200564', 'step': 10192, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:50.254027', 'step': 10192, 'epoch': 1}
{'type': 'loss', 'content': 0.09940826147794724, 'timestamp': '2025-10-02 00:28:50.256395', 'step': 10193, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:50.310367', 'step': 10193, 'epoch': 1}
{'type': 'loss', 'content': 0.01580890640616417, 'timestamp': '2025-10-02 00:28:50.312836', 'step': 10194, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:50.368130', 'step': 10194, 'epoch': 1}
{'type': 'loss', 'content': 0.05723452940583229, 'timestamp': '2025-10-02 00:28:50.374023', 'step': 10195, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:28:50.428415', 'step': 10195, 'epoch': 1}
{'type': 'loss', 'content': 0.1442841738462448, 'timestamp': '2025-10-02 00:28:50.434247', 'step': 10196, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:50.487761', 'step': 10196, 'epoch': 1}
{'type': 'loss', 'content': 0.14190860092639923, 'timestamp': '2025-10-02 00:28:50.503693', 'step': 10197, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:28:50.558050', 'step': 10197, 'epoch': 1}
{'type': 'loss', 'content': 0.18100231885910034, 'timestamp': '2025-10-02 00:28:50.560570', 'step': 10198, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:28:50.630586', 'step': 10198, 'epoch': 1}
{'type': 'loss', 'content': 0.03869126737117767, 'timestamp': '2025-10-02 00:28:50.642831', 'step': 10199, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:50.697291', 'step': 10199, 'epoch': 1}
{'type': 'loss', 'content': 0.14049218595027924, 'timestamp': '2025-10-02 00:28:50.703404', 'step': 10200, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:50.756689', 'step': 10200, 'epoch': 1}
{'type': 'loss', 'content': 0.0685778334736824, 'timestamp': '2025-10-02 00:28:50.759271', 'step': 10201, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:50.813469', 'step': 10201, 'epoch': 1}
{'type': 'loss', 'content': 0.0722164586186409, 'timestamp': '2025-10-02 00:28:50.819485', 'step': 10202, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:50.874493', 'step': 10202, 'epoch': 1}
{'type': 'loss', 'content': 0.03420611470937729, 'timestamp': '2025-10-02 00:28:50.876892', 'step': 10203, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:28:50.939592', 'step': 10203, 'epoch': 1}
{'type': 'loss', 'content': 0.10508634150028229, 'timestamp': '2025-10-02 00:28:50.950841', 'step': 10204, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:51.004636', 'step': 10204, 'epoch': 1}
{'type': 'loss', 'content': 0.22235237061977386, 'timestamp': '2025-10-02 00:28:51.007018', 'step': 10205, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:51.060469', 'step': 10205, 'epoch': 1}
{'type': 'loss', 'content': 0.10434218496084213, 'timestamp': '2025-10-02 00:28:51.063546', 'step': 10206, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:51.117725', 'step': 10206, 'epoch': 1}
{'type': 'loss', 'content': 0.13554026186466217, 'timestamp': '2025-10-02 00:28:51.121561', 'step': 10207, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:51.180586', 'step': 10207, 'epoch': 1}
{'type': 'loss', 'content': 0.08347959816455841, 'timestamp': '2025-10-02 00:28:51.186966', 'step': 10208, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:51.241747', 'step': 10208, 'epoch': 1}
{'type': 'loss', 'content': 0.012666006572544575, 'timestamp': '2025-10-02 00:28:51.247547', 'step': 10209, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:51.306428', 'step': 10209, 'epoch': 1}
{'type': 'loss', 'content': 0.12791094183921814, 'timestamp': '2025-10-02 00:28:51.316617', 'step': 10210, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:51.371681', 'step': 10210, 'epoch': 1}
{'type': 'loss', 'content': 0.01521151140332222, 'timestamp': '2025-10-02 00:28:51.373936', 'step': 10211, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:51.430870', 'step': 10211, 'epoch': 1}
{'type': 'loss', 'content': 0.06457217782735825, 'timestamp': '2025-10-02 00:28:51.437057', 'step': 10212, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:51.490443', 'step': 10212, 'epoch': 1}
{'type': 'loss', 'content': 0.1960678994655609, 'timestamp': '2025-10-02 00:28:51.493139', 'step': 10213, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:51.548497', 'step': 10213, 'epoch': 1}
{'type': 'loss', 'content': 0.11607140302658081, 'timestamp': '2025-10-02 00:28:51.550837', 'step': 10214, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:51.607001', 'step': 10214, 'epoch': 1}
{'type': 'loss', 'content': 0.046761851757764816, 'timestamp': '2025-10-02 00:28:51.616523', 'step': 10215, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:51.671901', 'step': 10215, 'epoch': 1}
{'type': 'loss', 'content': 0.09499991685152054, 'timestamp': '2025-10-02 00:28:51.678885', 'step': 10216, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:51.737835', 'step': 10216, 'epoch': 1}
{'type': 'loss', 'content': 0.024827826768159866, 'timestamp': '2025-10-02 00:28:51.740282', 'step': 10217, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:51.794668', 'step': 10217, 'epoch': 1}
{'type': 'loss', 'content': 0.10836613923311234, 'timestamp': '2025-10-02 00:28:51.800672', 'step': 10218, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:51.855584', 'step': 10218, 'epoch': 1}
{'type': 'loss', 'content': 0.048274826258420944, 'timestamp': '2025-10-02 00:28:51.858113', 'step': 10219, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:51.912658', 'step': 10219, 'epoch': 1}
{'type': 'loss', 'content': 0.07784651219844818, 'timestamp': '2025-10-02 00:28:51.922767', 'step': 10220, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:51.981179', 'step': 10220, 'epoch': 1}
{'type': 'loss', 'content': 0.05732829496264458, 'timestamp': '2025-10-02 00:28:51.992129', 'step': 10221, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:28:52.057494', 'step': 10221, 'epoch': 1}
{'type': 'loss', 'content': 0.043215617537498474, 'timestamp': '2025-10-02 00:28:52.068366', 'step': 10222, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:28:52.123274', 'step': 10222, 'epoch': 1}
{'type': 'loss', 'content': 0.056227173656225204, 'timestamp': '2025-10-02 00:28:52.127446', 'step': 10223, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:52.183655', 'step': 10223, 'epoch': 1}
{'type': 'loss', 'content': 0.054817333817481995, 'timestamp': '2025-10-02 00:28:52.190713', 'step': 10224, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:52.249743', 'step': 10224, 'epoch': 1}
{'type': 'loss', 'content': 0.11435677111148834, 'timestamp': '2025-10-02 00:28:52.252023', 'step': 10225, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:52.306326', 'step': 10225, 'epoch': 1}
{'type': 'loss', 'content': 0.0636463612318039, 'timestamp': '2025-10-02 00:28:52.315811', 'step': 10226, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:28:52.369721', 'step': 10226, 'epoch': 1}
{'type': 'loss', 'content': 0.24118253588676453, 'timestamp': '2025-10-02 00:28:52.372282', 'step': 10227, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:52.426603', 'step': 10227, 'epoch': 1}
{'type': 'loss', 'content': 0.12747599184513092, 'timestamp': '2025-10-02 00:28:52.432296', 'step': 10228, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:52.486296', 'step': 10228, 'epoch': 1}
{'type': 'loss', 'content': 0.030719023197889328, 'timestamp': '2025-10-02 00:28:52.491954', 'step': 10229, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:52.546593', 'step': 10229, 'epoch': 1}
{'type': 'loss', 'content': 0.06541389226913452, 'timestamp': '2025-10-02 00:28:52.555785', 'step': 10230, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:52.610350', 'step': 10230, 'epoch': 1}
{'type': 'loss', 'content': 0.14979445934295654, 'timestamp': '2025-10-02 00:28:52.612831', 'step': 10231, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:52.666927', 'step': 10231, 'epoch': 1}
{'type': 'loss', 'content': 0.056111596524715424, 'timestamp': '2025-10-02 00:28:52.673005', 'step': 10232, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:28:52.727264', 'step': 10232, 'epoch': 1}
{'type': 'loss', 'content': 0.0862119123339653, 'timestamp': '2025-10-02 00:28:52.729861', 'step': 10233, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:52.784140', 'step': 10233, 'epoch': 1}
{'type': 'loss', 'content': 0.1583845466375351, 'timestamp': '2025-10-02 00:28:52.786689', 'step': 10234, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:52.842905', 'step': 10234, 'epoch': 1}
{'type': 'loss', 'content': 0.036464035511016846, 'timestamp': '2025-10-02 00:28:52.850171', 'step': 10235, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:52.904421', 'step': 10235, 'epoch': 1}
{'type': 'loss', 'content': 0.09680943936109543, 'timestamp': '2025-10-02 00:28:52.910404', 'step': 10236, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:28:52.963681', 'step': 10236, 'epoch': 1}
{'type': 'loss', 'content': 0.21135756373405457, 'timestamp': '2025-10-02 00:28:52.966104', 'step': 10237, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:53.020817', 'step': 10237, 'epoch': 1}
{'type': 'loss', 'content': 0.14188896119594574, 'timestamp': '2025-10-02 00:28:53.023382', 'step': 10238, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:53.078162', 'step': 10238, 'epoch': 1}
{'type': 'loss', 'content': 0.016992967575788498, 'timestamp': '2025-10-02 00:28:53.087512', 'step': 10239, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:53.142258', 'step': 10239, 'epoch': 1}
{'type': 'loss', 'content': 0.12556324899196625, 'timestamp': '2025-10-02 00:28:53.148293', 'step': 10240, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:53.205546', 'step': 10240, 'epoch': 1}
{'type': 'loss', 'content': 0.04408567026257515, 'timestamp': '2025-10-02 00:28:53.215932', 'step': 10241, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:53.271146', 'step': 10241, 'epoch': 1}
{'type': 'loss', 'content': 0.045484162867069244, 'timestamp': '2025-10-02 00:28:53.273363', 'step': 10242, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:53.327496', 'step': 10242, 'epoch': 1}
{'type': 'loss', 'content': 0.08792618662118912, 'timestamp': '2025-10-02 00:28:53.333344', 'step': 10243, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:28:53.403867', 'step': 10243, 'epoch': 1}
{'type': 'loss', 'content': 0.015725702047348022, 'timestamp': '2025-10-02 00:28:53.417142', 'step': 10244, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:53.470728', 'step': 10244, 'epoch': 1}
{'type': 'loss', 'content': 0.064882792532444, 'timestamp': '2025-10-02 00:28:53.478285', 'step': 10245, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:28:53.540746', 'step': 10245, 'epoch': 1}
{'type': 'loss', 'content': 0.02895895391702652, 'timestamp': '2025-10-02 00:28:53.551423', 'step': 10246, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:53.616408', 'step': 10246, 'epoch': 1}
{'type': 'loss', 'content': 0.09920285642147064, 'timestamp': '2025-10-02 00:28:53.623639', 'step': 10247, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:53.678046', 'step': 10247, 'epoch': 1}
{'type': 'loss', 'content': 0.06011379510164261, 'timestamp': '2025-10-02 00:28:53.684046', 'step': 10248, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:53.738941', 'step': 10248, 'epoch': 1}
{'type': 'loss', 'content': 0.03898167982697487, 'timestamp': '2025-10-02 00:28:53.749209', 'step': 10249, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:53.803836', 'step': 10249, 'epoch': 1}
{'type': 'loss', 'content': 0.11621283739805222, 'timestamp': '2025-10-02 00:28:53.806796', 'step': 10250, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:53.864650', 'step': 10250, 'epoch': 1}
{'type': 'loss', 'content': 0.038955338299274445, 'timestamp': '2025-10-02 00:28:53.874021', 'step': 10251, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:28:53.936199', 'step': 10251, 'epoch': 1}
{'type': 'loss', 'content': 0.0817156732082367, 'timestamp': '2025-10-02 00:28:53.942175', 'step': 10252, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:53.999121', 'step': 10252, 'epoch': 1}
{'type': 'loss', 'content': 0.06973012536764145, 'timestamp': '2025-10-02 00:28:54.004443', 'step': 10253, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:54.061034', 'step': 10253, 'epoch': 1}
{'type': 'loss', 'content': 0.08420225977897644, 'timestamp': '2025-10-02 00:28:54.064311', 'step': 10254, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:54.119653', 'step': 10254, 'epoch': 1}
{'type': 'loss', 'content': 0.13111469149589539, 'timestamp': '2025-10-02 00:28:54.125632', 'step': 10255, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:54.184434', 'step': 10255, 'epoch': 1}
{'type': 'loss', 'content': 0.18409991264343262, 'timestamp': '2025-10-02 00:28:54.194173', 'step': 10256, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:54.250128', 'step': 10256, 'epoch': 1}
{'type': 'loss', 'content': 0.03885689377784729, 'timestamp': '2025-10-02 00:28:54.257645', 'step': 10257, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:54.317030', 'step': 10257, 'epoch': 1}
{'type': 'loss', 'content': 0.0462726466357708, 'timestamp': '2025-10-02 00:28:54.326652', 'step': 10258, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:54.384566', 'step': 10258, 'epoch': 1}
{'type': 'loss', 'content': 0.08700022101402283, 'timestamp': '2025-10-02 00:28:54.387558', 'step': 10259, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:54.444086', 'step': 10259, 'epoch': 1}
{'type': 'loss', 'content': 0.04027511179447174, 'timestamp': '2025-10-02 00:28:54.450041', 'step': 10260, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:54.506837', 'step': 10260, 'epoch': 1}
{'type': 'loss', 'content': 0.0495561845600605, 'timestamp': '2025-10-02 00:28:54.514356', 'step': 10261, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:54.570504', 'step': 10261, 'epoch': 1}
{'type': 'loss', 'content': 0.055824149399995804, 'timestamp': '2025-10-02 00:28:54.573906', 'step': 10262, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:54.630919', 'step': 10262, 'epoch': 1}
{'type': 'loss', 'content': 0.05036645755171776, 'timestamp': '2025-10-02 00:28:54.638068', 'step': 10263, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:54.694757', 'step': 10263, 'epoch': 1}
{'type': 'loss', 'content': 0.050553228706121445, 'timestamp': '2025-10-02 00:28:54.700753', 'step': 10264, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:54.758301', 'step': 10264, 'epoch': 1}
{'type': 'loss', 'content': 0.05547133833169937, 'timestamp': '2025-10-02 00:28:54.764210', 'step': 10265, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:54.821157', 'step': 10265, 'epoch': 1}
{'type': 'loss', 'content': 0.09638005495071411, 'timestamp': '2025-10-02 00:28:54.824320', 'step': 10266, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:54.881411', 'step': 10266, 'epoch': 1}
{'type': 'loss', 'content': 0.10147237777709961, 'timestamp': '2025-10-02 00:28:54.883711', 'step': 10267, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:54.939904', 'step': 10267, 'epoch': 1}
{'type': 'loss', 'content': 0.10012097656726837, 'timestamp': '2025-10-02 00:28:54.946567', 'step': 10268, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:55.004255', 'step': 10268, 'epoch': 1}
{'type': 'loss', 'content': 0.09131384640932083, 'timestamp': '2025-10-02 00:28:55.011692', 'step': 10269, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:55.068148', 'step': 10269, 'epoch': 1}
{'type': 'loss', 'content': 0.1514083743095398, 'timestamp': '2025-10-02 00:28:55.070727', 'step': 10270, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:28:55.133956', 'step': 10270, 'epoch': 1}
{'type': 'loss', 'content': 0.03576120734214783, 'timestamp': '2025-10-02 00:28:55.144448', 'step': 10271, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:28:55.213871', 'step': 10271, 'epoch': 1}
{'type': 'loss', 'content': 0.019405445083975792, 'timestamp': '2025-10-02 00:28:55.225119', 'step': 10272, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:55.286329', 'step': 10272, 'epoch': 1}
{'type': 'loss', 'content': 0.22032305598258972, 'timestamp': '2025-10-02 00:28:55.288814', 'step': 10273, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:55.344657', 'step': 10273, 'epoch': 1}
{'type': 'loss', 'content': 0.0455741249024868, 'timestamp': '2025-10-02 00:28:55.347013', 'step': 10274, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:55.402024', 'step': 10274, 'epoch': 1}
{'type': 'loss', 'content': 0.046803537756204605, 'timestamp': '2025-10-02 00:28:55.409369', 'step': 10275, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:55.465772', 'step': 10275, 'epoch': 1}
{'type': 'loss', 'content': 0.012254585511982441, 'timestamp': '2025-10-02 00:28:55.474181', 'step': 10276, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:55.530080', 'step': 10276, 'epoch': 1}
{'type': 'loss', 'content': 0.1240936815738678, 'timestamp': '2025-10-02 00:28:55.532472', 'step': 10277, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:55.586880', 'step': 10277, 'epoch': 1}
{'type': 'loss', 'content': 0.060528770089149475, 'timestamp': '2025-10-02 00:28:55.594432', 'step': 10278, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:55.650485', 'step': 10278, 'epoch': 1}
{'type': 'loss', 'content': 0.11533176898956299, 'timestamp': '2025-10-02 00:28:55.657579', 'step': 10279, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:55.714085', 'step': 10279, 'epoch': 1}
{'type': 'loss', 'content': 0.008518359623849392, 'timestamp': '2025-10-02 00:28:55.724305', 'step': 10280, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:55.779341', 'step': 10280, 'epoch': 1}
{'type': 'loss', 'content': 0.07894973456859589, 'timestamp': '2025-10-02 00:28:55.786982', 'step': 10281, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:55.842877', 'step': 10281, 'epoch': 1}
{'type': 'loss', 'content': 0.04310297220945358, 'timestamp': '2025-10-02 00:28:55.845331', 'step': 10282, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:28:55.903861', 'step': 10282, 'epoch': 1}
{'type': 'loss', 'content': 0.07814770936965942, 'timestamp': '2025-10-02 00:28:55.906332', 'step': 10283, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:55.962649', 'step': 10283, 'epoch': 1}
{'type': 'loss', 'content': 0.0212703850120306, 'timestamp': '2025-10-02 00:28:55.970826', 'step': 10284, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:56.025540', 'step': 10284, 'epoch': 1}
{'type': 'loss', 'content': 0.06615108996629715, 'timestamp': '2025-10-02 00:28:56.035821', 'step': 10285, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:28:56.090108', 'step': 10285, 'epoch': 1}
{'type': 'loss', 'content': 0.08661951869726181, 'timestamp': '2025-10-02 00:28:56.092545', 'step': 10286, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:56.147704', 'step': 10286, 'epoch': 1}
{'type': 'loss', 'content': 0.08944900333881378, 'timestamp': '2025-10-02 00:28:56.150355', 'step': 10287, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:56.210063', 'step': 10287, 'epoch': 1}
{'type': 'loss', 'content': 0.04171641543507576, 'timestamp': '2025-10-02 00:28:56.217165', 'step': 10288, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:56.283132', 'step': 10288, 'epoch': 1}
{'type': 'loss', 'content': 0.05465312302112579, 'timestamp': '2025-10-02 00:28:56.285526', 'step': 10289, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:56.340915', 'step': 10289, 'epoch': 1}
{'type': 'loss', 'content': 0.11965367943048477, 'timestamp': '2025-10-02 00:28:56.344518', 'step': 10290, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:56.400601', 'step': 10290, 'epoch': 1}
{'type': 'loss', 'content': 0.03274695947766304, 'timestamp': '2025-10-02 00:28:56.410111', 'step': 10291, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:56.464478', 'step': 10291, 'epoch': 1}
{'type': 'loss', 'content': 0.13056787848472595, 'timestamp': '2025-10-02 00:28:56.470255', 'step': 10292, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:56.524773', 'step': 10292, 'epoch': 1}
{'type': 'loss', 'content': 0.08126004040241241, 'timestamp': '2025-10-02 00:28:56.535132', 'step': 10293, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:56.589801', 'step': 10293, 'epoch': 1}
{'type': 'loss', 'content': 0.0405825674533844, 'timestamp': '2025-10-02 00:28:56.592871', 'step': 10294, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:28:56.655503', 'step': 10294, 'epoch': 1}
{'type': 'loss', 'content': 0.09531772136688232, 'timestamp': '2025-10-02 00:28:56.665964', 'step': 10295, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:56.721526', 'step': 10295, 'epoch': 1}
{'type': 'loss', 'content': 0.08290012180805206, 'timestamp': '2025-10-02 00:28:56.728372', 'step': 10296, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:56.781906', 'step': 10296, 'epoch': 1}
{'type': 'loss', 'content': 0.09116777777671814, 'timestamp': '2025-10-02 00:28:56.791470', 'step': 10297, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:56.847490', 'step': 10297, 'epoch': 1}
{'type': 'loss', 'content': 0.041485872119665146, 'timestamp': '2025-10-02 00:28:56.853108', 'step': 10298, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:56.907760', 'step': 10298, 'epoch': 1}
{'type': 'loss', 'content': 0.05865461751818657, 'timestamp': '2025-10-02 00:28:56.910052', 'step': 10299, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:56.964105', 'step': 10299, 'epoch': 1}
{'type': 'loss', 'content': 0.15030306577682495, 'timestamp': '2025-10-02 00:28:56.970289', 'step': 10300, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:57.023419', 'step': 10300, 'epoch': 1}
{'type': 'loss', 'content': 0.13946184515953064, 'timestamp': '2025-10-02 00:28:57.026477', 'step': 10301, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:57.082154', 'step': 10301, 'epoch': 1}
{'type': 'loss', 'content': 0.008120756596326828, 'timestamp': '2025-10-02 00:28:57.089839', 'step': 10302, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:28:57.159525', 'step': 10302, 'epoch': 1}
{'type': 'loss', 'content': 0.029450206086039543, 'timestamp': '2025-10-02 00:28:57.171877', 'step': 10303, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:57.227674', 'step': 10303, 'epoch': 1}
{'type': 'loss', 'content': 0.05568304285407066, 'timestamp': '2025-10-02 00:28:57.233987', 'step': 10304, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:57.288538', 'step': 10304, 'epoch': 1}
{'type': 'loss', 'content': 0.07356203347444534, 'timestamp': '2025-10-02 00:28:57.292899', 'step': 10305, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:57.352933', 'step': 10305, 'epoch': 1}
{'type': 'loss', 'content': 0.05140894651412964, 'timestamp': '2025-10-02 00:28:57.355499', 'step': 10306, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:57.415643', 'step': 10306, 'epoch': 1}
{'type': 'loss', 'content': 0.07930810749530792, 'timestamp': '2025-10-02 00:28:57.418165', 'step': 10307, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:57.472615', 'step': 10307, 'epoch': 1}
{'type': 'loss', 'content': 0.11239485442638397, 'timestamp': '2025-10-02 00:28:57.481001', 'step': 10308, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:57.539133', 'step': 10308, 'epoch': 1}
{'type': 'loss', 'content': 0.057385124266147614, 'timestamp': '2025-10-02 00:28:57.550059', 'step': 10309, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:57.603838', 'step': 10309, 'epoch': 1}
{'type': 'loss', 'content': 0.1385040134191513, 'timestamp': '2025-10-02 00:28:57.606452', 'step': 10310, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:57.663026', 'step': 10310, 'epoch': 1}
{'type': 'loss', 'content': 0.023574279621243477, 'timestamp': '2025-10-02 00:28:57.666088', 'step': 10311, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:57.721417', 'step': 10311, 'epoch': 1}
{'type': 'loss', 'content': 0.11701665073633194, 'timestamp': '2025-10-02 00:28:57.727731', 'step': 10312, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:57.781073', 'step': 10312, 'epoch': 1}
{'type': 'loss', 'content': 0.09869909286499023, 'timestamp': '2025-10-02 00:28:57.783464', 'step': 10313, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:57.837442', 'step': 10313, 'epoch': 1}
{'type': 'loss', 'content': 0.05006391182541847, 'timestamp': '2025-10-02 00:28:57.839842', 'step': 10314, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:57.893977', 'step': 10314, 'epoch': 1}
{'type': 'loss', 'content': 0.11183536797761917, 'timestamp': '2025-10-02 00:28:57.896323', 'step': 10315, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:57.951977', 'step': 10315, 'epoch': 1}
{'type': 'loss', 'content': 0.04243526607751846, 'timestamp': '2025-10-02 00:28:57.958768', 'step': 10316, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:58.011877', 'step': 10316, 'epoch': 1}
{'type': 'loss', 'content': 0.11209195107221603, 'timestamp': '2025-10-02 00:28:58.021489', 'step': 10317, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:58.081184', 'step': 10317, 'epoch': 1}
{'type': 'loss', 'content': 0.08867651969194412, 'timestamp': '2025-10-02 00:28:58.091394', 'step': 10318, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:28:58.147739', 'step': 10318, 'epoch': 1}
{'type': 'loss', 'content': 0.05014641210436821, 'timestamp': '2025-10-02 00:28:58.156955', 'step': 10319, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:28:58.223847', 'step': 10319, 'epoch': 1}
{'type': 'loss', 'content': 0.11986199766397476, 'timestamp': '2025-10-02 00:28:58.231488', 'step': 10320, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:58.289488', 'step': 10320, 'epoch': 1}
{'type': 'loss', 'content': 0.08720199763774872, 'timestamp': '2025-10-02 00:28:58.295276', 'step': 10321, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:58.349592', 'step': 10321, 'epoch': 1}
{'type': 'loss', 'content': 0.07929330319166183, 'timestamp': '2025-10-02 00:28:58.351955', 'step': 10322, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:58.406102', 'step': 10322, 'epoch': 1}
{'type': 'loss', 'content': 0.062168508768081665, 'timestamp': '2025-10-02 00:28:58.408650', 'step': 10323, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:58.463139', 'step': 10323, 'epoch': 1}
{'type': 'loss', 'content': 0.09776904433965683, 'timestamp': '2025-10-02 00:28:58.469099', 'step': 10324, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:28:58.540876', 'step': 10324, 'epoch': 1}
{'type': 'loss', 'content': 0.017466116696596146, 'timestamp': '2025-10-02 00:28:58.555297', 'step': 10325, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:58.609149', 'step': 10325, 'epoch': 1}
{'type': 'loss', 'content': 0.16123434901237488, 'timestamp': '2025-10-02 00:28:58.611937', 'step': 10326, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:58.675156', 'step': 10326, 'epoch': 1}
{'type': 'loss', 'content': 0.061846520751714706, 'timestamp': '2025-10-02 00:28:58.682542', 'step': 10327, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:58.736890', 'step': 10327, 'epoch': 1}
{'type': 'loss', 'content': 0.07429177314043045, 'timestamp': '2025-10-02 00:28:58.742721', 'step': 10328, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:28:58.797819', 'step': 10328, 'epoch': 1}
{'type': 'loss', 'content': 0.04354572296142578, 'timestamp': '2025-10-02 00:28:58.808094', 'step': 10329, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:58.868982', 'step': 10329, 'epoch': 1}
{'type': 'loss', 'content': 0.025556962937116623, 'timestamp': '2025-10-02 00:28:58.879189', 'step': 10330, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:28:58.933485', 'step': 10330, 'epoch': 1}
{'type': 'loss', 'content': 0.13607166707515717, 'timestamp': '2025-10-02 00:28:58.936022', 'step': 10331, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:28:58.990584', 'step': 10331, 'epoch': 1}
{'type': 'loss', 'content': 0.14477959275245667, 'timestamp': '2025-10-02 00:28:58.996154', 'step': 10332, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:28:59.050566', 'step': 10332, 'epoch': 1}
{'type': 'loss', 'content': 0.204985573887825, 'timestamp': '2025-10-02 00:28:59.053277', 'step': 10333, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:28:59.107802', 'step': 10333, 'epoch': 1}
{'type': 'loss', 'content': 0.12106986343860626, 'timestamp': '2025-10-02 00:28:59.110376', 'step': 10334, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:28:59.164293', 'step': 10334, 'epoch': 1}
{'type': 'loss', 'content': 0.11487279087305069, 'timestamp': '2025-10-02 00:28:59.166940', 'step': 10335, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:59.222338', 'step': 10335, 'epoch': 1}
{'type': 'loss', 'content': 0.0414191298186779, 'timestamp': '2025-10-02 00:28:59.229401', 'step': 10336, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:28:59.290115', 'step': 10336, 'epoch': 1}
{'type': 'loss', 'content': 0.02902907319366932, 'timestamp': '2025-10-02 00:28:59.301086', 'step': 10337, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:59.356262', 'step': 10337, 'epoch': 1}
{'type': 'loss', 'content': 0.014832044020295143, 'timestamp': '2025-10-02 00:28:59.359055', 'step': 10338, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:59.414061', 'step': 10338, 'epoch': 1}
{'type': 'loss', 'content': 0.02453489415347576, 'timestamp': '2025-10-02 00:28:59.421446', 'step': 10339, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:28:59.475977', 'step': 10339, 'epoch': 1}
{'type': 'loss', 'content': 0.06703915446996689, 'timestamp': '2025-10-02 00:28:59.481872', 'step': 10340, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:59.535889', 'step': 10340, 'epoch': 1}
{'type': 'loss', 'content': 0.12352292984724045, 'timestamp': '2025-10-02 00:28:59.538442', 'step': 10341, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:28:59.592863', 'step': 10341, 'epoch': 1}
{'type': 'loss', 'content': 0.05591211095452309, 'timestamp': '2025-10-02 00:28:59.598753', 'step': 10342, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:28:59.659885', 'step': 10342, 'epoch': 1}
{'type': 'loss', 'content': 0.03641355782747269, 'timestamp': '2025-10-02 00:28:59.670380', 'step': 10343, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:28:59.725157', 'step': 10343, 'epoch': 1}
{'type': 'loss', 'content': 0.1863178014755249, 'timestamp': '2025-10-02 00:28:59.731191', 'step': 10344, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:28:59.787688', 'step': 10344, 'epoch': 1}
{'type': 'loss', 'content': 0.09502004832029343, 'timestamp': '2025-10-02 00:28:59.790047', 'step': 10345, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:59.844482', 'step': 10345, 'epoch': 1}
{'type': 'loss', 'content': 0.018726356327533722, 'timestamp': '2025-10-02 00:28:59.851685', 'step': 10346, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:28:59.906196', 'step': 10346, 'epoch': 1}
{'type': 'loss', 'content': 0.08099231868982315, 'timestamp': '2025-10-02 00:28:59.908544', 'step': 10347, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:28:59.962841', 'step': 10347, 'epoch': 1}
{'type': 'loss', 'content': 0.08055561035871506, 'timestamp': '2025-10-02 00:28:59.971092', 'step': 10348, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:00.025625', 'step': 10348, 'epoch': 1}
{'type': 'loss', 'content': 0.12646649777889252, 'timestamp': '2025-10-02 00:29:00.028289', 'step': 10349, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:00.082592', 'step': 10349, 'epoch': 1}
{'type': 'loss', 'content': 0.0445115827023983, 'timestamp': '2025-10-02 00:29:00.084795', 'step': 10350, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:00.139242', 'step': 10350, 'epoch': 1}
{'type': 'loss', 'content': 0.10168148577213287, 'timestamp': '2025-10-02 00:29:00.142012', 'step': 10351, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:29:00.197119', 'step': 10351, 'epoch': 1}
{'type': 'loss', 'content': 0.07221349328756332, 'timestamp': '2025-10-02 00:29:00.204210', 'step': 10352, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:29:00.260776', 'step': 10352, 'epoch': 1}
{'type': 'loss', 'content': 0.05947931483387947, 'timestamp': '2025-10-02 00:29:00.264578', 'step': 10353, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:00.319878', 'step': 10353, 'epoch': 1}
{'type': 'loss', 'content': 0.07325295358896255, 'timestamp': '2025-10-02 00:29:00.322210', 'step': 10354, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:00.376782', 'step': 10354, 'epoch': 1}
{'type': 'loss', 'content': 0.04072757437825203, 'timestamp': '2025-10-02 00:29:00.379745', 'step': 10355, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:00.433979', 'step': 10355, 'epoch': 1}
{'type': 'loss', 'content': 0.014088794589042664, 'timestamp': '2025-10-02 00:29:00.442412', 'step': 10356, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:00.495837', 'step': 10356, 'epoch': 1}
{'type': 'loss', 'content': 0.03974146023392677, 'timestamp': '2025-10-02 00:29:00.501938', 'step': 10357, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:00.556764', 'step': 10357, 'epoch': 1}
{'type': 'loss', 'content': 0.0911392942070961, 'timestamp': '2025-10-02 00:29:00.559743', 'step': 10358, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:00.614642', 'step': 10358, 'epoch': 1}
{'type': 'loss', 'content': 0.054143473505973816, 'timestamp': '2025-10-02 00:29:00.616686', 'step': 10359, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:00.670702', 'step': 10359, 'epoch': 1}
{'type': 'loss', 'content': 0.06829674541950226, 'timestamp': '2025-10-02 00:29:00.679205', 'step': 10360, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:00.734231', 'step': 10360, 'epoch': 1}
{'type': 'loss', 'content': 0.1771017611026764, 'timestamp': '2025-10-02 00:29:00.736556', 'step': 10361, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:00.791374', 'step': 10361, 'epoch': 1}
{'type': 'loss', 'content': 0.013312273658812046, 'timestamp': '2025-10-02 00:29:00.799346', 'step': 10362, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:00.854439', 'step': 10362, 'epoch': 1}
{'type': 'loss', 'content': 0.20867718756198883, 'timestamp': '2025-10-02 00:29:00.857002', 'step': 10363, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:00.910664', 'step': 10363, 'epoch': 1}
{'type': 'loss', 'content': 0.11627206951379776, 'timestamp': '2025-10-02 00:29:00.916667', 'step': 10364, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:00.969397', 'step': 10364, 'epoch': 1}
{'type': 'loss', 'content': 0.06526338309049606, 'timestamp': '2025-10-02 00:29:00.975711', 'step': 10365, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:01.030260', 'step': 10365, 'epoch': 1}
{'type': 'loss', 'content': 0.07479732483625412, 'timestamp': '2025-10-02 00:29:01.038001', 'step': 10366, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:29:01.100129', 'step': 10366, 'epoch': 1}
{'type': 'loss', 'content': 0.029922109097242355, 'timestamp': '2025-10-02 00:29:01.111009', 'step': 10367, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:29:01.172644', 'step': 10367, 'epoch': 1}
{'type': 'loss', 'content': 0.03641042113304138, 'timestamp': '2025-10-02 00:29:01.183660', 'step': 10368, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:01.236382', 'step': 10368, 'epoch': 1}
{'type': 'loss', 'content': 0.13113118708133698, 'timestamp': '2025-10-02 00:29:01.238841', 'step': 10369, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:01.295174', 'step': 10369, 'epoch': 1}
{'type': 'loss', 'content': 0.06850909441709518, 'timestamp': '2025-10-02 00:29:01.297360', 'step': 10370, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:01.352745', 'step': 10370, 'epoch': 1}
{'type': 'loss', 'content': 0.0739695280790329, 'timestamp': '2025-10-02 00:29:01.362248', 'step': 10371, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:01.416780', 'step': 10371, 'epoch': 1}
{'type': 'loss', 'content': 0.16362646222114563, 'timestamp': '2025-10-02 00:29:01.422565', 'step': 10372, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:29:01.484185', 'step': 10372, 'epoch': 1}
{'type': 'loss', 'content': 0.09017015993595123, 'timestamp': '2025-10-02 00:29:01.495188', 'step': 10373, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:01.550815', 'step': 10373, 'epoch': 1}
{'type': 'loss', 'content': 0.10298562794923782, 'timestamp': '2025-10-02 00:29:01.558239', 'step': 10374, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:29:01.620580', 'step': 10374, 'epoch': 1}
{'type': 'loss', 'content': 0.06101760268211365, 'timestamp': '2025-10-02 00:29:01.631367', 'step': 10375, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:01.685418', 'step': 10375, 'epoch': 1}
{'type': 'loss', 'content': 0.027916274964809418, 'timestamp': '2025-10-02 00:29:01.693864', 'step': 10376, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:01.748500', 'step': 10376, 'epoch': 1}
{'type': 'loss', 'content': 0.08929304778575897, 'timestamp': '2025-10-02 00:29:01.751214', 'step': 10377, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:01.804852', 'step': 10377, 'epoch': 1}
{'type': 'loss', 'content': 0.1488071233034134, 'timestamp': '2025-10-02 00:29:01.809473', 'step': 10378, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:01.864077', 'step': 10378, 'epoch': 1}
{'type': 'loss', 'content': 0.03583916649222374, 'timestamp': '2025-10-02 00:29:01.866853', 'step': 10379, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:01.921430', 'step': 10379, 'epoch': 1}
{'type': 'loss', 'content': 0.03815816342830658, 'timestamp': '2025-10-02 00:29:01.930007', 'step': 10380, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:01.984125', 'step': 10380, 'epoch': 1}
{'type': 'loss', 'content': 0.056874845176935196, 'timestamp': '2025-10-02 00:29:01.986596', 'step': 10381, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:02.040590', 'step': 10381, 'epoch': 1}
{'type': 'loss', 'content': 0.1329449713230133, 'timestamp': '2025-10-02 00:29:02.043349', 'step': 10382, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:29:02.098116', 'step': 10382, 'epoch': 1}
{'type': 'loss', 'content': 0.0357046015560627, 'timestamp': '2025-10-02 00:29:02.107489', 'step': 10383, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:02.161190', 'step': 10383, 'epoch': 1}
{'type': 'loss', 'content': 0.16783466935157776, 'timestamp': '2025-10-02 00:29:02.168418', 'step': 10384, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:02.220901', 'step': 10384, 'epoch': 1}
{'type': 'loss', 'content': 0.14118336141109467, 'timestamp': '2025-10-02 00:29:02.224682', 'step': 10385, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:02.281597', 'step': 10385, 'epoch': 1}
{'type': 'loss', 'content': 0.04717576503753662, 'timestamp': '2025-10-02 00:29:02.284417', 'step': 10386, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:02.340697', 'step': 10386, 'epoch': 1}
{'type': 'loss', 'content': 0.006261874921619892, 'timestamp': '2025-10-02 00:29:02.350197', 'step': 10387, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:02.405083', 'step': 10387, 'epoch': 1}
{'type': 'loss', 'content': 0.03740333393216133, 'timestamp': '2025-10-02 00:29:02.411568', 'step': 10388, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:02.465502', 'step': 10388, 'epoch': 1}
{'type': 'loss', 'content': 0.08827023953199387, 'timestamp': '2025-10-02 00:29:02.468119', 'step': 10389, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:02.523468', 'step': 10389, 'epoch': 1}
{'type': 'loss', 'content': 0.12125100195407867, 'timestamp': '2025-10-02 00:29:02.525593', 'step': 10390, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:02.580050', 'step': 10390, 'epoch': 1}
{'type': 'loss', 'content': 0.03451148793101311, 'timestamp': '2025-10-02 00:29:02.586157', 'step': 10391, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:02.640658', 'step': 10391, 'epoch': 1}
{'type': 'loss', 'content': 0.07150071859359741, 'timestamp': '2025-10-02 00:29:02.650779', 'step': 10392, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:02.705265', 'step': 10392, 'epoch': 1}
{'type': 'loss', 'content': 0.04043683037161827, 'timestamp': '2025-10-02 00:29:02.707922', 'step': 10393, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:02.762935', 'step': 10393, 'epoch': 1}
{'type': 'loss', 'content': 0.17586643993854523, 'timestamp': '2025-10-02 00:29:02.765441', 'step': 10394, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:02.820386', 'step': 10394, 'epoch': 1}
{'type': 'loss', 'content': 0.018515169620513916, 'timestamp': '2025-10-02 00:29:02.822794', 'step': 10395, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:02.878036', 'step': 10395, 'epoch': 1}
{'type': 'loss', 'content': 0.03456853702664375, 'timestamp': '2025-10-02 00:29:02.884262', 'step': 10396, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:02.938221', 'step': 10396, 'epoch': 1}
{'type': 'loss', 'content': 0.05819449946284294, 'timestamp': '2025-10-02 00:29:02.941113', 'step': 10397, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:29:02.996078', 'step': 10397, 'epoch': 1}
{'type': 'loss', 'content': 0.11395496129989624, 'timestamp': '2025-10-02 00:29:03.005472', 'step': 10398, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:03.060915', 'step': 10398, 'epoch': 1}
{'type': 'loss', 'content': 0.08423619717359543, 'timestamp': '2025-10-02 00:29:03.068488', 'step': 10399, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:03.122870', 'step': 10399, 'epoch': 1}
{'type': 'loss', 'content': 0.1355048567056656, 'timestamp': '2025-10-02 00:29:03.128868', 'step': 10400, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:03.181749', 'step': 10400, 'epoch': 1}
{'type': 'loss', 'content': 0.21918396651744843, 'timestamp': '2025-10-02 00:29:03.183964', 'step': 10401, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:03.238113', 'step': 10401, 'epoch': 1}
{'type': 'loss', 'content': 0.12871068716049194, 'timestamp': '2025-10-02 00:29:03.241227', 'step': 10402, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:29:03.304714', 'step': 10402, 'epoch': 1}
{'type': 'loss', 'content': 0.06352782249450684, 'timestamp': '2025-10-02 00:29:03.315360', 'step': 10403, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:29:03.371069', 'step': 10403, 'epoch': 1}
{'type': 'loss', 'content': 0.039701927453279495, 'timestamp': '2025-10-02 00:29:03.381238', 'step': 10404, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:03.435432', 'step': 10404, 'epoch': 1}
{'type': 'loss', 'content': 0.09054934233427048, 'timestamp': '2025-10-02 00:29:03.438003', 'step': 10405, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:03.491795', 'step': 10405, 'epoch': 1}
{'type': 'loss', 'content': 0.10759179294109344, 'timestamp': '2025-10-02 00:29:03.494254', 'step': 10406, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:03.549715', 'step': 10406, 'epoch': 1}
{'type': 'loss', 'content': 0.0384233258664608, 'timestamp': '2025-10-02 00:29:03.553647', 'step': 10407, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:03.607652', 'step': 10407, 'epoch': 1}
{'type': 'loss', 'content': 0.10209112614393234, 'timestamp': '2025-10-02 00:29:03.613808', 'step': 10408, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:29:03.667881', 'step': 10408, 'epoch': 1}
{'type': 'loss', 'content': 0.07581152021884918, 'timestamp': '2025-10-02 00:29:03.677696', 'step': 10409, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:03.732657', 'step': 10409, 'epoch': 1}
{'type': 'loss', 'content': 0.16089127957820892, 'timestamp': '2025-10-02 00:29:03.735047', 'step': 10410, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:03.788680', 'step': 10410, 'epoch': 1}
{'type': 'loss', 'content': 0.15180395543575287, 'timestamp': '2025-10-02 00:29:03.791294', 'step': 10411, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:03.845822', 'step': 10411, 'epoch': 1}
{'type': 'loss', 'content': 0.035720095038414, 'timestamp': '2025-10-02 00:29:03.853220', 'step': 10412, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:03.908584', 'step': 10412, 'epoch': 1}
{'type': 'loss', 'content': 0.0799013078212738, 'timestamp': '2025-10-02 00:29:03.911361', 'step': 10413, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:03.966012', 'step': 10413, 'epoch': 1}
{'type': 'loss', 'content': 0.04162447899580002, 'timestamp': '2025-10-02 00:29:03.968843', 'step': 10414, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:04.027649', 'step': 10414, 'epoch': 1}
{'type': 'loss', 'content': 0.019039040431380272, 'timestamp': '2025-10-02 00:29:04.037094', 'step': 10415, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:29:04.102114', 'step': 10415, 'epoch': 1}
{'type': 'loss', 'content': 0.03443701192736626, 'timestamp': '2025-10-02 00:29:04.113341', 'step': 10416, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:04.170411', 'step': 10416, 'epoch': 1}
{'type': 'loss', 'content': 0.09362480044364929, 'timestamp': '2025-10-02 00:29:04.178408', 'step': 10417, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:04.235623', 'step': 10417, 'epoch': 1}
{'type': 'loss', 'content': 0.03983429819345474, 'timestamp': '2025-10-02 00:29:04.240416', 'step': 10418, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:04.301868', 'step': 10418, 'epoch': 1}
{'type': 'loss', 'content': 0.11503872275352478, 'timestamp': '2025-10-02 00:29:04.306531', 'step': 10419, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:04.367910', 'step': 10419, 'epoch': 1}
{'type': 'loss', 'content': 0.08259939402341843, 'timestamp': '2025-10-02 00:29:04.375709', 'step': 10420, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:04.431065', 'step': 10420, 'epoch': 1}
{'type': 'loss', 'content': 0.08260203897953033, 'timestamp': '2025-10-02 00:29:04.437433', 'step': 10421, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:04.492605', 'step': 10421, 'epoch': 1}
{'type': 'loss', 'content': 0.1528140902519226, 'timestamp': '2025-10-02 00:29:04.494977', 'step': 10422, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:04.551585', 'step': 10422, 'epoch': 1}
{'type': 'loss', 'content': 0.027298860251903534, 'timestamp': '2025-10-02 00:29:04.554765', 'step': 10423, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:04.614979', 'step': 10423, 'epoch': 1}
{'type': 'loss', 'content': 0.02765485644340515, 'timestamp': '2025-10-02 00:29:04.621755', 'step': 10424, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:04.678026', 'step': 10424, 'epoch': 1}
{'type': 'loss', 'content': 0.07263974100351334, 'timestamp': '2025-10-02 00:29:04.680784', 'step': 10425, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:04.739191', 'step': 10425, 'epoch': 1}
{'type': 'loss', 'content': 0.03431322053074837, 'timestamp': '2025-10-02 00:29:04.748772', 'step': 10426, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:04.805588', 'step': 10426, 'epoch': 1}
{'type': 'loss', 'content': 0.08591753244400024, 'timestamp': '2025-10-02 00:29:04.809524', 'step': 10427, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:29:04.873540', 'step': 10427, 'epoch': 1}
{'type': 'loss', 'content': 0.04524712264537811, 'timestamp': '2025-10-02 00:29:04.884530', 'step': 10428, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:04.940369', 'step': 10428, 'epoch': 1}
{'type': 'loss', 'content': 0.08398611098527908, 'timestamp': '2025-10-02 00:29:04.943143', 'step': 10429, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:04.999312', 'step': 10429, 'epoch': 1}
{'type': 'loss', 'content': 0.09987091273069382, 'timestamp': '2025-10-02 00:29:05.002448', 'step': 10430, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:05.059592', 'step': 10430, 'epoch': 1}
{'type': 'loss', 'content': 0.03895598277449608, 'timestamp': '2025-10-02 00:29:05.066857', 'step': 10431, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:05.125394', 'step': 10431, 'epoch': 1}
{'type': 'loss', 'content': 0.1848248839378357, 'timestamp': '2025-10-02 00:29:05.132644', 'step': 10432, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:29:05.194596', 'step': 10432, 'epoch': 1}
{'type': 'loss', 'content': 0.06868354231119156, 'timestamp': '2025-10-02 00:29:05.206150', 'step': 10433, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:05.262323', 'step': 10433, 'epoch': 1}
{'type': 'loss', 'content': 0.03597547486424446, 'timestamp': '2025-10-02 00:29:05.265289', 'step': 10434, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:05.325359', 'step': 10434, 'epoch': 1}
{'type': 'loss', 'content': 0.14170803129673004, 'timestamp': '2025-10-02 00:29:05.329451', 'step': 10435, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:05.387774', 'step': 10435, 'epoch': 1}
{'type': 'loss', 'content': 0.02268826588988304, 'timestamp': '2025-10-02 00:29:05.394818', 'step': 10436, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:05.450165', 'step': 10436, 'epoch': 1}
{'type': 'loss', 'content': 0.17026029527187347, 'timestamp': '2025-10-02 00:29:05.453054', 'step': 10437, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:05.509206', 'step': 10437, 'epoch': 1}
{'type': 'loss', 'content': 0.08456245064735413, 'timestamp': '2025-10-02 00:29:05.512857', 'step': 10438, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:05.569887', 'step': 10438, 'epoch': 1}
{'type': 'loss', 'content': 0.0503188893198967, 'timestamp': '2025-10-02 00:29:05.573539', 'step': 10439, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:05.635292', 'step': 10439, 'epoch': 1}
{'type': 'loss', 'content': 0.07180079072713852, 'timestamp': '2025-10-02 00:29:05.641094', 'step': 10440, 'epoch': 1}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:29:32.927076', 'step': 10440, 'epoch': 1}
{'type': 'pplx', 'content': 103.70469584528894, 'timestamp': '2025-10-02 00:29:32.931622', 'step': 10440, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:32.988625', 'step': 10440, 'epoch': 1}
{'type': 'loss', 'content': 0.06279435008764267, 'timestamp': '2025-10-02 00:29:32.992288', 'step': 10441, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:33.051678', 'step': 10441, 'epoch': 1}
{'type': 'loss', 'content': 0.06452818959951401, 'timestamp': '2025-10-02 00:29:33.054701', 'step': 10442, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:29:33.111601', 'step': 10442, 'epoch': 1}
{'type': 'loss', 'content': 0.05084478482604027, 'timestamp': '2025-10-02 00:29:33.119493', 'step': 10443, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:33.185793', 'step': 10443, 'epoch': 1}
{'type': 'loss', 'content': 0.08759081363677979, 'timestamp': '2025-10-02 00:29:33.199888', 'step': 10444, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:29:33.271479', 'step': 10444, 'epoch': 1}
{'type': 'loss', 'content': 0.23764100670814514, 'timestamp': '2025-10-02 00:29:33.274911', 'step': 10445, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:33.345151', 'step': 10445, 'epoch': 1}
{'type': 'loss', 'content': 0.10670021176338196, 'timestamp': '2025-10-02 00:29:33.348638', 'step': 10446, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:29:33.438144', 'step': 10446, 'epoch': 1}
{'type': 'loss', 'content': 0.041489437222480774, 'timestamp': '2025-10-02 00:29:33.448356', 'step': 10447, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:29:33.517401', 'step': 10447, 'epoch': 1}
{'type': 'loss', 'content': 0.12756487727165222, 'timestamp': '2025-10-02 00:29:33.524425', 'step': 10448, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:29:33.601027', 'step': 10448, 'epoch': 1}
{'type': 'loss', 'content': 0.05058014765381813, 'timestamp': '2025-10-02 00:29:33.612382', 'step': 10449, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:33.670417', 'step': 10449, 'epoch': 1}
{'type': 'loss', 'content': 0.1342848837375641, 'timestamp': '2025-10-02 00:29:33.673108', 'step': 10450, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:33.736159', 'step': 10450, 'epoch': 1}
{'type': 'loss', 'content': 0.06108349189162254, 'timestamp': '2025-10-02 00:29:33.742199', 'step': 10451, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:33.819622', 'step': 10451, 'epoch': 1}
{'type': 'loss', 'content': 0.05617585405707359, 'timestamp': '2025-10-02 00:29:33.831738', 'step': 10452, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:33.894634', 'step': 10452, 'epoch': 1}
{'type': 'loss', 'content': 0.12056280672550201, 'timestamp': '2025-10-02 00:29:33.902112', 'step': 10453, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:33.978378', 'step': 10453, 'epoch': 1}
{'type': 'loss', 'content': 0.035677578300237656, 'timestamp': '2025-10-02 00:29:33.986046', 'step': 10454, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:34.046855', 'step': 10454, 'epoch': 1}
{'type': 'loss', 'content': 0.06067923083901405, 'timestamp': '2025-10-02 00:29:34.049442', 'step': 10455, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:29:34.126334', 'step': 10455, 'epoch': 1}
{'type': 'loss', 'content': 0.011199003085494041, 'timestamp': '2025-10-02 00:29:34.136468', 'step': 10456, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:29:34.203740', 'step': 10456, 'epoch': 1}
{'type': 'loss', 'content': 0.0450369268655777, 'timestamp': '2025-10-02 00:29:34.213200', 'step': 10457, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:34.283134', 'step': 10457, 'epoch': 1}
{'type': 'loss', 'content': 0.07837426662445068, 'timestamp': '2025-10-02 00:29:34.289319', 'step': 10458, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:29:34.364960', 'step': 10458, 'epoch': 1}
{'type': 'loss', 'content': 0.07098441570997238, 'timestamp': '2025-10-02 00:29:34.374381', 'step': 10459, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:34.440046', 'step': 10459, 'epoch': 1}
{'type': 'loss', 'content': 0.04785216227173805, 'timestamp': '2025-10-02 00:29:34.448667', 'step': 10460, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:34.505683', 'step': 10460, 'epoch': 1}
{'type': 'loss', 'content': 0.08768310397863388, 'timestamp': '2025-10-02 00:29:34.523655', 'step': 10461, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:34.588607', 'step': 10461, 'epoch': 1}
{'type': 'loss', 'content': 0.13518399000167847, 'timestamp': '2025-10-02 00:29:34.594046', 'step': 10462, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:34.657566', 'step': 10462, 'epoch': 1}
{'type': 'loss', 'content': 0.1789073497056961, 'timestamp': '2025-10-02 00:29:34.662797', 'step': 10463, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:34.724701', 'step': 10463, 'epoch': 1}
{'type': 'loss', 'content': 0.07286839187145233, 'timestamp': '2025-10-02 00:29:34.736331', 'step': 10464, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:29:34.807779', 'step': 10464, 'epoch': 1}
{'type': 'loss', 'content': 0.1299494355916977, 'timestamp': '2025-10-02 00:29:34.811934', 'step': 10465, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:29:34.884074', 'step': 10465, 'epoch': 1}
{'type': 'loss', 'content': 0.0731414183974266, 'timestamp': '2025-10-02 00:29:34.894537', 'step': 10466, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:34.964090', 'step': 10466, 'epoch': 1}
{'type': 'loss', 'content': 0.044342998415231705, 'timestamp': '2025-10-02 00:29:34.973611', 'step': 10467, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:35.039066', 'step': 10467, 'epoch': 1}
{'type': 'loss', 'content': 0.10159424692392349, 'timestamp': '2025-10-02 00:29:35.045322', 'step': 10468, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:35.107656', 'step': 10468, 'epoch': 1}
{'type': 'loss', 'content': 0.06775578111410141, 'timestamp': '2025-10-02 00:29:35.110727', 'step': 10469, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:35.166507', 'step': 10469, 'epoch': 1}
{'type': 'loss', 'content': 0.08375608921051025, 'timestamp': '2025-10-02 00:29:35.168831', 'step': 10470, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:29:35.225776', 'step': 10470, 'epoch': 1}
{'type': 'loss', 'content': 0.1251792311668396, 'timestamp': '2025-10-02 00:29:35.229453', 'step': 10471, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:35.285431', 'step': 10471, 'epoch': 1}
{'type': 'loss', 'content': 0.06784078478813171, 'timestamp': '2025-10-02 00:29:35.291472', 'step': 10472, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:35.354873', 'step': 10472, 'epoch': 1}
{'type': 'loss', 'content': 0.2204585075378418, 'timestamp': '2025-10-02 00:29:35.357948', 'step': 10473, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:35.421772', 'step': 10473, 'epoch': 1}
{'type': 'loss', 'content': 0.05124931409955025, 'timestamp': '2025-10-02 00:29:35.424710', 'step': 10474, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:35.484992', 'step': 10474, 'epoch': 1}
{'type': 'loss', 'content': 0.0733199194073677, 'timestamp': '2025-10-02 00:29:35.487948', 'step': 10475, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:35.555716', 'step': 10475, 'epoch': 1}
{'type': 'loss', 'content': 0.08270637691020966, 'timestamp': '2025-10-02 00:29:35.566795', 'step': 10476, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:35.624075', 'step': 10476, 'epoch': 1}
{'type': 'loss', 'content': 0.05714259669184685, 'timestamp': '2025-10-02 00:29:35.634349', 'step': 10477, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:35.698000', 'step': 10477, 'epoch': 1}
{'type': 'loss', 'content': 0.12924884259700775, 'timestamp': '2025-10-02 00:29:35.701178', 'step': 10478, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:35.767025', 'step': 10478, 'epoch': 1}
{'type': 'loss', 'content': 0.1782321333885193, 'timestamp': '2025-10-02 00:29:35.772235', 'step': 10479, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:35.829722', 'step': 10479, 'epoch': 1}
{'type': 'loss', 'content': 0.05590210482478142, 'timestamp': '2025-10-02 00:29:35.836348', 'step': 10480, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:29:35.899375', 'step': 10480, 'epoch': 1}
{'type': 'loss', 'content': 0.025495631620287895, 'timestamp': '2025-10-02 00:29:35.911181', 'step': 10481, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:35.974198', 'step': 10481, 'epoch': 1}
{'type': 'loss', 'content': 0.05026915296912193, 'timestamp': '2025-10-02 00:29:35.977827', 'step': 10482, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:36.039311', 'step': 10482, 'epoch': 1}
{'type': 'loss', 'content': 0.040988147258758545, 'timestamp': '2025-10-02 00:29:36.047305', 'step': 10483, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:29:36.102372', 'step': 10483, 'epoch': 1}
{'type': 'loss', 'content': 0.13715411722660065, 'timestamp': '2025-10-02 00:29:36.113303', 'step': 10484, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:36.170089', 'step': 10484, 'epoch': 1}
{'type': 'loss', 'content': 0.017874548211693764, 'timestamp': '2025-10-02 00:29:36.178019', 'step': 10485, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:36.233313', 'step': 10485, 'epoch': 1}
{'type': 'loss', 'content': 0.1662917137145996, 'timestamp': '2025-10-02 00:29:36.236986', 'step': 10486, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:36.294722', 'step': 10486, 'epoch': 1}
{'type': 'loss', 'content': 0.025049667805433273, 'timestamp': '2025-10-02 00:29:36.302439', 'step': 10487, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:36.362824', 'step': 10487, 'epoch': 1}
{'type': 'loss', 'content': 0.1270296722650528, 'timestamp': '2025-10-02 00:29:36.373965', 'step': 10488, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:36.442510', 'step': 10488, 'epoch': 1}
{'type': 'loss', 'content': 0.04812599718570709, 'timestamp': '2025-10-02 00:29:36.449048', 'step': 10489, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:36.513919', 'step': 10489, 'epoch': 1}
{'type': 'loss', 'content': 0.04647164046764374, 'timestamp': '2025-10-02 00:29:36.517453', 'step': 10490, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:36.587674', 'step': 10490, 'epoch': 1}
{'type': 'loss', 'content': 0.05567440018057823, 'timestamp': '2025-10-02 00:29:36.597194', 'step': 10491, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:29:36.661074', 'step': 10491, 'epoch': 1}
{'type': 'loss', 'content': 0.015618694014847279, 'timestamp': '2025-10-02 00:29:36.672011', 'step': 10492, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:29:36.731955', 'step': 10492, 'epoch': 1}
{'type': 'loss', 'content': 0.16177290678024292, 'timestamp': '2025-10-02 00:29:36.745764', 'step': 10493, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:29:36.819639', 'step': 10493, 'epoch': 1}
{'type': 'loss', 'content': 0.20809289813041687, 'timestamp': '2025-10-02 00:29:36.828994', 'step': 10494, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:36.901476', 'step': 10494, 'epoch': 1}
{'type': 'loss', 'content': 0.31067079305648804, 'timestamp': '2025-10-02 00:29:36.904995', 'step': 10495, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:36.978669', 'step': 10495, 'epoch': 1}
{'type': 'loss', 'content': 0.055177778005599976, 'timestamp': '2025-10-02 00:29:36.984892', 'step': 10496, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:29:37.051464', 'step': 10496, 'epoch': 1}
{'type': 'loss', 'content': 0.02193469926714897, 'timestamp': '2025-10-02 00:29:37.061508', 'step': 10497, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:37.144370', 'step': 10497, 'epoch': 1}
{'type': 'loss', 'content': 0.09323987364768982, 'timestamp': '2025-10-02 00:29:37.153264', 'step': 10498, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:29:37.217148', 'step': 10498, 'epoch': 1}
{'type': 'loss', 'content': 0.05285642668604851, 'timestamp': '2025-10-02 00:29:37.220156', 'step': 10499, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:29:37.287274', 'step': 10499, 'epoch': 1}
{'type': 'loss', 'content': 0.029423197731375694, 'timestamp': '2025-10-02 00:29:37.298584', 'step': 10500, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 10500', 'timestamp': '2025-10-02 00:29:37.757457', 'step': 10500, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:37.818043', 'step': 10500, 'epoch': 1}
{'type': 'loss', 'content': 0.11686193943023682, 'timestamp': '2025-10-02 00:29:37.824841', 'step': 10501, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:37.888858', 'step': 10501, 'epoch': 1}
{'type': 'loss', 'content': 0.018418487161397934, 'timestamp': '2025-10-02 00:29:37.892270', 'step': 10502, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:37.948743', 'step': 10502, 'epoch': 1}
{'type': 'loss', 'content': 0.0476333424448967, 'timestamp': '2025-10-02 00:29:37.952834', 'step': 10503, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:38.014729', 'step': 10503, 'epoch': 1}
{'type': 'loss', 'content': 0.07091522961854935, 'timestamp': '2025-10-02 00:29:38.024409', 'step': 10504, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:38.081848', 'step': 10504, 'epoch': 1}
{'type': 'loss', 'content': 0.1978292614221573, 'timestamp': '2025-10-02 00:29:38.085345', 'step': 10505, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:38.157512', 'step': 10505, 'epoch': 1}
{'type': 'loss', 'content': 0.021269820630550385, 'timestamp': '2025-10-02 00:29:38.167074', 'step': 10506, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:38.233348', 'step': 10506, 'epoch': 1}
{'type': 'loss', 'content': 0.04282668977975845, 'timestamp': '2025-10-02 00:29:38.239691', 'step': 10507, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:29:38.299858', 'step': 10507, 'epoch': 1}
{'type': 'loss', 'content': 0.014151555486023426, 'timestamp': '2025-10-02 00:29:38.310022', 'step': 10508, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:38.366635', 'step': 10508, 'epoch': 1}
{'type': 'loss', 'content': 0.11098997294902802, 'timestamp': '2025-10-02 00:29:38.372657', 'step': 10509, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:29:38.439080', 'step': 10509, 'epoch': 1}
{'type': 'loss', 'content': 0.052789874374866486, 'timestamp': '2025-10-02 00:29:38.448439', 'step': 10510, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:29:38.518291', 'step': 10510, 'epoch': 1}
{'type': 'loss', 'content': 0.09004044532775879, 'timestamp': '2025-10-02 00:29:38.524153', 'step': 10511, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:29:38.590937', 'step': 10511, 'epoch': 1}
{'type': 'loss', 'content': 0.017773369327187538, 'timestamp': '2025-10-02 00:29:38.601938', 'step': 10512, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:38.666072', 'step': 10512, 'epoch': 1}
{'type': 'loss', 'content': 0.11702252924442291, 'timestamp': '2025-10-02 00:29:38.672863', 'step': 10513, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:38.740830', 'step': 10513, 'epoch': 1}
{'type': 'loss', 'content': 0.0950736477971077, 'timestamp': '2025-10-02 00:29:38.743987', 'step': 10514, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:38.814656', 'step': 10514, 'epoch': 1}
{'type': 'loss', 'content': 0.06587973982095718, 'timestamp': '2025-10-02 00:29:38.817913', 'step': 10515, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:38.877215', 'step': 10515, 'epoch': 1}
{'type': 'loss', 'content': 0.2308431714773178, 'timestamp': '2025-10-02 00:29:38.883731', 'step': 10516, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:29:38.939073', 'step': 10516, 'epoch': 1}
{'type': 'loss', 'content': 0.08329788595438004, 'timestamp': '2025-10-02 00:29:38.949046', 'step': 10517, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:39.019201', 'step': 10517, 'epoch': 1}
{'type': 'loss', 'content': 0.029146812856197357, 'timestamp': '2025-10-02 00:29:39.026315', 'step': 10518, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:39.092441', 'step': 10518, 'epoch': 1}
{'type': 'loss', 'content': 0.06545693427324295, 'timestamp': '2025-10-02 00:29:39.100390', 'step': 10519, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:39.162837', 'step': 10519, 'epoch': 1}
{'type': 'loss', 'content': 0.11795412749052048, 'timestamp': '2025-10-02 00:29:39.172605', 'step': 10520, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:39.245703', 'step': 10520, 'epoch': 1}
{'type': 'loss', 'content': 0.14121706783771515, 'timestamp': '2025-10-02 00:29:39.253896', 'step': 10521, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:39.315007', 'step': 10521, 'epoch': 1}
{'type': 'loss', 'content': 0.05217810347676277, 'timestamp': '2025-10-02 00:29:39.320049', 'step': 10522, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:39.389121', 'step': 10522, 'epoch': 1}
{'type': 'loss', 'content': 0.04025876149535179, 'timestamp': '2025-10-02 00:29:39.396917', 'step': 10523, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:39.459874', 'step': 10523, 'epoch': 1}
{'type': 'loss', 'content': 0.09588226675987244, 'timestamp': '2025-10-02 00:29:39.466492', 'step': 10524, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:39.526942', 'step': 10524, 'epoch': 1}
{'type': 'loss', 'content': 0.03962457552552223, 'timestamp': '2025-10-02 00:29:39.533097', 'step': 10525, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:39.593363', 'step': 10525, 'epoch': 1}
{'type': 'loss', 'content': 0.08231350779533386, 'timestamp': '2025-10-02 00:29:39.600980', 'step': 10526, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:39.665422', 'step': 10526, 'epoch': 1}
{'type': 'loss', 'content': 0.11332068592309952, 'timestamp': '2025-10-02 00:29:39.671253', 'step': 10527, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:39.733657', 'step': 10527, 'epoch': 1}
{'type': 'loss', 'content': 0.09394422173500061, 'timestamp': '2025-10-02 00:29:39.743891', 'step': 10528, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:39.806357', 'step': 10528, 'epoch': 1}
{'type': 'loss', 'content': 0.06691810488700867, 'timestamp': '2025-10-02 00:29:39.812141', 'step': 10529, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:39.873606', 'step': 10529, 'epoch': 1}
{'type': 'loss', 'content': 0.09940524399280548, 'timestamp': '2025-10-02 00:29:39.880585', 'step': 10530, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:39.938733', 'step': 10530, 'epoch': 1}
{'type': 'loss', 'content': 0.1213989406824112, 'timestamp': '2025-10-02 00:29:39.948322', 'step': 10531, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:40.009648', 'step': 10531, 'epoch': 1}
{'type': 'loss', 'content': 0.011269216425716877, 'timestamp': '2025-10-02 00:29:40.020577', 'step': 10532, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:40.086732', 'step': 10532, 'epoch': 1}
{'type': 'loss', 'content': 0.033998001366853714, 'timestamp': '2025-10-02 00:29:40.093156', 'step': 10533, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:40.157836', 'step': 10533, 'epoch': 1}
{'type': 'loss', 'content': 0.1254875361919403, 'timestamp': '2025-10-02 00:29:40.164476', 'step': 10534, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:40.230526', 'step': 10534, 'epoch': 1}
{'type': 'loss', 'content': 0.052045900374650955, 'timestamp': '2025-10-02 00:29:40.233496', 'step': 10535, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:40.288680', 'step': 10535, 'epoch': 1}
{'type': 'loss', 'content': 0.16624361276626587, 'timestamp': '2025-10-02 00:29:40.299192', 'step': 10536, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:40.361945', 'step': 10536, 'epoch': 1}
{'type': 'loss', 'content': 0.10754847526550293, 'timestamp': '2025-10-02 00:29:40.369489', 'step': 10537, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:40.442936', 'step': 10537, 'epoch': 1}
{'type': 'loss', 'content': 0.16189533472061157, 'timestamp': '2025-10-02 00:29:40.446213', 'step': 10538, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:40.511978', 'step': 10538, 'epoch': 1}
{'type': 'loss', 'content': 0.06847885996103287, 'timestamp': '2025-10-02 00:29:40.515564', 'step': 10539, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:29:40.582931', 'step': 10539, 'epoch': 1}
{'type': 'loss', 'content': 0.04499030485749245, 'timestamp': '2025-10-02 00:29:40.594542', 'step': 10540, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:40.662461', 'step': 10540, 'epoch': 1}
{'type': 'loss', 'content': 0.05263388529419899, 'timestamp': '2025-10-02 00:29:40.664803', 'step': 10541, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:40.719750', 'step': 10541, 'epoch': 1}
{'type': 'loss', 'content': 0.08365240693092346, 'timestamp': '2025-10-02 00:29:40.727394', 'step': 10542, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:29:40.782892', 'step': 10542, 'epoch': 1}
{'type': 'loss', 'content': 0.006254161708056927, 'timestamp': '2025-10-02 00:29:40.792405', 'step': 10543, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:29:40.854878', 'step': 10543, 'epoch': 1}
{'type': 'loss', 'content': 0.025129148736596107, 'timestamp': '2025-10-02 00:29:40.866556', 'step': 10544, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:40.920206', 'step': 10544, 'epoch': 1}
{'type': 'loss', 'content': 0.20689529180526733, 'timestamp': '2025-10-02 00:29:40.922864', 'step': 10545, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:40.977054', 'step': 10545, 'epoch': 1}
{'type': 'loss', 'content': 0.09465611726045609, 'timestamp': '2025-10-02 00:29:40.984986', 'step': 10546, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:29:41.039605', 'step': 10546, 'epoch': 1}
{'type': 'loss', 'content': 0.16332946717739105, 'timestamp': '2025-10-02 00:29:41.042236', 'step': 10547, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:29:41.095918', 'step': 10547, 'epoch': 1}
{'type': 'loss', 'content': 0.0532769151031971, 'timestamp': '2025-10-02 00:29:41.101761', 'step': 10548, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:41.155240', 'step': 10548, 'epoch': 1}
{'type': 'loss', 'content': 0.1213664785027504, 'timestamp': '2025-10-02 00:29:41.157590', 'step': 10549, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:41.211398', 'step': 10549, 'epoch': 1}
{'type': 'loss', 'content': 0.09440114349126816, 'timestamp': '2025-10-02 00:29:41.213817', 'step': 10550, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:41.267350', 'step': 10550, 'epoch': 1}
{'type': 'loss', 'content': 0.08130466192960739, 'timestamp': '2025-10-02 00:29:41.269642', 'step': 10551, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:29:41.328758', 'step': 10551, 'epoch': 1}
{'type': 'loss', 'content': 0.03772098943591118, 'timestamp': '2025-10-02 00:29:41.339758', 'step': 10552, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:29:41.398013', 'step': 10552, 'epoch': 1}
{'type': 'loss', 'content': 0.027480877935886383, 'timestamp': '2025-10-02 00:29:41.409132', 'step': 10553, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:41.463754', 'step': 10553, 'epoch': 1}
{'type': 'loss', 'content': 0.16977360844612122, 'timestamp': '2025-10-02 00:29:41.466618', 'step': 10554, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:29:41.528160', 'step': 10554, 'epoch': 1}
{'type': 'loss', 'content': 0.022483522072434425, 'timestamp': '2025-10-02 00:29:41.538684', 'step': 10555, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:29:41.606625', 'step': 10555, 'epoch': 1}
{'type': 'loss', 'content': 0.0399123951792717, 'timestamp': '2025-10-02 00:29:41.619379', 'step': 10556, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:41.674198', 'step': 10556, 'epoch': 1}
{'type': 'loss', 'content': 0.05339815840125084, 'timestamp': '2025-10-02 00:29:41.676857', 'step': 10557, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:41.732594', 'step': 10557, 'epoch': 1}
{'type': 'loss', 'content': 0.06765636056661606, 'timestamp': '2025-10-02 00:29:41.734902', 'step': 10558, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:41.788645', 'step': 10558, 'epoch': 1}
{'type': 'loss', 'content': 0.14221934974193573, 'timestamp': '2025-10-02 00:29:41.794617', 'step': 10559, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:29:41.856595', 'step': 10559, 'epoch': 1}
{'type': 'loss', 'content': 0.02316923439502716, 'timestamp': '2025-10-02 00:29:41.867845', 'step': 10560, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:41.922578', 'step': 10560, 'epoch': 1}
{'type': 'loss', 'content': 0.15342196822166443, 'timestamp': '2025-10-02 00:29:41.925659', 'step': 10561, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:41.981260', 'step': 10561, 'epoch': 1}
{'type': 'loss', 'content': 0.1944366842508316, 'timestamp': '2025-10-02 00:29:41.983905', 'step': 10562, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:42.038984', 'step': 10562, 'epoch': 1}
{'type': 'loss', 'content': 0.0610252320766449, 'timestamp': '2025-10-02 00:29:42.045029', 'step': 10563, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:42.101644', 'step': 10563, 'epoch': 1}
{'type': 'loss', 'content': 0.07946133613586426, 'timestamp': '2025-10-02 00:29:42.107776', 'step': 10564, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:42.163462', 'step': 10564, 'epoch': 1}
{'type': 'loss', 'content': 0.012429057620465755, 'timestamp': '2025-10-02 00:29:42.171032', 'step': 10565, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:42.227376', 'step': 10565, 'epoch': 1}
{'type': 'loss', 'content': 0.10074222832918167, 'timestamp': '2025-10-02 00:29:42.230814', 'step': 10566, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:42.287383', 'step': 10566, 'epoch': 1}
{'type': 'loss', 'content': 0.06755471974611282, 'timestamp': '2025-10-02 00:29:42.290443', 'step': 10567, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:29:42.345297', 'step': 10567, 'epoch': 1}
{'type': 'loss', 'content': 0.053630512207746506, 'timestamp': '2025-10-02 00:29:42.352929', 'step': 10568, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:42.408395', 'step': 10568, 'epoch': 1}
{'type': 'loss', 'content': 0.04533764719963074, 'timestamp': '2025-10-02 00:29:42.411640', 'step': 10569, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:42.466453', 'step': 10569, 'epoch': 1}
{'type': 'loss', 'content': 0.19657458364963531, 'timestamp': '2025-10-02 00:29:42.469550', 'step': 10570, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:42.526487', 'step': 10570, 'epoch': 1}
{'type': 'loss', 'content': 0.1479995995759964, 'timestamp': '2025-10-02 00:29:42.529574', 'step': 10571, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:42.587598', 'step': 10571, 'epoch': 1}
{'type': 'loss', 'content': 0.1160462498664856, 'timestamp': '2025-10-02 00:29:42.593564', 'step': 10572, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:29:42.655304', 'step': 10572, 'epoch': 1}
{'type': 'loss', 'content': 0.07713739573955536, 'timestamp': '2025-10-02 00:29:42.666613', 'step': 10573, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:42.722589', 'step': 10573, 'epoch': 1}
{'type': 'loss', 'content': 0.0649065151810646, 'timestamp': '2025-10-02 00:29:42.730144', 'step': 10574, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:42.791697', 'step': 10574, 'epoch': 1}
{'type': 'loss', 'content': 0.13088904321193695, 'timestamp': '2025-10-02 00:29:42.794865', 'step': 10575, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:42.852729', 'step': 10575, 'epoch': 1}
{'type': 'loss', 'content': 0.09255164116621017, 'timestamp': '2025-10-02 00:29:42.863083', 'step': 10576, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:29:42.919698', 'step': 10576, 'epoch': 1}
{'type': 'loss', 'content': 0.16646216809749603, 'timestamp': '2025-10-02 00:29:42.922780', 'step': 10577, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:42.977582', 'step': 10577, 'epoch': 1}
{'type': 'loss', 'content': 0.11901941150426865, 'timestamp': '2025-10-02 00:29:42.980588', 'step': 10578, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:43.034527', 'step': 10578, 'epoch': 1}
{'type': 'loss', 'content': 0.05780426412820816, 'timestamp': '2025-10-02 00:29:43.037026', 'step': 10579, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:29:43.093292', 'step': 10579, 'epoch': 1}
{'type': 'loss', 'content': 0.20539970695972443, 'timestamp': '2025-10-02 00:29:43.099140', 'step': 10580, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:43.153050', 'step': 10580, 'epoch': 1}
{'type': 'loss', 'content': 0.09348846971988678, 'timestamp': '2025-10-02 00:29:43.156115', 'step': 10581, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:43.210850', 'step': 10581, 'epoch': 1}
{'type': 'loss', 'content': 0.1979372203350067, 'timestamp': '2025-10-02 00:29:43.214362', 'step': 10582, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:43.272243', 'step': 10582, 'epoch': 1}
{'type': 'loss', 'content': 0.08319994062185287, 'timestamp': '2025-10-02 00:29:43.278327', 'step': 10583, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:43.335289', 'step': 10583, 'epoch': 1}
{'type': 'loss', 'content': 0.19272634387016296, 'timestamp': '2025-10-02 00:29:43.342137', 'step': 10584, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:43.397543', 'step': 10584, 'epoch': 1}
{'type': 'loss', 'content': 0.021804284304380417, 'timestamp': '2025-10-02 00:29:43.400784', 'step': 10585, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:29:43.465374', 'step': 10585, 'epoch': 1}
{'type': 'loss', 'content': 0.033838532865047455, 'timestamp': '2025-10-02 00:29:43.476251', 'step': 10586, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:43.533196', 'step': 10586, 'epoch': 1}
{'type': 'loss', 'content': 0.05489356070756912, 'timestamp': '2025-10-02 00:29:43.536225', 'step': 10587, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:43.593149', 'step': 10587, 'epoch': 1}
{'type': 'loss', 'content': 0.12794315814971924, 'timestamp': '2025-10-02 00:29:43.600018', 'step': 10588, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:43.656996', 'step': 10588, 'epoch': 1}
{'type': 'loss', 'content': 0.04848583787679672, 'timestamp': '2025-10-02 00:29:43.664898', 'step': 10589, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:43.721138', 'step': 10589, 'epoch': 1}
{'type': 'loss', 'content': 0.10823096334934235, 'timestamp': '2025-10-02 00:29:43.728709', 'step': 10590, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:43.787132', 'step': 10590, 'epoch': 1}
{'type': 'loss', 'content': 0.06741832941770554, 'timestamp': '2025-10-02 00:29:43.794953', 'step': 10591, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:43.850175', 'step': 10591, 'epoch': 1}
{'type': 'loss', 'content': 0.034437719732522964, 'timestamp': '2025-10-02 00:29:43.856146', 'step': 10592, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:43.911890', 'step': 10592, 'epoch': 1}
{'type': 'loss', 'content': 0.08456002920866013, 'timestamp': '2025-10-02 00:29:43.915027', 'step': 10593, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:43.971803', 'step': 10593, 'epoch': 1}
{'type': 'loss', 'content': 0.06511992961168289, 'timestamp': '2025-10-02 00:29:43.974649', 'step': 10594, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:44.031565', 'step': 10594, 'epoch': 1}
{'type': 'loss', 'content': 0.12019506096839905, 'timestamp': '2025-10-02 00:29:44.033940', 'step': 10595, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:44.094344', 'step': 10595, 'epoch': 1}
{'type': 'loss', 'content': 0.1481662541627884, 'timestamp': '2025-10-02 00:29:44.101093', 'step': 10596, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:44.155960', 'step': 10596, 'epoch': 1}
{'type': 'loss', 'content': 0.05017765238881111, 'timestamp': '2025-10-02 00:29:44.159562', 'step': 10597, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:44.217925', 'step': 10597, 'epoch': 1}
{'type': 'loss', 'content': 0.025453077629208565, 'timestamp': '2025-10-02 00:29:44.220365', 'step': 10598, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:44.274258', 'step': 10598, 'epoch': 1}
{'type': 'loss', 'content': 0.05394745245575905, 'timestamp': '2025-10-02 00:29:44.276921', 'step': 10599, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:44.330755', 'step': 10599, 'epoch': 1}
{'type': 'loss', 'content': 0.08842390775680542, 'timestamp': '2025-10-02 00:29:44.337104', 'step': 10600, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:44.391424', 'step': 10600, 'epoch': 1}
{'type': 'loss', 'content': 0.07743018865585327, 'timestamp': '2025-10-02 00:29:44.393534', 'step': 10601, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:29:44.461929', 'step': 10601, 'epoch': 1}
{'type': 'loss', 'content': 0.050534311681985855, 'timestamp': '2025-10-02 00:29:44.474247', 'step': 10602, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:44.530227', 'step': 10602, 'epoch': 1}
{'type': 'loss', 'content': 0.05246816202998161, 'timestamp': '2025-10-02 00:29:44.539799', 'step': 10603, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:44.594513', 'step': 10603, 'epoch': 1}
{'type': 'loss', 'content': 0.045605890452861786, 'timestamp': '2025-10-02 00:29:44.600408', 'step': 10604, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:29:44.659841', 'step': 10604, 'epoch': 1}
{'type': 'loss', 'content': 0.05066470056772232, 'timestamp': '2025-10-02 00:29:44.671168', 'step': 10605, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:44.724737', 'step': 10605, 'epoch': 1}
{'type': 'loss', 'content': 0.10661458224058151, 'timestamp': '2025-10-02 00:29:44.727632', 'step': 10606, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:44.782903', 'step': 10606, 'epoch': 1}
{'type': 'loss', 'content': 0.03356235846877098, 'timestamp': '2025-10-02 00:29:44.792464', 'step': 10607, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:29:44.867031', 'step': 10607, 'epoch': 1}
{'type': 'loss', 'content': 0.023848919197916985, 'timestamp': '2025-10-02 00:29:44.881274', 'step': 10608, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:44.935324', 'step': 10608, 'epoch': 1}
{'type': 'loss', 'content': 0.0990118458867073, 'timestamp': '2025-10-02 00:29:44.942977', 'step': 10609, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:44.996102', 'step': 10609, 'epoch': 1}
{'type': 'loss', 'content': 0.10568351298570633, 'timestamp': '2025-10-02 00:29:44.998641', 'step': 10610, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:45.053170', 'step': 10610, 'epoch': 1}
{'type': 'loss', 'content': 0.029179098084568977, 'timestamp': '2025-10-02 00:29:45.061025', 'step': 10611, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:29:45.134512', 'step': 10611, 'epoch': 1}
{'type': 'loss', 'content': 0.048541732132434845, 'timestamp': '2025-10-02 00:29:45.148530', 'step': 10612, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:45.202212', 'step': 10612, 'epoch': 1}
{'type': 'loss', 'content': 0.05859969183802605, 'timestamp': '2025-10-02 00:29:45.204704', 'step': 10613, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:29:45.258411', 'step': 10613, 'epoch': 1}
{'type': 'loss', 'content': 0.09115690737962723, 'timestamp': '2025-10-02 00:29:45.260937', 'step': 10614, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:45.314489', 'step': 10614, 'epoch': 1}
{'type': 'loss', 'content': 0.15010708570480347, 'timestamp': '2025-10-02 00:29:45.316804', 'step': 10615, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:45.382938', 'step': 10615, 'epoch': 1}
{'type': 'loss', 'content': 0.0350603349506855, 'timestamp': '2025-10-02 00:29:45.391724', 'step': 10616, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:45.444876', 'step': 10616, 'epoch': 1}
{'type': 'loss', 'content': 0.13407675921916962, 'timestamp': '2025-10-02 00:29:45.447627', 'step': 10617, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:45.502292', 'step': 10617, 'epoch': 1}
{'type': 'loss', 'content': 0.18239228427410126, 'timestamp': '2025-10-02 00:29:45.504881', 'step': 10618, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:45.560191', 'step': 10618, 'epoch': 1}
{'type': 'loss', 'content': 0.07063554227352142, 'timestamp': '2025-10-02 00:29:45.563011', 'step': 10619, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:45.618164', 'step': 10619, 'epoch': 1}
{'type': 'loss', 'content': 0.1150410920381546, 'timestamp': '2025-10-02 00:29:45.624080', 'step': 10620, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:45.677654', 'step': 10620, 'epoch': 1}
{'type': 'loss', 'content': 0.13176733255386353, 'timestamp': '2025-10-02 00:29:45.680087', 'step': 10621, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:45.734816', 'step': 10621, 'epoch': 1}
{'type': 'loss', 'content': 0.13157135248184204, 'timestamp': '2025-10-02 00:29:45.737489', 'step': 10622, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:45.790871', 'step': 10622, 'epoch': 1}
{'type': 'loss', 'content': 0.21889790892601013, 'timestamp': '2025-10-02 00:29:45.794000', 'step': 10623, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:45.851799', 'step': 10623, 'epoch': 1}
{'type': 'loss', 'content': 0.16240699589252472, 'timestamp': '2025-10-02 00:29:45.860279', 'step': 10624, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:45.914616', 'step': 10624, 'epoch': 1}
{'type': 'loss', 'content': 0.04827011376619339, 'timestamp': '2025-10-02 00:29:45.920597', 'step': 10625, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:45.974913', 'step': 10625, 'epoch': 1}
{'type': 'loss', 'content': 0.020990287885069847, 'timestamp': '2025-10-02 00:29:45.982777', 'step': 10626, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:46.040710', 'step': 10626, 'epoch': 1}
{'type': 'loss', 'content': 0.00808234978467226, 'timestamp': '2025-10-02 00:29:46.050270', 'step': 10627, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:46.108050', 'step': 10627, 'epoch': 1}
{'type': 'loss', 'content': 0.022067295387387276, 'timestamp': '2025-10-02 00:29:46.118360', 'step': 10628, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:46.171979', 'step': 10628, 'epoch': 1}
{'type': 'loss', 'content': 0.1505354642868042, 'timestamp': '2025-10-02 00:29:46.174424', 'step': 10629, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:46.229652', 'step': 10629, 'epoch': 1}
{'type': 'loss', 'content': 0.07828638702630997, 'timestamp': '2025-10-02 00:29:46.232480', 'step': 10630, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:46.286734', 'step': 10630, 'epoch': 1}
{'type': 'loss', 'content': 0.18604180216789246, 'timestamp': '2025-10-02 00:29:46.289623', 'step': 10631, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:46.344956', 'step': 10631, 'epoch': 1}
{'type': 'loss', 'content': 0.050813883543014526, 'timestamp': '2025-10-02 00:29:46.351224', 'step': 10632, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:46.404794', 'step': 10632, 'epoch': 1}
{'type': 'loss', 'content': 0.07054389268159866, 'timestamp': '2025-10-02 00:29:46.407352', 'step': 10633, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:29:46.461550', 'step': 10633, 'epoch': 1}
{'type': 'loss', 'content': 0.23392894864082336, 'timestamp': '2025-10-02 00:29:46.464398', 'step': 10634, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:46.519103', 'step': 10634, 'epoch': 1}
{'type': 'loss', 'content': 0.12798473238945007, 'timestamp': '2025-10-02 00:29:46.521520', 'step': 10635, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:46.575590', 'step': 10635, 'epoch': 1}
{'type': 'loss', 'content': 0.17687730491161346, 'timestamp': '2025-10-02 00:29:46.585398', 'step': 10636, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:46.641340', 'step': 10636, 'epoch': 1}
{'type': 'loss', 'content': 0.023319769650697708, 'timestamp': '2025-10-02 00:29:46.644960', 'step': 10637, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:46.710662', 'step': 10637, 'epoch': 1}
{'type': 'loss', 'content': 0.14261405169963837, 'timestamp': '2025-10-02 00:29:46.713752', 'step': 10638, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:46.769036', 'step': 10638, 'epoch': 1}
{'type': 'loss', 'content': 0.04126398637890816, 'timestamp': '2025-10-02 00:29:46.775129', 'step': 10639, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:46.830656', 'step': 10639, 'epoch': 1}
{'type': 'loss', 'content': 0.01453894842416048, 'timestamp': '2025-10-02 00:29:46.837431', 'step': 10640, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:46.891980', 'step': 10640, 'epoch': 1}
{'type': 'loss', 'content': 0.1800232231616974, 'timestamp': '2025-10-02 00:29:46.893938', 'step': 10641, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:46.947844', 'step': 10641, 'epoch': 1}
{'type': 'loss', 'content': 0.11970975995063782, 'timestamp': '2025-10-02 00:29:46.951728', 'step': 10642, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:47.020783', 'step': 10642, 'epoch': 1}
{'type': 'loss', 'content': 0.03465988114476204, 'timestamp': '2025-10-02 00:29:47.025230', 'step': 10643, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:47.081794', 'step': 10643, 'epoch': 1}
{'type': 'loss', 'content': 0.2071840465068817, 'timestamp': '2025-10-02 00:29:47.088113', 'step': 10644, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:47.145637', 'step': 10644, 'epoch': 1}
{'type': 'loss', 'content': 0.04571431130170822, 'timestamp': '2025-10-02 00:29:47.148118', 'step': 10645, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:29:47.202048', 'step': 10645, 'epoch': 1}
{'type': 'loss', 'content': 0.1476345807313919, 'timestamp': '2025-10-02 00:29:47.204360', 'step': 10646, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:47.259640', 'step': 10646, 'epoch': 1}
{'type': 'loss', 'content': 0.24641436338424683, 'timestamp': '2025-10-02 00:29:47.262613', 'step': 10647, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:47.318176', 'step': 10647, 'epoch': 1}
{'type': 'loss', 'content': 0.019929230213165283, 'timestamp': '2025-10-02 00:29:47.325217', 'step': 10648, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:29:47.384646', 'step': 10648, 'epoch': 1}
{'type': 'loss', 'content': 0.09404043108224869, 'timestamp': '2025-10-02 00:29:47.395984', 'step': 10649, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:47.452030', 'step': 10649, 'epoch': 1}
{'type': 'loss', 'content': 0.02562112733721733, 'timestamp': '2025-10-02 00:29:47.461575', 'step': 10650, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:47.515915', 'step': 10650, 'epoch': 1}
{'type': 'loss', 'content': 0.025708258152008057, 'timestamp': '2025-10-02 00:29:47.518312', 'step': 10651, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:47.573420', 'step': 10651, 'epoch': 1}
{'type': 'loss', 'content': 0.08376883715391159, 'timestamp': '2025-10-02 00:29:47.579821', 'step': 10652, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:47.633081', 'step': 10652, 'epoch': 1}
{'type': 'loss', 'content': 0.07869523018598557, 'timestamp': '2025-10-02 00:29:47.638738', 'step': 10653, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:47.694208', 'step': 10653, 'epoch': 1}
{'type': 'loss', 'content': 0.08085436373949051, 'timestamp': '2025-10-02 00:29:47.696572', 'step': 10654, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:47.750945', 'step': 10654, 'epoch': 1}
{'type': 'loss', 'content': 0.09299803525209427, 'timestamp': '2025-10-02 00:29:47.753492', 'step': 10655, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:47.806824', 'step': 10655, 'epoch': 1}
{'type': 'loss', 'content': 0.27189093828201294, 'timestamp': '2025-10-02 00:29:47.928552', 'step': 10656, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:47.985278', 'step': 10656, 'epoch': 1}
{'type': 'loss', 'content': 0.08228740841150284, 'timestamp': '2025-10-02 00:29:47.987232', 'step': 10657, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:48.041510', 'step': 10657, 'epoch': 1}
{'type': 'loss', 'content': 0.08388303965330124, 'timestamp': '2025-10-02 00:29:48.043711', 'step': 10658, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:29:48.102155', 'step': 10658, 'epoch': 1}
{'type': 'loss', 'content': 0.02321607619524002, 'timestamp': '2025-10-02 00:29:48.112389', 'step': 10659, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:29:48.173265', 'step': 10659, 'epoch': 1}
{'type': 'loss', 'content': 0.08689798414707184, 'timestamp': '2025-10-02 00:29:48.184694', 'step': 10660, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:48.238738', 'step': 10660, 'epoch': 1}
{'type': 'loss', 'content': 0.0695619136095047, 'timestamp': '2025-10-02 00:29:48.241276', 'step': 10661, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:48.295767', 'step': 10661, 'epoch': 1}
{'type': 'loss', 'content': 0.04624715447425842, 'timestamp': '2025-10-02 00:29:48.305326', 'step': 10662, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:29:48.359658', 'step': 10662, 'epoch': 1}
{'type': 'loss', 'content': 0.012438185513019562, 'timestamp': '2025-10-02 00:29:48.368947', 'step': 10663, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:48.422821', 'step': 10663, 'epoch': 1}
{'type': 'loss', 'content': 0.023994194343686104, 'timestamp': '2025-10-02 00:29:48.431273', 'step': 10664, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:48.487080', 'step': 10664, 'epoch': 1}
{'type': 'loss', 'content': 0.05616961792111397, 'timestamp': '2025-10-02 00:29:48.491280', 'step': 10665, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:48.545800', 'step': 10665, 'epoch': 1}
{'type': 'loss', 'content': 0.05701844394207001, 'timestamp': '2025-10-02 00:29:48.555295', 'step': 10666, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:48.611072', 'step': 10666, 'epoch': 1}
{'type': 'loss', 'content': 0.09578792005777359, 'timestamp': '2025-10-02 00:29:48.620730', 'step': 10667, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:29:48.677900', 'step': 10667, 'epoch': 1}
{'type': 'loss', 'content': 0.009122611954808235, 'timestamp': '2025-10-02 00:29:48.688097', 'step': 10668, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:48.743460', 'step': 10668, 'epoch': 1}
{'type': 'loss', 'content': 0.06584499031305313, 'timestamp': '2025-10-02 00:29:48.745957', 'step': 10669, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:29:48.801653', 'step': 10669, 'epoch': 1}
{'type': 'loss', 'content': 0.16132351756095886, 'timestamp': '2025-10-02 00:29:48.804641', 'step': 10670, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:48.859203', 'step': 10670, 'epoch': 1}
{'type': 'loss', 'content': 0.028536437079310417, 'timestamp': '2025-10-02 00:29:48.866985', 'step': 10671, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:48.920410', 'step': 10671, 'epoch': 1}
{'type': 'loss', 'content': 0.07796820998191833, 'timestamp': '2025-10-02 00:29:48.929841', 'step': 10672, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:48.983538', 'step': 10672, 'epoch': 1}
{'type': 'loss', 'content': 0.062024980783462524, 'timestamp': '2025-10-02 00:29:48.985667', 'step': 10673, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:49.040725', 'step': 10673, 'epoch': 1}
{'type': 'loss', 'content': 0.10367807000875473, 'timestamp': '2025-10-02 00:29:49.047973', 'step': 10674, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:49.101723', 'step': 10674, 'epoch': 1}
{'type': 'loss', 'content': 0.14192156493663788, 'timestamp': '2025-10-02 00:29:49.103888', 'step': 10675, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:49.157380', 'step': 10675, 'epoch': 1}
{'type': 'loss', 'content': 0.07383197546005249, 'timestamp': '2025-10-02 00:29:49.163371', 'step': 10676, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:49.216712', 'step': 10676, 'epoch': 1}
{'type': 'loss', 'content': 0.028092509135603905, 'timestamp': '2025-10-02 00:29:49.219183', 'step': 10677, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:49.272847', 'step': 10677, 'epoch': 1}
{'type': 'loss', 'content': 0.053781647235155106, 'timestamp': '2025-10-02 00:29:49.275083', 'step': 10678, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:49.329183', 'step': 10678, 'epoch': 1}
{'type': 'loss', 'content': 0.10481578856706619, 'timestamp': '2025-10-02 00:29:49.336425', 'step': 10679, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:49.390504', 'step': 10679, 'epoch': 1}
{'type': 'loss', 'content': 0.04150855913758278, 'timestamp': '2025-10-02 00:29:49.397446', 'step': 10680, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:29:49.457576', 'step': 10680, 'epoch': 1}
{'type': 'loss', 'content': 0.017949797213077545, 'timestamp': '2025-10-02 00:29:49.469069', 'step': 10681, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:29:49.522138', 'step': 10681, 'epoch': 1}
{'type': 'loss', 'content': 0.11355363577604294, 'timestamp': '2025-10-02 00:29:49.524565', 'step': 10682, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:49.578275', 'step': 10682, 'epoch': 1}
{'type': 'loss', 'content': 0.14433333277702332, 'timestamp': '2025-10-02 00:29:49.580352', 'step': 10683, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:49.634103', 'step': 10683, 'epoch': 1}
{'type': 'loss', 'content': 0.031455397605895996, 'timestamp': '2025-10-02 00:29:49.640867', 'step': 10684, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:49.694261', 'step': 10684, 'epoch': 1}
{'type': 'loss', 'content': 0.08363573253154755, 'timestamp': '2025-10-02 00:29:49.696511', 'step': 10685, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:49.750139', 'step': 10685, 'epoch': 1}
{'type': 'loss', 'content': 0.060976408421993256, 'timestamp': '2025-10-02 00:29:49.752836', 'step': 10686, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:49.807301', 'step': 10686, 'epoch': 1}
{'type': 'loss', 'content': 0.046624310314655304, 'timestamp': '2025-10-02 00:29:49.809401', 'step': 10687, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:29:49.862824', 'step': 10687, 'epoch': 1}
{'type': 'loss', 'content': 0.09726054966449738, 'timestamp': '2025-10-02 00:29:49.868420', 'step': 10688, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:49.921409', 'step': 10688, 'epoch': 1}
{'type': 'loss', 'content': 0.09511665254831314, 'timestamp': '2025-10-02 00:29:49.923628', 'step': 10689, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:49.977325', 'step': 10689, 'epoch': 1}
{'type': 'loss', 'content': 0.05642501637339592, 'timestamp': '2025-10-02 00:29:49.981376', 'step': 10690, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:50.035328', 'step': 10690, 'epoch': 1}
{'type': 'loss', 'content': 0.05991598218679428, 'timestamp': '2025-10-02 00:29:50.041060', 'step': 10691, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:50.095192', 'step': 10691, 'epoch': 1}
{'type': 'loss', 'content': 0.15190045535564423, 'timestamp': '2025-10-02 00:29:50.101200', 'step': 10692, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:29:50.158445', 'step': 10692, 'epoch': 1}
{'type': 'loss', 'content': 0.0409589447081089, 'timestamp': '2025-10-02 00:29:50.169430', 'step': 10693, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:50.225628', 'step': 10693, 'epoch': 1}
{'type': 'loss', 'content': 0.07907773554325104, 'timestamp': '2025-10-02 00:29:50.235308', 'step': 10694, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:50.288922', 'step': 10694, 'epoch': 1}
{'type': 'loss', 'content': 0.07723916321992874, 'timestamp': '2025-10-02 00:29:50.291434', 'step': 10695, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:50.345204', 'step': 10695, 'epoch': 1}
{'type': 'loss', 'content': 0.05679476633667946, 'timestamp': '2025-10-02 00:29:50.353581', 'step': 10696, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:50.410289', 'step': 10696, 'epoch': 1}
{'type': 'loss', 'content': 0.16644808650016785, 'timestamp': '2025-10-02 00:29:50.412359', 'step': 10697, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:29:50.465258', 'step': 10697, 'epoch': 1}
{'type': 'loss', 'content': 0.11962629109621048, 'timestamp': '2025-10-02 00:29:50.467543', 'step': 10698, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:29:50.531804', 'step': 10698, 'epoch': 1}
{'type': 'loss', 'content': 0.14744646847248077, 'timestamp': '2025-10-02 00:29:50.542462', 'step': 10699, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:50.596802', 'step': 10699, 'epoch': 1}
{'type': 'loss', 'content': 0.06277790665626526, 'timestamp': '2025-10-02 00:29:50.602657', 'step': 10700, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:50.655123', 'step': 10700, 'epoch': 1}
{'type': 'loss', 'content': 0.1808917224407196, 'timestamp': '2025-10-02 00:29:50.657321', 'step': 10701, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:50.710516', 'step': 10701, 'epoch': 1}
{'type': 'loss', 'content': 0.16916732490062714, 'timestamp': '2025-10-02 00:29:50.712584', 'step': 10702, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:29:50.766131', 'step': 10702, 'epoch': 1}
{'type': 'loss', 'content': 0.12486682832241058, 'timestamp': '2025-10-02 00:29:50.768210', 'step': 10703, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:50.822424', 'step': 10703, 'epoch': 1}
{'type': 'loss', 'content': 0.10113885253667831, 'timestamp': '2025-10-02 00:29:50.827927', 'step': 10704, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:50.881037', 'step': 10704, 'epoch': 1}
{'type': 'loss', 'content': 0.09520695358514786, 'timestamp': '2025-10-02 00:29:50.883288', 'step': 10705, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:50.936601', 'step': 10705, 'epoch': 1}
{'type': 'loss', 'content': 0.08440976589918137, 'timestamp': '2025-10-02 00:29:50.939733', 'step': 10706, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:29:51.006791', 'step': 10706, 'epoch': 1}
{'type': 'loss', 'content': 0.10240044444799423, 'timestamp': '2025-10-02 00:29:51.018755', 'step': 10707, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:51.072443', 'step': 10707, 'epoch': 1}
{'type': 'loss', 'content': 0.058079879730939865, 'timestamp': '2025-10-02 00:29:51.078271', 'step': 10708, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:29:51.138072', 'step': 10708, 'epoch': 1}
{'type': 'loss', 'content': 0.06962300837039948, 'timestamp': '2025-10-02 00:29:51.149631', 'step': 10709, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:51.204035', 'step': 10709, 'epoch': 1}
{'type': 'loss', 'content': 0.07548162341117859, 'timestamp': '2025-10-02 00:29:51.206636', 'step': 10710, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:51.260558', 'step': 10710, 'epoch': 1}
{'type': 'loss', 'content': 0.13657328486442566, 'timestamp': '2025-10-02 00:29:51.262925', 'step': 10711, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:29:51.317034', 'step': 10711, 'epoch': 1}
{'type': 'loss', 'content': 0.046174053102731705, 'timestamp': '2025-10-02 00:29:51.327306', 'step': 10712, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:51.380329', 'step': 10712, 'epoch': 1}
{'type': 'loss', 'content': 0.040834713727235794, 'timestamp': '2025-10-02 00:29:51.382257', 'step': 10713, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:29:51.449214', 'step': 10713, 'epoch': 1}
{'type': 'loss', 'content': 0.027012178674340248, 'timestamp': '2025-10-02 00:29:51.461205', 'step': 10714, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:51.514396', 'step': 10714, 'epoch': 1}
{'type': 'loss', 'content': 0.09762182086706161, 'timestamp': '2025-10-02 00:29:51.517077', 'step': 10715, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:51.571608', 'step': 10715, 'epoch': 1}
{'type': 'loss', 'content': 0.06974168866872787, 'timestamp': '2025-10-02 00:29:51.580151', 'step': 10716, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:29:51.635867', 'step': 10716, 'epoch': 1}
{'type': 'loss', 'content': 0.042356088757514954, 'timestamp': '2025-10-02 00:29:51.645292', 'step': 10717, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:51.701924', 'step': 10717, 'epoch': 1}
{'type': 'loss', 'content': 0.1169731393456459, 'timestamp': '2025-10-02 00:29:51.711505', 'step': 10718, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:29:51.774310', 'step': 10718, 'epoch': 1}
{'type': 'loss', 'content': 0.054132070392370224, 'timestamp': '2025-10-02 00:29:51.784990', 'step': 10719, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:51.840237', 'step': 10719, 'epoch': 1}
{'type': 'loss', 'content': 0.07922515273094177, 'timestamp': '2025-10-02 00:29:51.846430', 'step': 10720, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:51.900549', 'step': 10720, 'epoch': 1}
{'type': 'loss', 'content': 0.10955123603343964, 'timestamp': '2025-10-02 00:29:51.908350', 'step': 10721, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:29:51.970861', 'step': 10721, 'epoch': 1}
{'type': 'loss', 'content': 0.1088842898607254, 'timestamp': '2025-10-02 00:29:51.981353', 'step': 10722, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:52.038166', 'step': 10722, 'epoch': 1}
{'type': 'loss', 'content': 0.02132609114050865, 'timestamp': '2025-10-02 00:29:52.040690', 'step': 10723, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:52.095356', 'step': 10723, 'epoch': 1}
{'type': 'loss', 'content': 0.07806912809610367, 'timestamp': '2025-10-02 00:29:52.101329', 'step': 10724, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:52.155214', 'step': 10724, 'epoch': 1}
{'type': 'loss', 'content': 0.18074879050254822, 'timestamp': '2025-10-02 00:29:52.158288', 'step': 10725, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:52.212420', 'step': 10725, 'epoch': 1}
{'type': 'loss', 'content': 0.12737341225147247, 'timestamp': '2025-10-02 00:29:52.215327', 'step': 10726, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:52.271261', 'step': 10726, 'epoch': 1}
{'type': 'loss', 'content': 0.08568387478590012, 'timestamp': '2025-10-02 00:29:52.273222', 'step': 10727, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:52.336889', 'step': 10727, 'epoch': 1}
{'type': 'loss', 'content': 0.06973446905612946, 'timestamp': '2025-10-02 00:29:52.343895', 'step': 10728, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:52.401009', 'step': 10728, 'epoch': 1}
{'type': 'loss', 'content': 0.03689469024538994, 'timestamp': '2025-10-02 00:29:52.406504', 'step': 10729, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:52.467200', 'step': 10729, 'epoch': 1}
{'type': 'loss', 'content': 0.11676894128322601, 'timestamp': '2025-10-02 00:29:52.474529', 'step': 10730, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:52.539740', 'step': 10730, 'epoch': 1}
{'type': 'loss', 'content': 0.12028785794973373, 'timestamp': '2025-10-02 00:29:52.562933', 'step': 10731, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:52.637366', 'step': 10731, 'epoch': 1}
{'type': 'loss', 'content': 0.18523161113262177, 'timestamp': '2025-10-02 00:29:52.644852', 'step': 10732, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:52.726908', 'step': 10732, 'epoch': 1}
{'type': 'loss', 'content': 0.03008211962878704, 'timestamp': '2025-10-02 00:29:52.741351', 'step': 10733, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:52.808270', 'step': 10733, 'epoch': 1}
{'type': 'loss', 'content': 0.01525778230279684, 'timestamp': '2025-10-02 00:29:52.816088', 'step': 10734, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:52.894808', 'step': 10734, 'epoch': 1}
{'type': 'loss', 'content': 0.08619444817304611, 'timestamp': '2025-10-02 00:29:52.907470', 'step': 10735, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:29:52.998153', 'step': 10735, 'epoch': 1}
{'type': 'loss', 'content': 0.021481433883309364, 'timestamp': '2025-10-02 00:29:53.012168', 'step': 10736, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:29:53.095485', 'step': 10736, 'epoch': 1}
{'type': 'loss', 'content': 0.06724230200052261, 'timestamp': '2025-10-02 00:29:53.108468', 'step': 10737, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:29:53.186943', 'step': 10737, 'epoch': 1}
{'type': 'loss', 'content': 0.027021586894989014, 'timestamp': '2025-10-02 00:29:53.203821', 'step': 10738, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:29:53.293125', 'step': 10738, 'epoch': 1}
{'type': 'loss', 'content': 0.07483037561178207, 'timestamp': '2025-10-02 00:29:53.303963', 'step': 10739, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:53.394652', 'step': 10739, 'epoch': 1}
{'type': 'loss', 'content': 0.022692130878567696, 'timestamp': '2025-10-02 00:29:53.413274', 'step': 10740, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:53.503396', 'step': 10740, 'epoch': 1}
{'type': 'loss', 'content': 0.04106910154223442, 'timestamp': '2025-10-02 00:29:53.509477', 'step': 10741, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:53.568215', 'step': 10741, 'epoch': 1}
{'type': 'loss', 'content': 0.055621448904275894, 'timestamp': '2025-10-02 00:29:53.570885', 'step': 10742, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:29:53.636414', 'step': 10742, 'epoch': 1}
{'type': 'loss', 'content': 0.03562403470277786, 'timestamp': '2025-10-02 00:29:53.647261', 'step': 10743, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:29:53.708382', 'step': 10743, 'epoch': 1}
{'type': 'loss', 'content': 0.06439778208732605, 'timestamp': '2025-10-02 00:29:53.719488', 'step': 10744, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:29:53.794693', 'step': 10744, 'epoch': 1}
{'type': 'loss', 'content': 0.09951703250408173, 'timestamp': '2025-10-02 00:29:53.809820', 'step': 10745, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:53.863850', 'step': 10745, 'epoch': 1}
{'type': 'loss', 'content': 0.19512183964252472, 'timestamp': '2025-10-02 00:29:53.866253', 'step': 10746, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:53.920202', 'step': 10746, 'epoch': 1}
{'type': 'loss', 'content': 0.036157190799713135, 'timestamp': '2025-10-02 00:29:53.926437', 'step': 10747, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:53.982568', 'step': 10747, 'epoch': 1}
{'type': 'loss', 'content': 0.023921428248286247, 'timestamp': '2025-10-02 00:29:53.988995', 'step': 10748, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:54.045514', 'step': 10748, 'epoch': 1}
{'type': 'loss', 'content': 0.09050596505403519, 'timestamp': '2025-10-02 00:29:54.048107', 'step': 10749, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:54.103990', 'step': 10749, 'epoch': 1}
{'type': 'loss', 'content': 0.07073593139648438, 'timestamp': '2025-10-02 00:29:54.107546', 'step': 10750, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:54.163303', 'step': 10750, 'epoch': 1}
{'type': 'loss', 'content': 0.036191847175359726, 'timestamp': '2025-10-02 00:29:54.165954', 'step': 10751, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:54.220170', 'step': 10751, 'epoch': 1}
{'type': 'loss', 'content': 0.12417677044868469, 'timestamp': '2025-10-02 00:29:54.226742', 'step': 10752, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:54.281194', 'step': 10752, 'epoch': 1}
{'type': 'loss', 'content': 0.08286968618631363, 'timestamp': '2025-10-02 00:29:54.283649', 'step': 10753, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:29:54.346944', 'step': 10753, 'epoch': 1}
{'type': 'loss', 'content': 0.034311577677726746, 'timestamp': '2025-10-02 00:29:54.357818', 'step': 10754, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:54.414800', 'step': 10754, 'epoch': 1}
{'type': 'loss', 'content': 0.04993214085698128, 'timestamp': '2025-10-02 00:29:54.421128', 'step': 10755, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:54.475599', 'step': 10755, 'epoch': 1}
{'type': 'loss', 'content': 0.18906128406524658, 'timestamp': '2025-10-02 00:29:54.481721', 'step': 10756, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:54.536533', 'step': 10756, 'epoch': 1}
{'type': 'loss', 'content': 0.03634505718946457, 'timestamp': '2025-10-02 00:29:54.539166', 'step': 10757, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:54.594389', 'step': 10757, 'epoch': 1}
{'type': 'loss', 'content': 0.02649516612291336, 'timestamp': '2025-10-02 00:29:54.596893', 'step': 10758, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:54.651645', 'step': 10758, 'epoch': 1}
{'type': 'loss', 'content': 0.26615244150161743, 'timestamp': '2025-10-02 00:29:54.654383', 'step': 10759, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:54.708614', 'step': 10759, 'epoch': 1}
{'type': 'loss', 'content': 0.2561652660369873, 'timestamp': '2025-10-02 00:29:54.714942', 'step': 10760, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:54.768578', 'step': 10760, 'epoch': 1}
{'type': 'loss', 'content': 0.10014545917510986, 'timestamp': '2025-10-02 00:29:54.771286', 'step': 10761, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:29:54.826256', 'step': 10761, 'epoch': 1}
{'type': 'loss', 'content': 0.11573649197816849, 'timestamp': '2025-10-02 00:29:54.835606', 'step': 10762, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:54.890758', 'step': 10762, 'epoch': 1}
{'type': 'loss', 'content': 0.04087343439459801, 'timestamp': '2025-10-02 00:29:54.893377', 'step': 10763, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:54.947976', 'step': 10763, 'epoch': 1}
{'type': 'loss', 'content': 0.06912726163864136, 'timestamp': '2025-10-02 00:29:54.953987', 'step': 10764, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:55.012869', 'step': 10764, 'epoch': 1}
{'type': 'loss', 'content': 0.048214830458164215, 'timestamp': '2025-10-02 00:29:55.021297', 'step': 10765, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:29:55.076753', 'step': 10765, 'epoch': 1}
{'type': 'loss', 'content': 0.05645693093538284, 'timestamp': '2025-10-02 00:29:55.086122', 'step': 10766, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:55.142548', 'step': 10766, 'epoch': 1}
{'type': 'loss', 'content': 0.04360020160675049, 'timestamp': '2025-10-02 00:29:55.148214', 'step': 10767, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:55.214468', 'step': 10767, 'epoch': 1}
{'type': 'loss', 'content': 0.11914162337779999, 'timestamp': '2025-10-02 00:29:55.220840', 'step': 10768, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:55.275952', 'step': 10768, 'epoch': 1}
{'type': 'loss', 'content': 0.06073680520057678, 'timestamp': '2025-10-02 00:29:55.278920', 'step': 10769, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:55.333438', 'step': 10769, 'epoch': 1}
{'type': 'loss', 'content': 0.030066700652241707, 'timestamp': '2025-10-02 00:29:55.336184', 'step': 10770, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:55.391319', 'step': 10770, 'epoch': 1}
{'type': 'loss', 'content': 0.05874200165271759, 'timestamp': '2025-10-02 00:29:55.398770', 'step': 10771, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:55.456502', 'step': 10771, 'epoch': 1}
{'type': 'loss', 'content': 0.053551893681287766, 'timestamp': '2025-10-02 00:29:55.463246', 'step': 10772, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:55.518118', 'step': 10772, 'epoch': 1}
{'type': 'loss', 'content': 0.07774780690670013, 'timestamp': '2025-10-02 00:29:55.520625', 'step': 10773, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:29:55.577221', 'step': 10773, 'epoch': 1}
{'type': 'loss', 'content': 0.06074196472764015, 'timestamp': '2025-10-02 00:29:55.586619', 'step': 10774, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:55.641812', 'step': 10774, 'epoch': 1}
{'type': 'loss', 'content': 0.11198156327009201, 'timestamp': '2025-10-02 00:29:55.644445', 'step': 10775, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:55.699671', 'step': 10775, 'epoch': 1}
{'type': 'loss', 'content': 0.027791205793619156, 'timestamp': '2025-10-02 00:29:55.708095', 'step': 10776, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:29:55.768204', 'step': 10776, 'epoch': 1}
{'type': 'loss', 'content': 0.045369334518909454, 'timestamp': '2025-10-02 00:29:55.779497', 'step': 10777, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:55.834403', 'step': 10777, 'epoch': 1}
{'type': 'loss', 'content': 0.13158878684043884, 'timestamp': '2025-10-02 00:29:55.837027', 'step': 10778, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:55.892035', 'step': 10778, 'epoch': 1}
{'type': 'loss', 'content': 0.045551884919404984, 'timestamp': '2025-10-02 00:29:55.899540', 'step': 10779, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:55.953884', 'step': 10779, 'epoch': 1}
{'type': 'loss', 'content': 0.07369791716337204, 'timestamp': '2025-10-02 00:29:55.961020', 'step': 10780, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:29:56.029408', 'step': 10780, 'epoch': 1}
{'type': 'loss', 'content': 0.011482875794172287, 'timestamp': '2025-10-02 00:29:56.040778', 'step': 10781, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:29:56.113033', 'step': 10781, 'epoch': 1}
{'type': 'loss', 'content': 0.0464361310005188, 'timestamp': '2025-10-02 00:29:56.125827', 'step': 10782, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:56.182954', 'step': 10782, 'epoch': 1}
{'type': 'loss', 'content': 0.020804286003112793, 'timestamp': '2025-10-02 00:29:56.190512', 'step': 10783, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:56.256359', 'step': 10783, 'epoch': 1}
{'type': 'loss', 'content': 0.061706408858299255, 'timestamp': '2025-10-02 00:29:56.266764', 'step': 10784, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:56.323523', 'step': 10784, 'epoch': 1}
{'type': 'loss', 'content': 0.02495947852730751, 'timestamp': '2025-10-02 00:29:56.326014', 'step': 10785, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:29:56.382288', 'step': 10785, 'epoch': 1}
{'type': 'loss', 'content': 0.0156283900141716, 'timestamp': '2025-10-02 00:29:56.391657', 'step': 10786, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:29:56.447850', 'step': 10786, 'epoch': 1}
{'type': 'loss', 'content': 0.06148548051714897, 'timestamp': '2025-10-02 00:29:56.451150', 'step': 10787, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:56.506321', 'step': 10787, 'epoch': 1}
{'type': 'loss', 'content': 0.05265248194336891, 'timestamp': '2025-10-02 00:29:56.512447', 'step': 10788, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:29:56.566568', 'step': 10788, 'epoch': 1}
{'type': 'loss', 'content': 0.21761725842952728, 'timestamp': '2025-10-02 00:29:56.569207', 'step': 10789, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:56.625982', 'step': 10789, 'epoch': 1}
{'type': 'loss', 'content': 0.03647937625646591, 'timestamp': '2025-10-02 00:29:56.633572', 'step': 10790, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:56.688597', 'step': 10790, 'epoch': 1}
{'type': 'loss', 'content': 0.09832973033189774, 'timestamp': '2025-10-02 00:29:56.691407', 'step': 10791, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:56.747286', 'step': 10791, 'epoch': 1}
{'type': 'loss', 'content': 0.12903772294521332, 'timestamp': '2025-10-02 00:29:56.753919', 'step': 10792, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:56.807582', 'step': 10792, 'epoch': 1}
{'type': 'loss', 'content': 0.18401430547237396, 'timestamp': '2025-10-02 00:29:56.810037', 'step': 10793, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:29:56.870375', 'step': 10793, 'epoch': 1}
{'type': 'loss', 'content': 0.02235696278512478, 'timestamp': '2025-10-02 00:29:56.880549', 'step': 10794, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:56.935748', 'step': 10794, 'epoch': 1}
{'type': 'loss', 'content': 0.10890650004148483, 'timestamp': '2025-10-02 00:29:56.941617', 'step': 10795, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:56.996811', 'step': 10795, 'epoch': 1}
{'type': 'loss', 'content': 0.032358624041080475, 'timestamp': '2025-10-02 00:29:57.003759', 'step': 10796, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:57.059765', 'step': 10796, 'epoch': 1}
{'type': 'loss', 'content': 0.04274432733654976, 'timestamp': '2025-10-02 00:29:57.062418', 'step': 10797, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:57.118175', 'step': 10797, 'epoch': 1}
{'type': 'loss', 'content': 0.07299798727035522, 'timestamp': '2025-10-02 00:29:57.120708', 'step': 10798, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:29:57.176419', 'step': 10798, 'epoch': 1}
{'type': 'loss', 'content': 0.060381341725587845, 'timestamp': '2025-10-02 00:29:57.185807', 'step': 10799, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:57.241591', 'step': 10799, 'epoch': 1}
{'type': 'loss', 'content': 0.045122839510440826, 'timestamp': '2025-10-02 00:29:57.251836', 'step': 10800, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:57.310114', 'step': 10800, 'epoch': 1}
{'type': 'loss', 'content': 0.05042275786399841, 'timestamp': '2025-10-02 00:29:57.312552', 'step': 10801, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:57.367238', 'step': 10801, 'epoch': 1}
{'type': 'loss', 'content': 0.16597193479537964, 'timestamp': '2025-10-02 00:29:57.369730', 'step': 10802, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:57.427222', 'step': 10802, 'epoch': 1}
{'type': 'loss', 'content': 0.014376221224665642, 'timestamp': '2025-10-02 00:29:57.436769', 'step': 10803, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:57.491545', 'step': 10803, 'epoch': 1}
{'type': 'loss', 'content': 0.14103655517101288, 'timestamp': '2025-10-02 00:29:57.498263', 'step': 10804, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:57.552954', 'step': 10804, 'epoch': 1}
{'type': 'loss', 'content': 0.04683983325958252, 'timestamp': '2025-10-02 00:29:57.556120', 'step': 10805, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:57.613725', 'step': 10805, 'epoch': 1}
{'type': 'loss', 'content': 0.05993008613586426, 'timestamp': '2025-10-02 00:29:57.623248', 'step': 10806, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:29:57.680524', 'step': 10806, 'epoch': 1}
{'type': 'loss', 'content': 0.05962076038122177, 'timestamp': '2025-10-02 00:29:57.682677', 'step': 10807, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:57.737193', 'step': 10807, 'epoch': 1}
{'type': 'loss', 'content': 0.03238851577043533, 'timestamp': '2025-10-02 00:29:57.743221', 'step': 10808, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:57.796821', 'step': 10808, 'epoch': 1}
{'type': 'loss', 'content': 0.11550962179899216, 'timestamp': '2025-10-02 00:29:57.799294', 'step': 10809, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:29:57.862298', 'step': 10809, 'epoch': 1}
{'type': 'loss', 'content': 0.0665176510810852, 'timestamp': '2025-10-02 00:29:57.872958', 'step': 10810, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:29:57.928103', 'step': 10810, 'epoch': 1}
{'type': 'loss', 'content': 0.12479928135871887, 'timestamp': '2025-10-02 00:29:57.930889', 'step': 10811, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:29:57.985603', 'step': 10811, 'epoch': 1}
{'type': 'loss', 'content': 0.10136706382036209, 'timestamp': '2025-10-02 00:29:57.991851', 'step': 10812, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:58.046635', 'step': 10812, 'epoch': 1}
{'type': 'loss', 'content': 0.13983049988746643, 'timestamp': '2025-10-02 00:29:58.049240', 'step': 10813, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:58.104058', 'step': 10813, 'epoch': 1}
{'type': 'loss', 'content': 0.1397935152053833, 'timestamp': '2025-10-02 00:29:58.106433', 'step': 10814, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:58.162131', 'step': 10814, 'epoch': 1}
{'type': 'loss', 'content': 0.018826957792043686, 'timestamp': '2025-10-02 00:29:58.168041', 'step': 10815, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:58.222717', 'step': 10815, 'epoch': 1}
{'type': 'loss', 'content': 0.034624386578798294, 'timestamp': '2025-10-02 00:29:58.228504', 'step': 10816, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:58.282660', 'step': 10816, 'epoch': 1}
{'type': 'loss', 'content': 0.09008573740720749, 'timestamp': '2025-10-02 00:29:58.285071', 'step': 10817, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:58.338787', 'step': 10817, 'epoch': 1}
{'type': 'loss', 'content': 0.0890679582953453, 'timestamp': '2025-10-02 00:29:58.341711', 'step': 10818, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:58.397639', 'step': 10818, 'epoch': 1}
{'type': 'loss', 'content': 0.15754881501197815, 'timestamp': '2025-10-02 00:29:58.400146', 'step': 10819, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:29:58.455679', 'step': 10819, 'epoch': 1}
{'type': 'loss', 'content': 0.11931469291448593, 'timestamp': '2025-10-02 00:29:58.462026', 'step': 10820, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:58.517580', 'step': 10820, 'epoch': 1}
{'type': 'loss', 'content': 0.03318699449300766, 'timestamp': '2025-10-02 00:29:58.525477', 'step': 10821, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:29:58.580245', 'step': 10821, 'epoch': 1}
{'type': 'loss', 'content': 0.03134220466017723, 'timestamp': '2025-10-02 00:29:58.588036', 'step': 10822, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:58.644224', 'step': 10822, 'epoch': 1}
{'type': 'loss', 'content': 0.004935830365866423, 'timestamp': '2025-10-02 00:29:58.646539', 'step': 10823, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:29:58.700593', 'step': 10823, 'epoch': 1}
{'type': 'loss', 'content': 0.13099335134029388, 'timestamp': '2025-10-02 00:29:58.708153', 'step': 10824, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:29:58.762033', 'step': 10824, 'epoch': 1}
{'type': 'loss', 'content': 0.08528683334589005, 'timestamp': '2025-10-02 00:29:58.772315', 'step': 10825, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:58.829335', 'step': 10825, 'epoch': 1}
{'type': 'loss', 'content': 0.09982044249773026, 'timestamp': '2025-10-02 00:29:58.831869', 'step': 10826, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:58.886201', 'step': 10826, 'epoch': 1}
{'type': 'loss', 'content': 0.04541803151369095, 'timestamp': '2025-10-02 00:29:58.888792', 'step': 10827, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:58.943306', 'step': 10827, 'epoch': 1}
{'type': 'loss', 'content': 0.09362148493528366, 'timestamp': '2025-10-02 00:29:58.949305', 'step': 10828, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:59.003396', 'step': 10828, 'epoch': 1}
{'type': 'loss', 'content': 0.013994314707815647, 'timestamp': '2025-10-02 00:29:59.009539', 'step': 10829, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:59.066788', 'step': 10829, 'epoch': 1}
{'type': 'loss', 'content': 0.03064051643013954, 'timestamp': '2025-10-02 00:29:59.072818', 'step': 10830, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:59.127950', 'step': 10830, 'epoch': 1}
{'type': 'loss', 'content': 0.025579610839486122, 'timestamp': '2025-10-02 00:29:59.130630', 'step': 10831, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:29:59.189832', 'step': 10831, 'epoch': 1}
{'type': 'loss', 'content': 0.10226065665483475, 'timestamp': '2025-10-02 00:29:59.196235', 'step': 10832, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:29:59.252185', 'step': 10832, 'epoch': 1}
{'type': 'loss', 'content': 0.17664726078510284, 'timestamp': '2025-10-02 00:29:59.255066', 'step': 10833, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:59.309809', 'step': 10833, 'epoch': 1}
{'type': 'loss', 'content': 0.08455053716897964, 'timestamp': '2025-10-02 00:29:59.313164', 'step': 10834, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:59.367846', 'step': 10834, 'epoch': 1}
{'type': 'loss', 'content': 0.03304153308272362, 'timestamp': '2025-10-02 00:29:59.370108', 'step': 10835, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:59.424756', 'step': 10835, 'epoch': 1}
{'type': 'loss', 'content': 0.05099548399448395, 'timestamp': '2025-10-02 00:29:59.431582', 'step': 10836, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:59.485114', 'step': 10836, 'epoch': 1}
{'type': 'loss', 'content': 0.031314410269260406, 'timestamp': '2025-10-02 00:29:59.491207', 'step': 10837, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:59.546340', 'step': 10837, 'epoch': 1}
{'type': 'loss', 'content': 0.13431382179260254, 'timestamp': '2025-10-02 00:29:59.549002', 'step': 10838, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:59.603856', 'step': 10838, 'epoch': 1}
{'type': 'loss', 'content': 0.07896902412176132, 'timestamp': '2025-10-02 00:29:59.606637', 'step': 10839, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:29:59.661764', 'step': 10839, 'epoch': 1}
{'type': 'loss', 'content': 0.0341963954269886, 'timestamp': '2025-10-02 00:29:59.668475', 'step': 10840, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:29:59.722756', 'step': 10840, 'epoch': 1}
{'type': 'loss', 'content': 0.05245345085859299, 'timestamp': '2025-10-02 00:29:59.724741', 'step': 10841, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:59.778807', 'step': 10841, 'epoch': 1}
{'type': 'loss', 'content': 0.1795521229505539, 'timestamp': '2025-10-02 00:29:59.782409', 'step': 10842, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:29:59.838239', 'step': 10842, 'epoch': 1}
{'type': 'loss', 'content': 0.08804343640804291, 'timestamp': '2025-10-02 00:29:59.843926', 'step': 10843, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:29:59.899071', 'step': 10843, 'epoch': 1}
{'type': 'loss', 'content': 0.09229204803705215, 'timestamp': '2025-10-02 00:29:59.905306', 'step': 10844, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:29:59.959872', 'step': 10844, 'epoch': 1}
{'type': 'loss', 'content': 0.17137117683887482, 'timestamp': '2025-10-02 00:29:59.962253', 'step': 10845, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:00.018002', 'step': 10845, 'epoch': 1}
{'type': 'loss', 'content': 0.09126739203929901, 'timestamp': '2025-10-02 00:30:00.027557', 'step': 10846, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:00.083166', 'step': 10846, 'epoch': 1}
{'type': 'loss', 'content': 0.030843479558825493, 'timestamp': '2025-10-02 00:30:00.092480', 'step': 10847, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:00.148246', 'step': 10847, 'epoch': 1}
{'type': 'loss', 'content': 0.0285869799554348, 'timestamp': '2025-10-02 00:30:00.154942', 'step': 10848, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:00.208644', 'step': 10848, 'epoch': 1}
{'type': 'loss', 'content': 0.039427757263183594, 'timestamp': '2025-10-02 00:30:00.216510', 'step': 10849, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:00.271656', 'step': 10849, 'epoch': 1}
{'type': 'loss', 'content': 0.026483215391635895, 'timestamp': '2025-10-02 00:30:00.280999', 'step': 10850, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:00.336272', 'step': 10850, 'epoch': 1}
{'type': 'loss', 'content': 0.11776899546384811, 'timestamp': '2025-10-02 00:30:00.343808', 'step': 10851, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:00.399641', 'step': 10851, 'epoch': 1}
{'type': 'loss', 'content': 0.027537815272808075, 'timestamp': '2025-10-02 00:30:00.409791', 'step': 10852, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:00.463269', 'step': 10852, 'epoch': 1}
{'type': 'loss', 'content': 0.08187451958656311, 'timestamp': '2025-10-02 00:30:00.469547', 'step': 10853, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:30:00.523965', 'step': 10853, 'epoch': 1}
{'type': 'loss', 'content': 0.0761144682765007, 'timestamp': '2025-10-02 00:30:00.526447', 'step': 10854, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:00.581149', 'step': 10854, 'epoch': 1}
{'type': 'loss', 'content': 0.051305338740348816, 'timestamp': '2025-10-02 00:30:00.585466', 'step': 10855, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:00.642199', 'step': 10855, 'epoch': 1}
{'type': 'loss', 'content': 0.043915726244449615, 'timestamp': '2025-10-02 00:30:00.652540', 'step': 10856, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:30:00.707515', 'step': 10856, 'epoch': 1}
{'type': 'loss', 'content': 0.058216292411088943, 'timestamp': '2025-10-02 00:30:00.710242', 'step': 10857, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:00.767677', 'step': 10857, 'epoch': 1}
{'type': 'loss', 'content': 0.005859317258000374, 'timestamp': '2025-10-02 00:30:00.777262', 'step': 10858, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:00.834389', 'step': 10858, 'epoch': 1}
{'type': 'loss', 'content': 0.03931446000933647, 'timestamp': '2025-10-02 00:30:00.843768', 'step': 10859, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:00.900235', 'step': 10859, 'epoch': 1}
{'type': 'loss', 'content': 0.04283593222498894, 'timestamp': '2025-10-02 00:30:00.910595', 'step': 10860, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:00.969441', 'step': 10860, 'epoch': 1}
{'type': 'loss', 'content': 0.017174990847706795, 'timestamp': '2025-10-02 00:30:00.972238', 'step': 10861, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:01.034061', 'step': 10861, 'epoch': 1}
{'type': 'loss', 'content': 0.11212816834449768, 'timestamp': '2025-10-02 00:30:01.039551', 'step': 10862, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:01.096287', 'step': 10862, 'epoch': 1}
{'type': 'loss', 'content': 0.5149341821670532, 'timestamp': '2025-10-02 00:30:01.100883', 'step': 10863, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:01.159206', 'step': 10863, 'epoch': 1}
{'type': 'loss', 'content': 0.05360618978738785, 'timestamp': '2025-10-02 00:30:01.166592', 'step': 10864, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:01.222321', 'step': 10864, 'epoch': 1}
{'type': 'loss', 'content': 0.11823878437280655, 'timestamp': '2025-10-02 00:30:01.226018', 'step': 10865, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:30:01.283273', 'step': 10865, 'epoch': 1}
{'type': 'loss', 'content': 0.15450060367584229, 'timestamp': '2025-10-02 00:30:01.297437', 'step': 10866, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:01.378162', 'step': 10866, 'epoch': 1}
{'type': 'loss', 'content': 0.21155086159706116, 'timestamp': '2025-10-02 00:30:01.380535', 'step': 10867, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:01.434790', 'step': 10867, 'epoch': 1}
{'type': 'loss', 'content': 0.1529829502105713, 'timestamp': '2025-10-02 00:30:01.441647', 'step': 10868, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:30:01.497635', 'step': 10868, 'epoch': 1}
{'type': 'loss', 'content': 0.0733376145362854, 'timestamp': '2025-10-02 00:30:01.501131', 'step': 10869, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:01.560095', 'step': 10869, 'epoch': 1}
{'type': 'loss', 'content': 0.2148546576499939, 'timestamp': '2025-10-02 00:30:01.562845', 'step': 10870, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:01.620154', 'step': 10870, 'epoch': 1}
{'type': 'loss', 'content': 0.04994833841919899, 'timestamp': '2025-10-02 00:30:01.622764', 'step': 10871, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:01.679458', 'step': 10871, 'epoch': 1}
{'type': 'loss', 'content': 0.15592308342456818, 'timestamp': '2025-10-02 00:30:01.685674', 'step': 10872, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:30:01.740932', 'step': 10872, 'epoch': 1}
{'type': 'loss', 'content': 0.12032352387905121, 'timestamp': '2025-10-02 00:30:01.743343', 'step': 10873, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:01.799396', 'step': 10873, 'epoch': 1}
{'type': 'loss', 'content': 0.09121225029230118, 'timestamp': '2025-10-02 00:30:01.802820', 'step': 10874, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:01.861519', 'step': 10874, 'epoch': 1}
{'type': 'loss', 'content': 0.048264164477586746, 'timestamp': '2025-10-02 00:30:01.867646', 'step': 10875, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:01.922193', 'step': 10875, 'epoch': 1}
{'type': 'loss', 'content': 0.13305078446865082, 'timestamp': '2025-10-02 00:30:01.932321', 'step': 10876, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:30:01.994599', 'step': 10876, 'epoch': 1}
{'type': 'loss', 'content': 0.06663828343153, 'timestamp': '2025-10-02 00:30:02.005891', 'step': 10877, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:30:02.070923', 'step': 10877, 'epoch': 1}
{'type': 'loss', 'content': 0.018797021359205246, 'timestamp': '2025-10-02 00:30:02.081438', 'step': 10878, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:02.137124', 'step': 10878, 'epoch': 1}
{'type': 'loss', 'content': 0.01649317890405655, 'timestamp': '2025-10-02 00:30:02.143129', 'step': 10879, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:30:02.212288', 'step': 10879, 'epoch': 1}
{'type': 'loss', 'content': 0.02026631310582161, 'timestamp': '2025-10-02 00:30:02.223580', 'step': 10880, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:02.280594', 'step': 10880, 'epoch': 1}
{'type': 'loss', 'content': 0.04350738972425461, 'timestamp': '2025-10-02 00:30:02.286753', 'step': 10881, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:02.343426', 'step': 10881, 'epoch': 1}
{'type': 'loss', 'content': 0.10967028886079788, 'timestamp': '2025-10-02 00:30:02.349597', 'step': 10882, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:02.406860', 'step': 10882, 'epoch': 1}
{'type': 'loss', 'content': 0.15234841406345367, 'timestamp': '2025-10-02 00:30:02.409537', 'step': 10883, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:30:02.466052', 'step': 10883, 'epoch': 1}
{'type': 'loss', 'content': 0.10821272432804108, 'timestamp': '2025-10-02 00:30:02.472725', 'step': 10884, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:02.529818', 'step': 10884, 'epoch': 1}
{'type': 'loss', 'content': 0.027699192985892296, 'timestamp': '2025-10-02 00:30:02.536050', 'step': 10885, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:30:02.601596', 'step': 10885, 'epoch': 1}
{'type': 'loss', 'content': 0.02741669863462448, 'timestamp': '2025-10-02 00:30:02.612297', 'step': 10886, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:02.671703', 'step': 10886, 'epoch': 1}
{'type': 'loss', 'content': 0.15717457234859467, 'timestamp': '2025-10-02 00:30:02.674879', 'step': 10887, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:02.732400', 'step': 10887, 'epoch': 1}
{'type': 'loss', 'content': 0.03128392621874809, 'timestamp': '2025-10-02 00:30:02.741003', 'step': 10888, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:02.797148', 'step': 10888, 'epoch': 1}
{'type': 'loss', 'content': 0.1293765753507614, 'timestamp': '2025-10-02 00:30:02.811939', 'step': 10889, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:02.873010', 'step': 10889, 'epoch': 1}
{'type': 'loss', 'content': 0.06122398003935814, 'timestamp': '2025-10-02 00:30:02.882584', 'step': 10890, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:02.940616', 'step': 10890, 'epoch': 1}
{'type': 'loss', 'content': 0.02314525656402111, 'timestamp': '2025-10-02 00:30:02.944348', 'step': 10891, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:03.000100', 'step': 10891, 'epoch': 1}
{'type': 'loss', 'content': 0.1874484121799469, 'timestamp': '2025-10-02 00:30:03.008182', 'step': 10892, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:30:03.078570', 'step': 10892, 'epoch': 1}
{'type': 'loss', 'content': 0.05331623554229736, 'timestamp': '2025-10-02 00:30:03.092337', 'step': 10893, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:03.151116', 'step': 10893, 'epoch': 1}
{'type': 'loss', 'content': 0.07805575430393219, 'timestamp': '2025-10-02 00:30:03.160710', 'step': 10894, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:03.224813', 'step': 10894, 'epoch': 1}
{'type': 'loss', 'content': 0.1123744398355484, 'timestamp': '2025-10-02 00:30:03.227160', 'step': 10895, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:03.281521', 'step': 10895, 'epoch': 1}
{'type': 'loss', 'content': 0.023876534774899483, 'timestamp': '2025-10-02 00:30:03.289904', 'step': 10896, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:03.344380', 'step': 10896, 'epoch': 1}
{'type': 'loss', 'content': 0.16027885675430298, 'timestamp': '2025-10-02 00:30:03.353930', 'step': 10897, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:03.408815', 'step': 10897, 'epoch': 1}
{'type': 'loss', 'content': 0.05399753898382187, 'timestamp': '2025-10-02 00:30:03.411356', 'step': 10898, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:30:03.467066', 'step': 10898, 'epoch': 1}
{'type': 'loss', 'content': 0.19212375581264496, 'timestamp': '2025-10-02 00:30:03.469683', 'step': 10899, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:30:03.525069', 'step': 10899, 'epoch': 1}
{'type': 'loss', 'content': 0.08994851261377335, 'timestamp': '2025-10-02 00:30:03.531353', 'step': 10900, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:30:03.593339', 'step': 10900, 'epoch': 1}
{'type': 'loss', 'content': 0.06650178134441376, 'timestamp': '2025-10-02 00:30:03.604304', 'step': 10901, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:03.658756', 'step': 10901, 'epoch': 1}
{'type': 'loss', 'content': 0.14845874905586243, 'timestamp': '2025-10-02 00:30:03.661369', 'step': 10902, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:03.715595', 'step': 10902, 'epoch': 1}
{'type': 'loss', 'content': 0.12239430844783783, 'timestamp': '2025-10-02 00:30:03.718390', 'step': 10903, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:03.773326', 'step': 10903, 'epoch': 1}
{'type': 'loss', 'content': 0.0342562310397625, 'timestamp': '2025-10-02 00:30:03.779977', 'step': 10904, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:03.839714', 'step': 10904, 'epoch': 1}
{'type': 'loss', 'content': 0.043054502457380295, 'timestamp': '2025-10-02 00:30:03.842593', 'step': 10905, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:03.902194', 'step': 10905, 'epoch': 1}
{'type': 'loss', 'content': 0.11360230296850204, 'timestamp': '2025-10-02 00:30:03.904423', 'step': 10906, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:03.960937', 'step': 10906, 'epoch': 1}
{'type': 'loss', 'content': 0.08150852471590042, 'timestamp': '2025-10-02 00:30:03.970460', 'step': 10907, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:04.028298', 'step': 10907, 'epoch': 1}
{'type': 'loss', 'content': 0.03245345503091812, 'timestamp': '2025-10-02 00:30:04.038627', 'step': 10908, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:30:04.093646', 'step': 10908, 'epoch': 1}
{'type': 'loss', 'content': 0.12800580263137817, 'timestamp': '2025-10-02 00:30:04.095994', 'step': 10909, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:04.151659', 'step': 10909, 'epoch': 1}
{'type': 'loss', 'content': 0.052614904940128326, 'timestamp': '2025-10-02 00:30:04.159584', 'step': 10910, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:30:04.215669', 'step': 10910, 'epoch': 1}
{'type': 'loss', 'content': 0.182667076587677, 'timestamp': '2025-10-02 00:30:04.218109', 'step': 10911, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:30:04.274014', 'step': 10911, 'epoch': 1}
{'type': 'loss', 'content': 0.04800316318869591, 'timestamp': '2025-10-02 00:30:04.279928', 'step': 10912, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:04.335150', 'step': 10912, 'epoch': 1}
{'type': 'loss', 'content': 0.15934155881404877, 'timestamp': '2025-10-02 00:30:04.341298', 'step': 10913, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:04.396353', 'step': 10913, 'epoch': 1}
{'type': 'loss', 'content': 0.029749048873782158, 'timestamp': '2025-10-02 00:30:04.405720', 'step': 10914, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:04.460273', 'step': 10914, 'epoch': 1}
{'type': 'loss', 'content': 0.19573403894901276, 'timestamp': '2025-10-02 00:30:04.463100', 'step': 10915, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:04.527087', 'step': 10915, 'epoch': 1}
{'type': 'loss', 'content': 0.07583414018154144, 'timestamp': '2025-10-02 00:30:04.533430', 'step': 10916, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:04.587648', 'step': 10916, 'epoch': 1}
{'type': 'loss', 'content': 0.10246579349040985, 'timestamp': '2025-10-02 00:30:04.589808', 'step': 10917, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:04.643809', 'step': 10917, 'epoch': 1}
{'type': 'loss', 'content': 0.10245541483163834, 'timestamp': '2025-10-02 00:30:04.647379', 'step': 10918, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:04.702373', 'step': 10918, 'epoch': 1}
{'type': 'loss', 'content': 0.10954175889492035, 'timestamp': '2025-10-02 00:30:04.711762', 'step': 10919, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:04.767857', 'step': 10919, 'epoch': 1}
{'type': 'loss', 'content': 0.06781657040119171, 'timestamp': '2025-10-02 00:30:04.774787', 'step': 10920, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:04.828943', 'step': 10920, 'epoch': 1}
{'type': 'loss', 'content': 0.03140726685523987, 'timestamp': '2025-10-02 00:30:04.838979', 'step': 10921, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:30:04.892930', 'step': 10921, 'epoch': 1}
{'type': 'loss', 'content': 0.08129438012838364, 'timestamp': '2025-10-02 00:30:04.895233', 'step': 10922, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:04.959019', 'step': 10922, 'epoch': 1}
{'type': 'loss', 'content': 0.06248743087053299, 'timestamp': '2025-10-02 00:30:04.962007', 'step': 10923, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:05.016546', 'step': 10923, 'epoch': 1}
{'type': 'loss', 'content': 0.02696576528251171, 'timestamp': '2025-10-02 00:30:05.025268', 'step': 10924, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:30:05.088358', 'step': 10924, 'epoch': 1}
{'type': 'loss', 'content': 0.0657852292060852, 'timestamp': '2025-10-02 00:30:05.099897', 'step': 10925, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:05.154992', 'step': 10925, 'epoch': 1}
{'type': 'loss', 'content': 0.10455750674009323, 'timestamp': '2025-10-02 00:30:05.157561', 'step': 10926, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:05.212757', 'step': 10926, 'epoch': 1}
{'type': 'loss', 'content': 0.16285458207130432, 'timestamp': '2025-10-02 00:30:05.215303', 'step': 10927, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:05.271587', 'step': 10927, 'epoch': 1}
{'type': 'loss', 'content': 0.07073231041431427, 'timestamp': '2025-10-02 00:30:05.281901', 'step': 10928, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:05.336851', 'step': 10928, 'epoch': 1}
{'type': 'loss', 'content': 0.09259321540594101, 'timestamp': '2025-10-02 00:30:05.339633', 'step': 10929, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:05.394735', 'step': 10929, 'epoch': 1}
{'type': 'loss', 'content': 0.1979266256093979, 'timestamp': '2025-10-02 00:30:05.397131', 'step': 10930, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:30:05.459918', 'step': 10930, 'epoch': 1}
{'type': 'loss', 'content': 0.1037544533610344, 'timestamp': '2025-10-02 00:30:05.462257', 'step': 10931, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:05.518212', 'step': 10931, 'epoch': 1}
{'type': 'loss', 'content': 0.018655117601156235, 'timestamp': '2025-10-02 00:30:05.528367', 'step': 10932, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:30:05.583615', 'step': 10932, 'epoch': 1}
{'type': 'loss', 'content': 0.2965523302555084, 'timestamp': '2025-10-02 00:30:05.586155', 'step': 10933, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:05.639911', 'step': 10933, 'epoch': 1}
{'type': 'loss', 'content': 0.15639494359493256, 'timestamp': '2025-10-02 00:30:05.642541', 'step': 10934, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:05.698415', 'step': 10934, 'epoch': 1}
{'type': 'loss', 'content': 0.03637245297431946, 'timestamp': '2025-10-02 00:30:05.707987', 'step': 10935, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:05.764966', 'step': 10935, 'epoch': 1}
{'type': 'loss', 'content': 0.039206363260746, 'timestamp': '2025-10-02 00:30:05.771003', 'step': 10936, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:05.826772', 'step': 10936, 'epoch': 1}
{'type': 'loss', 'content': 0.06270314007997513, 'timestamp': '2025-10-02 00:30:05.836522', 'step': 10937, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:05.895571', 'step': 10937, 'epoch': 1}
{'type': 'loss', 'content': 0.04288739711046219, 'timestamp': '2025-10-02 00:30:05.897714', 'step': 10938, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:05.951716', 'step': 10938, 'epoch': 1}
{'type': 'loss', 'content': 0.09785941243171692, 'timestamp': '2025-10-02 00:30:05.954139', 'step': 10939, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:06.010589', 'step': 10939, 'epoch': 1}
{'type': 'loss', 'content': 0.05475000664591789, 'timestamp': '2025-10-02 00:30:06.017495', 'step': 10940, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:06.071727', 'step': 10940, 'epoch': 1}
{'type': 'loss', 'content': 0.12205568701028824, 'timestamp': '2025-10-02 00:30:06.074215', 'step': 10941, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:06.127956', 'step': 10941, 'epoch': 1}
{'type': 'loss', 'content': 0.1948917806148529, 'timestamp': '2025-10-02 00:30:06.130508', 'step': 10942, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:30:06.184306', 'step': 10942, 'epoch': 1}
{'type': 'loss', 'content': 0.10238619893789291, 'timestamp': '2025-10-02 00:30:06.196016', 'step': 10943, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:30:06.257361', 'step': 10943, 'epoch': 1}
{'type': 'loss', 'content': 0.041185714304447174, 'timestamp': '2025-10-02 00:30:06.268817', 'step': 10944, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:06.323464', 'step': 10944, 'epoch': 1}
{'type': 'loss', 'content': 0.0918332189321518, 'timestamp': '2025-10-02 00:30:06.325914', 'step': 10945, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:30:06.381360', 'step': 10945, 'epoch': 1}
{'type': 'loss', 'content': 0.10953736305236816, 'timestamp': '2025-10-02 00:30:06.383755', 'step': 10946, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:06.438744', 'step': 10946, 'epoch': 1}
{'type': 'loss', 'content': 0.08410240709781647, 'timestamp': '2025-10-02 00:30:06.448070', 'step': 10947, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:06.503046', 'step': 10947, 'epoch': 1}
{'type': 'loss', 'content': 0.03378133848309517, 'timestamp': '2025-10-02 00:30:06.511514', 'step': 10948, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:06.565997', 'step': 10948, 'epoch': 1}
{'type': 'loss', 'content': 0.021222785115242004, 'timestamp': '2025-10-02 00:30:06.575768', 'step': 10949, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:06.630989', 'step': 10949, 'epoch': 1}
{'type': 'loss', 'content': 0.10842623561620712, 'timestamp': '2025-10-02 00:30:06.633704', 'step': 10950, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:30:06.689266', 'step': 10950, 'epoch': 1}
{'type': 'loss', 'content': 0.11493386328220367, 'timestamp': '2025-10-02 00:30:06.692229', 'step': 10951, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:06.746904', 'step': 10951, 'epoch': 1}
{'type': 'loss', 'content': 0.053410738706588745, 'timestamp': '2025-10-02 00:30:06.753910', 'step': 10952, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:06.808743', 'step': 10952, 'epoch': 1}
{'type': 'loss', 'content': 0.01808003894984722, 'timestamp': '2025-10-02 00:30:06.814790', 'step': 10953, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:30:06.876031', 'step': 10953, 'epoch': 1}
{'type': 'loss', 'content': 0.0996922105550766, 'timestamp': '2025-10-02 00:30:06.886211', 'step': 10954, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:30:06.941286', 'step': 10954, 'epoch': 1}
{'type': 'loss', 'content': 0.07837367057800293, 'timestamp': '2025-10-02 00:30:06.943981', 'step': 10955, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:06.998777', 'step': 10955, 'epoch': 1}
{'type': 'loss', 'content': 0.09188975393772125, 'timestamp': '2025-10-02 00:30:07.007265', 'step': 10956, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:07.061167', 'step': 10956, 'epoch': 1}
{'type': 'loss', 'content': 0.043254416435956955, 'timestamp': '2025-10-02 00:30:07.068909', 'step': 10957, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:30:07.131211', 'step': 10957, 'epoch': 1}
{'type': 'loss', 'content': 0.031846702098846436, 'timestamp': '2025-10-02 00:30:07.141887', 'step': 10958, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:07.195827', 'step': 10958, 'epoch': 1}
{'type': 'loss', 'content': 0.04630497843027115, 'timestamp': '2025-10-02 00:30:07.198448', 'step': 10959, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:07.252710', 'step': 10959, 'epoch': 1}
{'type': 'loss', 'content': 0.16397254168987274, 'timestamp': '2025-10-02 00:30:07.259240', 'step': 10960, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:07.313801', 'step': 10960, 'epoch': 1}
{'type': 'loss', 'content': 0.026530258357524872, 'timestamp': '2025-10-02 00:30:07.316234', 'step': 10961, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:30:07.378877', 'step': 10961, 'epoch': 1}
{'type': 'loss', 'content': 0.03921390324831009, 'timestamp': '2025-10-02 00:30:07.389772', 'step': 10962, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:07.443783', 'step': 10962, 'epoch': 1}
{'type': 'loss', 'content': 0.16375859081745148, 'timestamp': '2025-10-02 00:30:07.446321', 'step': 10963, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:30:07.501155', 'step': 10963, 'epoch': 1}
{'type': 'loss', 'content': 0.03548545017838478, 'timestamp': '2025-10-02 00:30:07.507049', 'step': 10964, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:07.561568', 'step': 10964, 'epoch': 1}
{'type': 'loss', 'content': 0.05717984214425087, 'timestamp': '2025-10-02 00:30:07.569295', 'step': 10965, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:07.624460', 'step': 10965, 'epoch': 1}
{'type': 'loss', 'content': 0.14429035782814026, 'timestamp': '2025-10-02 00:30:07.626917', 'step': 10966, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:07.683415', 'step': 10966, 'epoch': 1}
{'type': 'loss', 'content': 0.031250808387994766, 'timestamp': '2025-10-02 00:30:07.686584', 'step': 10967, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:30:07.740593', 'step': 10967, 'epoch': 1}
{'type': 'loss', 'content': 0.12132123857736588, 'timestamp': '2025-10-02 00:30:07.746912', 'step': 10968, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:30:07.807660', 'step': 10968, 'epoch': 1}
{'type': 'loss', 'content': 0.038120973855257034, 'timestamp': '2025-10-02 00:30:07.819003', 'step': 10969, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:30:07.874351', 'step': 10969, 'epoch': 1}
{'type': 'loss', 'content': 0.13806650042533875, 'timestamp': '2025-10-02 00:30:07.876806', 'step': 10970, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:30:07.939139', 'step': 10970, 'epoch': 1}
{'type': 'loss', 'content': 0.03740045800805092, 'timestamp': '2025-10-02 00:30:07.949807', 'step': 10971, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:08.006230', 'step': 10971, 'epoch': 1}
{'type': 'loss', 'content': 0.03711709752678871, 'timestamp': '2025-10-02 00:30:08.016576', 'step': 10972, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:08.071266', 'step': 10972, 'epoch': 1}
{'type': 'loss', 'content': 0.036000724881887436, 'timestamp': '2025-10-02 00:30:08.073741', 'step': 10973, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:08.128054', 'step': 10973, 'epoch': 1}
{'type': 'loss', 'content': 0.06025514751672745, 'timestamp': '2025-10-02 00:30:08.135952', 'step': 10974, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:08.191110', 'step': 10974, 'epoch': 1}
{'type': 'loss', 'content': 0.047496240586042404, 'timestamp': '2025-10-02 00:30:08.200418', 'step': 10975, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:30:08.254637', 'step': 10975, 'epoch': 1}
{'type': 'loss', 'content': 0.1382455676794052, 'timestamp': '2025-10-02 00:30:08.260849', 'step': 10976, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:08.314910', 'step': 10976, 'epoch': 1}
{'type': 'loss', 'content': 0.040638647973537445, 'timestamp': '2025-10-02 00:30:08.324649', 'step': 10977, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:08.381593', 'step': 10977, 'epoch': 1}
{'type': 'loss', 'content': 0.05148296803236008, 'timestamp': '2025-10-02 00:30:08.390932', 'step': 10978, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:08.446521', 'step': 10978, 'epoch': 1}
{'type': 'loss', 'content': 0.07617124170064926, 'timestamp': '2025-10-02 00:30:08.452683', 'step': 10979, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:08.507273', 'step': 10979, 'epoch': 1}
{'type': 'loss', 'content': 0.127167209982872, 'timestamp': '2025-10-02 00:30:08.514160', 'step': 10980, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:30:08.567751', 'step': 10980, 'epoch': 1}
{'type': 'loss', 'content': 0.06951320916414261, 'timestamp': '2025-10-02 00:30:08.570455', 'step': 10981, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:08.625778', 'step': 10981, 'epoch': 1}
{'type': 'loss', 'content': 0.05888594314455986, 'timestamp': '2025-10-02 00:30:08.635133', 'step': 10982, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:08.689964', 'step': 10982, 'epoch': 1}
{'type': 'loss', 'content': 0.14238515496253967, 'timestamp': '2025-10-02 00:30:08.692443', 'step': 10983, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:08.747813', 'step': 10983, 'epoch': 1}
{'type': 'loss', 'content': 0.049336180090904236, 'timestamp': '2025-10-02 00:30:08.753622', 'step': 10984, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:08.808240', 'step': 10984, 'epoch': 1}
{'type': 'loss', 'content': 0.0416906476020813, 'timestamp': '2025-10-02 00:30:08.810807', 'step': 10985, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:08.864690', 'step': 10985, 'epoch': 1}
{'type': 'loss', 'content': 0.18238753080368042, 'timestamp': '2025-10-02 00:30:08.867122', 'step': 10986, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:30:08.921332', 'step': 10986, 'epoch': 1}
{'type': 'loss', 'content': 0.09439855068922043, 'timestamp': '2025-10-02 00:30:08.923611', 'step': 10987, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:30:08.978288', 'step': 10987, 'epoch': 1}
{'type': 'loss', 'content': 0.11900211870670319, 'timestamp': '2025-10-02 00:30:08.983980', 'step': 10988, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:09.037772', 'step': 10988, 'epoch': 1}
{'type': 'loss', 'content': 0.06393010914325714, 'timestamp': '2025-10-02 00:30:09.047726', 'step': 10989, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:30:09.103146', 'step': 10989, 'epoch': 1}
{'type': 'loss', 'content': 0.09331361204385757, 'timestamp': '2025-10-02 00:30:09.108257', 'step': 10990, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:30:09.165127', 'step': 10990, 'epoch': 1}
{'type': 'loss', 'content': 0.1295662671327591, 'timestamp': '2025-10-02 00:30:09.167919', 'step': 10991, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:30:09.227119', 'step': 10991, 'epoch': 1}
{'type': 'loss', 'content': 0.05385122075676918, 'timestamp': '2025-10-02 00:30:09.238115', 'step': 10992, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:09.292202', 'step': 10992, 'epoch': 1}
{'type': 'loss', 'content': 0.09234822541475296, 'timestamp': '2025-10-02 00:30:09.299931', 'step': 10993, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:30:09.378056', 'step': 10993, 'epoch': 1}
{'type': 'loss', 'content': 0.09088592976331711, 'timestamp': '2025-10-02 00:30:09.391889', 'step': 10994, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:30:09.454174', 'step': 10994, 'epoch': 1}
{'type': 'loss', 'content': 0.013380615971982479, 'timestamp': '2025-10-02 00:30:09.464663', 'step': 10995, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:09.519045', 'step': 10995, 'epoch': 1}
{'type': 'loss', 'content': 0.16196244955062866, 'timestamp': '2025-10-02 00:30:09.525162', 'step': 10996, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:09.579048', 'step': 10996, 'epoch': 1}
{'type': 'loss', 'content': 0.07211919873952866, 'timestamp': '2025-10-02 00:30:09.581942', 'step': 10997, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:09.636516', 'step': 10997, 'epoch': 1}
{'type': 'loss', 'content': 0.19326941668987274, 'timestamp': '2025-10-02 00:30:09.639358', 'step': 10998, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:09.694236', 'step': 10998, 'epoch': 1}
{'type': 'loss', 'content': 0.04970371350646019, 'timestamp': '2025-10-02 00:30:09.703564', 'step': 10999, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:30:09.758832', 'step': 10999, 'epoch': 1}
{'type': 'loss', 'content': 0.14771632850170135, 'timestamp': '2025-10-02 00:30:09.766241', 'step': 11000, 'epoch': 1}
{'type': 'info', 'content': 'Checkpoint saved at step 11000', 'timestamp': '2025-10-02 00:30:10.178424', 'step': 11000, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:10.230880', 'step': 11000, 'epoch': 1}
{'type': 'loss', 'content': 0.1284216344356537, 'timestamp': '2025-10-02 00:30:10.233329', 'step': 11001, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:30:10.289254', 'step': 11001, 'epoch': 1}
{'type': 'loss', 'content': 0.36868003010749817, 'timestamp': '2025-10-02 00:30:10.291657', 'step': 11002, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:10.347300', 'step': 11002, 'epoch': 1}
{'type': 'loss', 'content': 0.039615657180547714, 'timestamp': '2025-10-02 00:30:10.349978', 'step': 11003, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:30:10.403806', 'step': 11003, 'epoch': 1}
{'type': 'loss', 'content': 0.1705995500087738, 'timestamp': '2025-10-02 00:30:10.411034', 'step': 11004, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:10.465296', 'step': 11004, 'epoch': 1}
{'type': 'loss', 'content': 0.17356276512145996, 'timestamp': '2025-10-02 00:30:10.468155', 'step': 11005, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:10.524560', 'step': 11005, 'epoch': 1}
{'type': 'loss', 'content': 0.018789704889059067, 'timestamp': '2025-10-02 00:30:10.533966', 'step': 11006, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:10.590072', 'step': 11006, 'epoch': 1}
{'type': 'loss', 'content': 0.1561530977487564, 'timestamp': '2025-10-02 00:30:10.592646', 'step': 11007, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:30:10.656506', 'step': 11007, 'epoch': 1}
{'type': 'loss', 'content': 0.05649414658546448, 'timestamp': '2025-10-02 00:30:10.668154', 'step': 11008, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:10.723590', 'step': 11008, 'epoch': 1}
{'type': 'loss', 'content': 0.052004411816596985, 'timestamp': '2025-10-02 00:30:10.725985', 'step': 11009, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:10.781352', 'step': 11009, 'epoch': 1}
{'type': 'loss', 'content': 0.03890928253531456, 'timestamp': '2025-10-02 00:30:10.789462', 'step': 11010, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:10.844320', 'step': 11010, 'epoch': 1}
{'type': 'loss', 'content': 0.04033181071281433, 'timestamp': '2025-10-02 00:30:10.850523', 'step': 11011, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:10.912036', 'step': 11011, 'epoch': 1}
{'type': 'loss', 'content': 0.07201734930276871, 'timestamp': '2025-10-02 00:30:10.918874', 'step': 11012, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:30:10.974137', 'step': 11012, 'epoch': 1}
{'type': 'loss', 'content': 0.2031320184469223, 'timestamp': '2025-10-02 00:30:10.980259', 'step': 11013, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:11.036337', 'step': 11013, 'epoch': 1}
{'type': 'loss', 'content': 0.03658868372440338, 'timestamp': '2025-10-02 00:30:11.044155', 'step': 11014, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:11.099781', 'step': 11014, 'epoch': 1}
{'type': 'loss', 'content': 0.16014213860034943, 'timestamp': '2025-10-02 00:30:11.102546', 'step': 11015, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:11.156976', 'step': 11015, 'epoch': 1}
{'type': 'loss', 'content': 0.055436037480831146, 'timestamp': '2025-10-02 00:30:11.162899', 'step': 11016, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:11.217889', 'step': 11016, 'epoch': 1}
{'type': 'loss', 'content': 0.06431582570075989, 'timestamp': '2025-10-02 00:30:11.224154', 'step': 11017, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:30:11.278925', 'step': 11017, 'epoch': 1}
{'type': 'loss', 'content': 0.07567382603883743, 'timestamp': '2025-10-02 00:30:11.281999', 'step': 11018, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:11.339357', 'step': 11018, 'epoch': 1}
{'type': 'loss', 'content': 0.06775545328855515, 'timestamp': '2025-10-02 00:30:11.348897', 'step': 11019, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:30:11.414874', 'step': 11019, 'epoch': 1}
{'type': 'loss', 'content': 0.015596605837345123, 'timestamp': '2025-10-02 00:30:11.426321', 'step': 11020, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:11.481714', 'step': 11020, 'epoch': 1}
{'type': 'loss', 'content': 0.06801753491163254, 'timestamp': '2025-10-02 00:30:11.487958', 'step': 11021, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:11.544916', 'step': 11021, 'epoch': 1}
{'type': 'loss', 'content': 0.14421573281288147, 'timestamp': '2025-10-02 00:30:11.547829', 'step': 11022, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:30:11.607233', 'step': 11022, 'epoch': 1}
{'type': 'loss', 'content': 0.03762809932231903, 'timestamp': '2025-10-02 00:30:11.617381', 'step': 11023, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:30:11.673152', 'step': 11023, 'epoch': 1}
{'type': 'loss', 'content': 0.14399118721485138, 'timestamp': '2025-10-02 00:30:11.679301', 'step': 11024, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:11.733051', 'step': 11024, 'epoch': 1}
{'type': 'loss', 'content': 0.08297014236450195, 'timestamp': '2025-10-02 00:30:11.735384', 'step': 11025, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:11.789852', 'step': 11025, 'epoch': 1}
{'type': 'loss', 'content': 0.12296608835458755, 'timestamp': '2025-10-02 00:30:11.792966', 'step': 11026, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:30:11.848103', 'step': 11026, 'epoch': 1}
{'type': 'loss', 'content': 0.219059556722641, 'timestamp': '2025-10-02 00:30:11.850742', 'step': 11027, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:11.905604', 'step': 11027, 'epoch': 1}
{'type': 'loss', 'content': 0.11643575131893158, 'timestamp': '2025-10-02 00:30:11.911761', 'step': 11028, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:11.965779', 'step': 11028, 'epoch': 1}
{'type': 'loss', 'content': 0.040547315031290054, 'timestamp': '2025-10-02 00:30:11.968123', 'step': 11029, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:12.022160', 'step': 11029, 'epoch': 1}
{'type': 'loss', 'content': 0.08439089357852936, 'timestamp': '2025-10-02 00:30:12.029980', 'step': 11030, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:12.084326', 'step': 11030, 'epoch': 1}
{'type': 'loss', 'content': 0.09298034012317657, 'timestamp': '2025-10-02 00:30:12.086642', 'step': 11031, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:12.140779', 'step': 11031, 'epoch': 1}
{'type': 'loss', 'content': 0.04969298094511032, 'timestamp': '2025-10-02 00:30:12.146764', 'step': 11032, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:12.202782', 'step': 11032, 'epoch': 1}
{'type': 'loss', 'content': 0.15796005725860596, 'timestamp': '2025-10-02 00:30:12.205782', 'step': 11033, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:12.262058', 'step': 11033, 'epoch': 1}
{'type': 'loss', 'content': 0.07726732641458511, 'timestamp': '2025-10-02 00:30:12.264851', 'step': 11034, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:12.319592', 'step': 11034, 'epoch': 1}
{'type': 'loss', 'content': 0.09871815890073776, 'timestamp': '2025-10-02 00:30:12.322180', 'step': 11035, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:12.377602', 'step': 11035, 'epoch': 1}
{'type': 'loss', 'content': 0.03606712818145752, 'timestamp': '2025-10-02 00:30:12.386161', 'step': 11036, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:30:12.453962', 'step': 11036, 'epoch': 1}
{'type': 'loss', 'content': 0.05257223919034004, 'timestamp': '2025-10-02 00:30:12.467381', 'step': 11037, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:12.522681', 'step': 11037, 'epoch': 1}
{'type': 'loss', 'content': 0.0354447215795517, 'timestamp': '2025-10-02 00:30:12.530498', 'step': 11038, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:30:12.585601', 'step': 11038, 'epoch': 1}
{'type': 'loss', 'content': 0.10445325821638107, 'timestamp': '2025-10-02 00:30:12.588350', 'step': 11039, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:12.644209', 'step': 11039, 'epoch': 1}
{'type': 'loss', 'content': 0.06840971112251282, 'timestamp': '2025-10-02 00:30:12.651543', 'step': 11040, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:12.706733', 'step': 11040, 'epoch': 1}
{'type': 'loss', 'content': 0.02511690929532051, 'timestamp': '2025-10-02 00:30:12.709153', 'step': 11041, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:30:12.763672', 'step': 11041, 'epoch': 1}
{'type': 'loss', 'content': 0.13651974499225616, 'timestamp': '2025-10-02 00:30:12.766214', 'step': 11042, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:12.820015', 'step': 11042, 'epoch': 1}
{'type': 'loss', 'content': 0.16273432970046997, 'timestamp': '2025-10-02 00:30:12.822816', 'step': 11043, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:30:12.885153', 'step': 11043, 'epoch': 1}
{'type': 'loss', 'content': 0.04362824931740761, 'timestamp': '2025-10-02 00:30:12.896405', 'step': 11044, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:30:12.949646', 'step': 11044, 'epoch': 1}
{'type': 'loss', 'content': 0.13693353533744812, 'timestamp': '2025-10-02 00:30:12.952240', 'step': 11045, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:13.007380', 'step': 11045, 'epoch': 1}
{'type': 'loss', 'content': 0.08417940884828568, 'timestamp': '2025-10-02 00:30:13.009893', 'step': 11046, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:30:13.069115', 'step': 11046, 'epoch': 1}
{'type': 'loss', 'content': 0.10556270182132721, 'timestamp': '2025-10-02 00:30:13.079305', 'step': 11047, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:13.134446', 'step': 11047, 'epoch': 1}
{'type': 'loss', 'content': 0.14598561823368073, 'timestamp': '2025-10-02 00:30:13.140975', 'step': 11048, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:30:13.208800', 'step': 11048, 'epoch': 1}
{'type': 'loss', 'content': 0.049560826271772385, 'timestamp': '2025-10-02 00:30:13.222163', 'step': 11049, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:13.277122', 'step': 11049, 'epoch': 1}
{'type': 'loss', 'content': 0.07305499911308289, 'timestamp': '2025-10-02 00:30:13.286448', 'step': 11050, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:13.344279', 'step': 11050, 'epoch': 1}
{'type': 'loss', 'content': 0.07258877903223038, 'timestamp': '2025-10-02 00:30:13.346814', 'step': 11051, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:30:13.401489', 'step': 11051, 'epoch': 1}
{'type': 'loss', 'content': 0.06897421181201935, 'timestamp': '2025-10-02 00:30:13.409857', 'step': 11052, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:13.463592', 'step': 11052, 'epoch': 1}
{'type': 'loss', 'content': 0.08864951878786087, 'timestamp': '2025-10-02 00:30:13.474394', 'step': 11053, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:13.548716', 'step': 11053, 'epoch': 1}
{'type': 'loss', 'content': 0.09127142280340195, 'timestamp': '2025-10-02 00:30:13.574500', 'step': 11054, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:30:13.656362', 'step': 11054, 'epoch': 1}
{'type': 'loss', 'content': 0.1724306344985962, 'timestamp': '2025-10-02 00:30:13.664292', 'step': 11055, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:13.755793', 'step': 11055, 'epoch': 1}
{'type': 'loss', 'content': 0.14845681190490723, 'timestamp': '2025-10-02 00:30:13.771146', 'step': 11056, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:13.842589', 'step': 11056, 'epoch': 1}
{'type': 'loss', 'content': 0.14516596496105194, 'timestamp': '2025-10-02 00:30:13.856490', 'step': 11057, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:13.937010', 'step': 11057, 'epoch': 1}
{'type': 'loss', 'content': 0.08227239549160004, 'timestamp': '2025-10-02 00:30:13.943056', 'step': 11058, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:14.044204', 'step': 11058, 'epoch': 1}
{'type': 'loss', 'content': 0.113970547914505, 'timestamp': '2025-10-02 00:30:14.051948', 'step': 11059, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:30:14.149898', 'step': 11059, 'epoch': 1}
{'type': 'loss', 'content': 0.1799849271774292, 'timestamp': '2025-10-02 00:30:14.170914', 'step': 11060, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:14.263651', 'step': 11060, 'epoch': 1}
{'type': 'loss', 'content': 0.11425930261611938, 'timestamp': '2025-10-02 00:30:14.275302', 'step': 11061, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:14.353499', 'step': 11061, 'epoch': 1}
{'type': 'loss', 'content': 0.06226148456335068, 'timestamp': '2025-10-02 00:30:14.362081', 'step': 11062, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:14.436009', 'step': 11062, 'epoch': 1}
{'type': 'loss', 'content': 0.07037453353404999, 'timestamp': '2025-10-02 00:30:14.441644', 'step': 11063, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:14.531635', 'step': 11063, 'epoch': 1}
{'type': 'loss', 'content': 0.05334680899977684, 'timestamp': '2025-10-02 00:30:14.546993', 'step': 11064, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:30:14.624132', 'step': 11064, 'epoch': 1}
{'type': 'loss', 'content': 0.10467807948589325, 'timestamp': '2025-10-02 00:30:14.630083', 'step': 11065, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:14.710907', 'step': 11065, 'epoch': 1}
{'type': 'loss', 'content': 0.02579977735877037, 'timestamp': '2025-10-02 00:30:14.720249', 'step': 11066, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:14.774595', 'step': 11066, 'epoch': 1}
{'type': 'loss', 'content': 0.1894696205854416, 'timestamp': '2025-10-02 00:30:14.777107', 'step': 11067, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:14.832287', 'step': 11067, 'epoch': 1}
{'type': 'loss', 'content': 0.04220963642001152, 'timestamp': '2025-10-02 00:30:14.838252', 'step': 11068, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:14.892435', 'step': 11068, 'epoch': 1}
{'type': 'loss', 'content': 0.09802953898906708, 'timestamp': '2025-10-02 00:30:14.902721', 'step': 11069, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:14.957657', 'step': 11069, 'epoch': 1}
{'type': 'loss', 'content': 0.08948220312595367, 'timestamp': '2025-10-02 00:30:14.960213', 'step': 11070, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:15.014688', 'step': 11070, 'epoch': 1}
{'type': 'loss', 'content': 0.07188263535499573, 'timestamp': '2025-10-02 00:30:15.016960', 'step': 11071, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:15.075107', 'step': 11071, 'epoch': 1}
{'type': 'loss', 'content': 0.10911683738231659, 'timestamp': '2025-10-02 00:30:15.082071', 'step': 11072, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:30:15.137502', 'step': 11072, 'epoch': 1}
{'type': 'loss', 'content': 0.14392343163490295, 'timestamp': '2025-10-02 00:30:15.140095', 'step': 11073, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:30:15.194397', 'step': 11073, 'epoch': 1}
{'type': 'loss', 'content': 0.06920834630727768, 'timestamp': '2025-10-02 00:30:15.196749', 'step': 11074, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:15.251092', 'step': 11074, 'epoch': 1}
{'type': 'loss', 'content': 0.12222455441951752, 'timestamp': '2025-10-02 00:30:15.253483', 'step': 11075, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:30:15.309133', 'step': 11075, 'epoch': 1}
{'type': 'loss', 'content': 0.06462708860635757, 'timestamp': '2025-10-02 00:30:15.315051', 'step': 11076, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:30:15.383997', 'step': 11076, 'epoch': 1}
{'type': 'loss', 'content': 0.02510463260114193, 'timestamp': '2025-10-02 00:30:15.397563', 'step': 11077, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:15.466449', 'step': 11077, 'epoch': 1}
{'type': 'loss', 'content': 0.20616118609905243, 'timestamp': '2025-10-02 00:30:15.469061', 'step': 11078, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:30:15.523923', 'step': 11078, 'epoch': 1}
{'type': 'loss', 'content': 0.08422403782606125, 'timestamp': '2025-10-02 00:30:15.526268', 'step': 11079, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:30:15.580622', 'step': 11079, 'epoch': 1}
{'type': 'loss', 'content': 0.014428185299038887, 'timestamp': '2025-10-02 00:30:15.586518', 'step': 11080, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:30:15.640354', 'step': 11080, 'epoch': 1}
{'type': 'loss', 'content': 0.10001326352357864, 'timestamp': '2025-10-02 00:30:15.642796', 'step': 11081, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:30:15.697114', 'step': 11081, 'epoch': 1}
{'type': 'loss', 'content': 0.09764552861452103, 'timestamp': '2025-10-02 00:30:15.699860', 'step': 11082, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:30:15.758981', 'step': 11082, 'epoch': 1}
{'type': 'loss', 'content': 0.10514724254608154, 'timestamp': '2025-10-02 00:30:15.769181', 'step': 11083, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:15.823751', 'step': 11083, 'epoch': 1}
{'type': 'loss', 'content': 0.05993938818573952, 'timestamp': '2025-10-02 00:30:15.832282', 'step': 11084, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:15.886077', 'step': 11084, 'epoch': 1}
{'type': 'loss', 'content': 0.10487545281648636, 'timestamp': '2025-10-02 00:30:15.888619', 'step': 11085, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:15.944048', 'step': 11085, 'epoch': 1}
{'type': 'loss', 'content': 0.056797437369823456, 'timestamp': '2025-10-02 00:30:15.950292', 'step': 11086, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:16.005147', 'step': 11086, 'epoch': 1}
{'type': 'loss', 'content': 0.028305526822805405, 'timestamp': '2025-10-02 00:30:16.007764', 'step': 11087, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:16.061708', 'step': 11087, 'epoch': 1}
{'type': 'loss', 'content': 0.13632813096046448, 'timestamp': '2025-10-02 00:30:16.067713', 'step': 11088, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:16.124180', 'step': 11088, 'epoch': 1}
{'type': 'loss', 'content': 0.026580313220620155, 'timestamp': '2025-10-02 00:30:16.134425', 'step': 11089, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:16.191067', 'step': 11089, 'epoch': 1}
{'type': 'loss', 'content': 0.04047857224941254, 'timestamp': '2025-10-02 00:30:16.200616', 'step': 11090, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:16.255377', 'step': 11090, 'epoch': 1}
{'type': 'loss', 'content': 0.04471656680107117, 'timestamp': '2025-10-02 00:30:16.261632', 'step': 11091, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:30:16.321663', 'step': 11091, 'epoch': 1}
{'type': 'loss', 'content': 0.13027650117874146, 'timestamp': '2025-10-02 00:30:16.332623', 'step': 11092, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:16.386264', 'step': 11092, 'epoch': 1}
{'type': 'loss', 'content': 0.13488242030143738, 'timestamp': '2025-10-02 00:30:16.388977', 'step': 11093, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:30:16.443226', 'step': 11093, 'epoch': 1}
{'type': 'loss', 'content': 0.12648507952690125, 'timestamp': '2025-10-02 00:30:16.445915', 'step': 11094, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:30:16.514053', 'step': 11094, 'epoch': 1}
{'type': 'loss', 'content': 0.008635654114186764, 'timestamp': '2025-10-02 00:30:16.526043', 'step': 11095, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:30:16.580999', 'step': 11095, 'epoch': 1}
{'type': 'loss', 'content': 0.10915689170360565, 'timestamp': '2025-10-02 00:30:16.587078', 'step': 11096, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:16.641744', 'step': 11096, 'epoch': 1}
{'type': 'loss', 'content': 0.06143372505903244, 'timestamp': '2025-10-02 00:30:16.644227', 'step': 11097, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:16.698760', 'step': 11097, 'epoch': 1}
{'type': 'loss', 'content': 0.04247557744383812, 'timestamp': '2025-10-02 00:30:16.701121', 'step': 11098, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:16.755809', 'step': 11098, 'epoch': 1}
{'type': 'loss', 'content': 0.008952263742685318, 'timestamp': '2025-10-02 00:30:16.759663', 'step': 11099, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:16.815661', 'step': 11099, 'epoch': 1}
{'type': 'loss', 'content': 0.05725063756108284, 'timestamp': '2025-10-02 00:30:16.825982', 'step': 11100, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:16.880094', 'step': 11100, 'epoch': 1}
{'type': 'loss', 'content': 0.19012506306171417, 'timestamp': '2025-10-02 00:30:16.882460', 'step': 11101, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:16.937965', 'step': 11101, 'epoch': 1}
{'type': 'loss', 'content': 0.01563214138150215, 'timestamp': '2025-10-02 00:30:16.945911', 'step': 11102, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:30:17.009237', 'step': 11102, 'epoch': 1}
{'type': 'loss', 'content': 0.021467745304107666, 'timestamp': '2025-10-02 00:30:17.019899', 'step': 11103, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:17.074828', 'step': 11103, 'epoch': 1}
{'type': 'loss', 'content': 0.055336035788059235, 'timestamp': '2025-10-02 00:30:17.081713', 'step': 11104, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:17.136423', 'step': 11104, 'epoch': 1}
{'type': 'loss', 'content': 0.055128905922174454, 'timestamp': '2025-10-02 00:30:17.146686', 'step': 11105, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:30:17.200895', 'step': 11105, 'epoch': 1}
{'type': 'loss', 'content': 0.13732193410396576, 'timestamp': '2025-10-02 00:30:17.203283', 'step': 11106, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:17.258033', 'step': 11106, 'epoch': 1}
{'type': 'loss', 'content': 0.02900313027203083, 'timestamp': '2025-10-02 00:30:17.260418', 'step': 11107, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:17.314491', 'step': 11107, 'epoch': 1}
{'type': 'loss', 'content': 0.02115832455456257, 'timestamp': '2025-10-02 00:30:17.321356', 'step': 11108, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:17.374995', 'step': 11108, 'epoch': 1}
{'type': 'loss', 'content': 0.11376141756772995, 'timestamp': '2025-10-02 00:30:17.377576', 'step': 11109, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:30:17.431843', 'step': 11109, 'epoch': 1}
{'type': 'loss', 'content': 0.06659281998872757, 'timestamp': '2025-10-02 00:30:17.434617', 'step': 11110, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:17.489572', 'step': 11110, 'epoch': 1}
{'type': 'loss', 'content': 0.18132562935352325, 'timestamp': '2025-10-02 00:30:17.492294', 'step': 11111, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:17.546782', 'step': 11111, 'epoch': 1}
{'type': 'loss', 'content': 0.0771612748503685, 'timestamp': '2025-10-02 00:30:17.552777', 'step': 11112, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:30:17.607199', 'step': 11112, 'epoch': 1}
{'type': 'loss', 'content': 0.08329460024833679, 'timestamp': '2025-10-02 00:30:17.609569', 'step': 11113, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:17.663928', 'step': 11113, 'epoch': 1}
{'type': 'loss', 'content': 0.05959773063659668, 'timestamp': '2025-10-02 00:30:17.666540', 'step': 11114, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:17.721267', 'step': 11114, 'epoch': 1}
{'type': 'loss', 'content': 0.11662176996469498, 'timestamp': '2025-10-02 00:30:17.723473', 'step': 11115, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:30:17.778685', 'step': 11115, 'epoch': 1}
{'type': 'loss', 'content': 0.08221248537302017, 'timestamp': '2025-10-02 00:30:17.784569', 'step': 11116, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:17.838483', 'step': 11116, 'epoch': 1}
{'type': 'loss', 'content': 0.09066268801689148, 'timestamp': '2025-10-02 00:30:17.848675', 'step': 11117, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:17.903589', 'step': 11117, 'epoch': 1}
{'type': 'loss', 'content': 0.0957050547003746, 'timestamp': '2025-10-02 00:30:17.909824', 'step': 11118, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:17.965604', 'step': 11118, 'epoch': 1}
{'type': 'loss', 'content': 0.020872656255960464, 'timestamp': '2025-10-02 00:30:17.968720', 'step': 11119, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:18.028460', 'step': 11119, 'epoch': 1}
{'type': 'loss', 'content': 0.048951759934425354, 'timestamp': '2025-10-02 00:30:18.034715', 'step': 11120, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:30:18.094582', 'step': 11120, 'epoch': 1}
{'type': 'loss', 'content': 0.1471557766199112, 'timestamp': '2025-10-02 00:30:18.097670', 'step': 11121, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:30:18.169587', 'step': 11121, 'epoch': 1}
{'type': 'loss', 'content': 0.05207519605755806, 'timestamp': '2025-10-02 00:30:18.180040', 'step': 11122, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:18.255428', 'step': 11122, 'epoch': 1}
{'type': 'loss', 'content': 0.08938131481409073, 'timestamp': '2025-10-02 00:30:18.257863', 'step': 11123, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:30:18.319106', 'step': 11123, 'epoch': 1}
{'type': 'loss', 'content': 0.03548053279519081, 'timestamp': '2025-10-02 00:30:18.330099', 'step': 11124, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:18.412247', 'step': 11124, 'epoch': 1}
{'type': 'loss', 'content': 0.03469790890812874, 'timestamp': '2025-10-02 00:30:18.420148', 'step': 11125, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:18.491802', 'step': 11125, 'epoch': 1}
{'type': 'loss', 'content': 0.18705523014068604, 'timestamp': '2025-10-02 00:30:18.494829', 'step': 11126, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:30:18.567414', 'step': 11126, 'epoch': 1}
{'type': 'loss', 'content': 0.10735796391963959, 'timestamp': '2025-10-02 00:30:18.569412', 'step': 11127, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:18.633496', 'step': 11127, 'epoch': 1}
{'type': 'loss', 'content': 0.018768051639199257, 'timestamp': '2025-10-02 00:30:18.639886', 'step': 11128, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:30:18.704938', 'step': 11128, 'epoch': 1}
{'type': 'loss', 'content': 0.10344604402780533, 'timestamp': '2025-10-02 00:30:18.708948', 'step': 11129, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:30:18.788966', 'step': 11129, 'epoch': 1}
{'type': 'loss', 'content': 0.01967354491353035, 'timestamp': '2025-10-02 00:30:18.805231', 'step': 11130, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:18.868154', 'step': 11130, 'epoch': 1}
{'type': 'loss', 'content': 0.06689170002937317, 'timestamp': '2025-10-02 00:30:18.871397', 'step': 11131, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:30:18.928565', 'step': 11131, 'epoch': 1}
{'type': 'loss', 'content': 0.1236390620470047, 'timestamp': '2025-10-02 00:30:18.935276', 'step': 11132, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:30:19.015596', 'step': 11132, 'epoch': 1}
{'type': 'loss', 'content': 0.11735785752534866, 'timestamp': '2025-10-02 00:30:19.018316', 'step': 11133, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:19.089978', 'step': 11133, 'epoch': 1}
{'type': 'loss', 'content': 0.15426906943321228, 'timestamp': '2025-10-02 00:30:19.093254', 'step': 11134, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:30:19.162140', 'step': 11134, 'epoch': 1}
{'type': 'loss', 'content': 0.06405294686555862, 'timestamp': '2025-10-02 00:30:19.164604', 'step': 11135, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:19.218997', 'step': 11135, 'epoch': 1}
{'type': 'loss', 'content': 0.05235746130347252, 'timestamp': '2025-10-02 00:30:19.225154', 'step': 11136, 'epoch': 1}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:30:49.121729', 'step': 11136, 'epoch': 1}
{'type': 'pplx', 'content': 97.00214101769006, 'timestamp': '2025-10-02 00:30:49.126477', 'step': 11136, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:49.192266', 'step': 11136, 'epoch': 1}
{'type': 'loss', 'content': 0.07125414907932281, 'timestamp': '2025-10-02 00:30:49.195715', 'step': 11137, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:30:49.264341', 'step': 11137, 'epoch': 1}
{'type': 'loss', 'content': 0.12879140675067902, 'timestamp': '2025-10-02 00:30:49.276104', 'step': 11138, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:30:49.378612', 'step': 11138, 'epoch': 1}
{'type': 'loss', 'content': 0.025124842301011086, 'timestamp': '2025-10-02 00:30:49.390990', 'step': 11139, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:30:49.493216', 'step': 11139, 'epoch': 1}
{'type': 'loss', 'content': 0.026528673246502876, 'timestamp': '2025-10-02 00:30:49.504839', 'step': 11140, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:30:49.585656', 'step': 11140, 'epoch': 1}
{'type': 'loss', 'content': 0.131140798330307, 'timestamp': '2025-10-02 00:30:49.598565', 'step': 11141, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:49.659095', 'step': 11141, 'epoch': 1}
{'type': 'loss', 'content': 0.03442030027508736, 'timestamp': '2025-10-02 00:30:49.666735', 'step': 11142, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:49.751795', 'step': 11142, 'epoch': 1}
{'type': 'loss', 'content': 0.05674169957637787, 'timestamp': '2025-10-02 00:30:49.761221', 'step': 11143, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:49.835844', 'step': 11143, 'epoch': 1}
{'type': 'loss', 'content': 0.0791810154914856, 'timestamp': '2025-10-02 00:30:49.843956', 'step': 11144, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:49.913465', 'step': 11144, 'epoch': 1}
{'type': 'loss', 'content': 0.03584826737642288, 'timestamp': '2025-10-02 00:30:49.917402', 'step': 11145, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:49.978711', 'step': 11145, 'epoch': 1}
{'type': 'loss', 'content': 0.053046274930238724, 'timestamp': '2025-10-02 00:30:49.984069', 'step': 11146, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:50.070099', 'step': 11146, 'epoch': 1}
{'type': 'loss', 'content': 0.09070394188165665, 'timestamp': '2025-10-02 00:30:50.076222', 'step': 11147, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 112], 'flops': 560003483248.0}, 'timestamp': '2025-10-02 00:30:50.168792', 'step': 11147, 'epoch': 1}
{'type': 'loss', 'content': 0.06682749092578888, 'timestamp': '2025-10-02 00:30:50.175322', 'step': 11148, 'epoch': 1}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:50.260075', 'step': 11148, 'epoch': 2}
{'type': 'loss', 'content': 0.04997444525361061, 'timestamp': '2025-10-02 00:30:50.269562', 'step': 11149, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:50.335815', 'step': 11149, 'epoch': 2}
{'type': 'loss', 'content': 0.08518907427787781, 'timestamp': '2025-10-02 00:30:50.339053', 'step': 11150, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:50.421775', 'step': 11150, 'epoch': 2}
{'type': 'loss', 'content': 0.19802062213420868, 'timestamp': '2025-10-02 00:30:50.426490', 'step': 11151, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:50.485130', 'step': 11151, 'epoch': 2}
{'type': 'loss', 'content': 0.06267242878675461, 'timestamp': '2025-10-02 00:30:50.492397', 'step': 11152, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:50.551923', 'step': 11152, 'epoch': 2}
{'type': 'loss', 'content': 0.06411713361740112, 'timestamp': '2025-10-02 00:30:50.562233', 'step': 11153, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:50.632734', 'step': 11153, 'epoch': 2}
{'type': 'loss', 'content': 0.0690455362200737, 'timestamp': '2025-10-02 00:30:50.636405', 'step': 11154, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:50.693714', 'step': 11154, 'epoch': 2}
{'type': 'loss', 'content': 0.030663544312119484, 'timestamp': '2025-10-02 00:30:50.706222', 'step': 11155, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:30:50.791247', 'step': 11155, 'epoch': 2}
{'type': 'loss', 'content': 0.046243563294410706, 'timestamp': '2025-10-02 00:30:50.805788', 'step': 11156, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:50.861843', 'step': 11156, 'epoch': 2}
{'type': 'loss', 'content': 0.11299216747283936, 'timestamp': '2025-10-02 00:30:50.866793', 'step': 11157, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:30:50.927253', 'step': 11157, 'epoch': 2}
{'type': 'loss', 'content': 0.10455973446369171, 'timestamp': '2025-10-02 00:30:50.951205', 'step': 11158, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:51.039183', 'step': 11158, 'epoch': 2}
{'type': 'loss', 'content': 0.10301429033279419, 'timestamp': '2025-10-02 00:30:51.042771', 'step': 11159, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:51.101415', 'step': 11159, 'epoch': 2}
{'type': 'loss', 'content': 0.09793742746114731, 'timestamp': '2025-10-02 00:30:51.109380', 'step': 11160, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:51.191370', 'step': 11160, 'epoch': 2}
{'type': 'loss', 'content': 0.07155022025108337, 'timestamp': '2025-10-02 00:30:51.207554', 'step': 11161, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:30:51.306840', 'step': 11161, 'epoch': 2}
{'type': 'loss', 'content': 0.01921709254384041, 'timestamp': '2025-10-02 00:30:51.317311', 'step': 11162, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:51.378379', 'step': 11162, 'epoch': 2}
{'type': 'loss', 'content': 0.07292574644088745, 'timestamp': '2025-10-02 00:30:51.391663', 'step': 11163, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:51.448877', 'step': 11163, 'epoch': 2}
{'type': 'loss', 'content': 0.16647319495677948, 'timestamp': '2025-10-02 00:30:51.457005', 'step': 11164, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:51.514570', 'step': 11164, 'epoch': 2}
{'type': 'loss', 'content': 0.047244589775800705, 'timestamp': '2025-10-02 00:30:51.518505', 'step': 11165, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:51.602267', 'step': 11165, 'epoch': 2}
{'type': 'loss', 'content': 0.02861448936164379, 'timestamp': '2025-10-02 00:30:51.605928', 'step': 11166, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:51.675055', 'step': 11166, 'epoch': 2}
{'type': 'loss', 'content': 0.03127007558941841, 'timestamp': '2025-10-02 00:30:51.688117', 'step': 11167, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:51.757000', 'step': 11167, 'epoch': 2}
{'type': 'loss', 'content': 0.10031923651695251, 'timestamp': '2025-10-02 00:30:51.763952', 'step': 11168, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:30:51.820880', 'step': 11168, 'epoch': 2}
{'type': 'loss', 'content': 0.11176405102014542, 'timestamp': '2025-10-02 00:30:51.826050', 'step': 11169, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:30:51.893490', 'step': 11169, 'epoch': 2}
{'type': 'loss', 'content': 0.0907888412475586, 'timestamp': '2025-10-02 00:30:51.896679', 'step': 11170, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:51.991393', 'step': 11170, 'epoch': 2}
{'type': 'loss', 'content': 0.030063314363360405, 'timestamp': '2025-10-02 00:30:52.000700', 'step': 11171, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:52.075890', 'step': 11171, 'epoch': 2}
{'type': 'loss', 'content': 0.017521340399980545, 'timestamp': '2025-10-02 00:30:52.084310', 'step': 11172, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:52.167122', 'step': 11172, 'epoch': 2}
{'type': 'loss', 'content': 0.07529210299253464, 'timestamp': '2025-10-02 00:30:52.183631', 'step': 11173, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:52.245868', 'step': 11173, 'epoch': 2}
{'type': 'loss', 'content': 0.013914266601204872, 'timestamp': '2025-10-02 00:30:52.258632', 'step': 11174, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:52.328417', 'step': 11174, 'epoch': 2}
{'type': 'loss', 'content': 0.1311662495136261, 'timestamp': '2025-10-02 00:30:52.342851', 'step': 11175, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:52.421591', 'step': 11175, 'epoch': 2}
{'type': 'loss', 'content': 0.22741365432739258, 'timestamp': '2025-10-02 00:30:52.442264', 'step': 11176, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:52.512518', 'step': 11176, 'epoch': 2}
{'type': 'loss', 'content': 0.03887411206960678, 'timestamp': '2025-10-02 00:30:52.528374', 'step': 11177, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:52.609220', 'step': 11177, 'epoch': 2}
{'type': 'loss', 'content': 0.07244156301021576, 'timestamp': '2025-10-02 00:30:52.614095', 'step': 11178, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:52.700652', 'step': 11178, 'epoch': 2}
{'type': 'loss', 'content': 0.08760451525449753, 'timestamp': '2025-10-02 00:30:52.718229', 'step': 11179, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:30:52.820340', 'step': 11179, 'epoch': 2}
{'type': 'loss', 'content': 0.060417529195547104, 'timestamp': '2025-10-02 00:30:52.837157', 'step': 11180, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:52.932462', 'step': 11180, 'epoch': 2}
{'type': 'loss', 'content': 0.06957435607910156, 'timestamp': '2025-10-02 00:30:52.938999', 'step': 11181, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:53.021648', 'step': 11181, 'epoch': 2}
{'type': 'loss', 'content': 0.12193647772073746, 'timestamp': '2025-10-02 00:30:53.045747', 'step': 11182, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:30:53.128214', 'step': 11182, 'epoch': 2}
{'type': 'loss', 'content': 0.08117961138486862, 'timestamp': '2025-10-02 00:30:53.134367', 'step': 11183, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:30:53.197353', 'step': 11183, 'epoch': 2}
{'type': 'loss', 'content': 0.1391829550266266, 'timestamp': '2025-10-02 00:30:53.222886', 'step': 11184, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:53.310934', 'step': 11184, 'epoch': 2}
{'type': 'loss', 'content': 0.0839373916387558, 'timestamp': '2025-10-02 00:30:53.315634', 'step': 11185, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:53.377670', 'step': 11185, 'epoch': 2}
{'type': 'loss', 'content': 0.0526776947081089, 'timestamp': '2025-10-02 00:30:53.381708', 'step': 11186, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:30:53.497606', 'step': 11186, 'epoch': 2}
{'type': 'loss', 'content': 0.013489939272403717, 'timestamp': '2025-10-02 00:30:53.509837', 'step': 11187, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:53.586141', 'step': 11187, 'epoch': 2}
{'type': 'loss', 'content': 0.07671892642974854, 'timestamp': '2025-10-02 00:30:53.607328', 'step': 11188, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:53.680585', 'step': 11188, 'epoch': 2}
{'type': 'loss', 'content': 0.054348528385162354, 'timestamp': '2025-10-02 00:30:53.684874', 'step': 11189, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:30:53.748602', 'step': 11189, 'epoch': 2}
{'type': 'loss', 'content': 0.140558123588562, 'timestamp': '2025-10-02 00:30:53.753289', 'step': 11190, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:30:53.837394', 'step': 11190, 'epoch': 2}
{'type': 'loss', 'content': 0.04010758176445961, 'timestamp': '2025-10-02 00:30:53.849317', 'step': 11191, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:53.923004', 'step': 11191, 'epoch': 2}
{'type': 'loss', 'content': 0.10645388066768646, 'timestamp': '2025-10-02 00:30:53.930038', 'step': 11192, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:53.989441', 'step': 11192, 'epoch': 2}
{'type': 'loss', 'content': 0.07520942389965057, 'timestamp': '2025-10-02 00:30:53.993770', 'step': 11193, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:54.051652', 'step': 11193, 'epoch': 2}
{'type': 'loss', 'content': 0.07874754816293716, 'timestamp': '2025-10-02 00:30:54.059535', 'step': 11194, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:54.118104', 'step': 11194, 'epoch': 2}
{'type': 'loss', 'content': 0.0583820603787899, 'timestamp': '2025-10-02 00:30:54.121224', 'step': 11195, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:54.178942', 'step': 11195, 'epoch': 2}
{'type': 'loss', 'content': 0.038005296140909195, 'timestamp': '2025-10-02 00:30:54.185814', 'step': 11196, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:54.241394', 'step': 11196, 'epoch': 2}
{'type': 'loss', 'content': 0.08577898889780045, 'timestamp': '2025-10-02 00:30:54.251665', 'step': 11197, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:54.307384', 'step': 11197, 'epoch': 2}
{'type': 'loss', 'content': 0.05329849570989609, 'timestamp': '2025-10-02 00:30:54.310488', 'step': 11198, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:30:54.369919', 'step': 11198, 'epoch': 2}
{'type': 'loss', 'content': 0.1378076821565628, 'timestamp': '2025-10-02 00:30:54.372463', 'step': 11199, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:54.438442', 'step': 11199, 'epoch': 2}
{'type': 'loss', 'content': 0.02808614820241928, 'timestamp': '2025-10-02 00:30:54.455454', 'step': 11200, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:30:54.535218', 'step': 11200, 'epoch': 2}
{'type': 'loss', 'content': 0.11350973695516586, 'timestamp': '2025-10-02 00:30:54.538901', 'step': 11201, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:30:54.604719', 'step': 11201, 'epoch': 2}
{'type': 'loss', 'content': 0.03300505131483078, 'timestamp': '2025-10-02 00:30:54.615728', 'step': 11202, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:54.692783', 'step': 11202, 'epoch': 2}
{'type': 'loss', 'content': 0.04885239899158478, 'timestamp': '2025-10-02 00:30:54.696757', 'step': 11203, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:54.770495', 'step': 11203, 'epoch': 2}
{'type': 'loss', 'content': 0.044004086405038834, 'timestamp': '2025-10-02 00:30:54.780632', 'step': 11204, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:54.836646', 'step': 11204, 'epoch': 2}
{'type': 'loss', 'content': 0.16451242566108704, 'timestamp': '2025-10-02 00:30:54.850035', 'step': 11205, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:54.917031', 'step': 11205, 'epoch': 2}
{'type': 'loss', 'content': 0.041899558156728745, 'timestamp': '2025-10-02 00:30:54.919918', 'step': 11206, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:54.997749', 'step': 11206, 'epoch': 2}
{'type': 'loss', 'content': 0.09948534518480301, 'timestamp': '2025-10-02 00:30:55.001210', 'step': 11207, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:55.064066', 'step': 11207, 'epoch': 2}
{'type': 'loss', 'content': 0.10679614543914795, 'timestamp': '2025-10-02 00:30:55.071998', 'step': 11208, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:30:55.143578', 'step': 11208, 'epoch': 2}
{'type': 'loss', 'content': 0.11283650994300842, 'timestamp': '2025-10-02 00:30:55.147008', 'step': 11209, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:55.218774', 'step': 11209, 'epoch': 2}
{'type': 'loss', 'content': 0.03390178084373474, 'timestamp': '2025-10-02 00:30:55.228179', 'step': 11210, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:55.295292', 'step': 11210, 'epoch': 2}
{'type': 'loss', 'content': 0.08147167414426804, 'timestamp': '2025-10-02 00:30:55.304764', 'step': 11211, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:55.373157', 'step': 11211, 'epoch': 2}
{'type': 'loss', 'content': 0.07849905639886856, 'timestamp': '2025-10-02 00:30:55.382761', 'step': 11212, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:55.443872', 'step': 11212, 'epoch': 2}
{'type': 'loss', 'content': 0.15774217247962952, 'timestamp': '2025-10-02 00:30:55.447235', 'step': 11213, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:55.507079', 'step': 11213, 'epoch': 2}
{'type': 'loss', 'content': 0.05133133381605148, 'timestamp': '2025-10-02 00:30:55.514937', 'step': 11214, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:55.579771', 'step': 11214, 'epoch': 2}
{'type': 'loss', 'content': 0.0854538232088089, 'timestamp': '2025-10-02 00:30:55.587540', 'step': 11215, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:55.644660', 'step': 11215, 'epoch': 2}
{'type': 'loss', 'content': 0.2125680148601532, 'timestamp': '2025-10-02 00:30:55.654613', 'step': 11216, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:30:55.725598', 'step': 11216, 'epoch': 2}
{'type': 'loss', 'content': 0.13616861402988434, 'timestamp': '2025-10-02 00:30:55.733258', 'step': 11217, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:55.801431', 'step': 11217, 'epoch': 2}
{'type': 'loss', 'content': 0.19858451187610626, 'timestamp': '2025-10-02 00:30:55.804707', 'step': 11218, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:55.875976', 'step': 11218, 'epoch': 2}
{'type': 'loss', 'content': 0.037725772708654404, 'timestamp': '2025-10-02 00:30:55.879777', 'step': 11219, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:55.947714', 'step': 11219, 'epoch': 2}
{'type': 'loss', 'content': 0.07032620161771774, 'timestamp': '2025-10-02 00:30:55.957289', 'step': 11220, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:56.022902', 'step': 11220, 'epoch': 2}
{'type': 'loss', 'content': 0.02768205665051937, 'timestamp': '2025-10-02 00:30:56.033000', 'step': 11221, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:30:56.091603', 'step': 11221, 'epoch': 2}
{'type': 'loss', 'content': 0.06813119351863861, 'timestamp': '2025-10-02 00:30:56.094820', 'step': 11222, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:56.161872', 'step': 11222, 'epoch': 2}
{'type': 'loss', 'content': 0.015191471204161644, 'timestamp': '2025-10-02 00:30:56.171430', 'step': 11223, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:56.243089', 'step': 11223, 'epoch': 2}
{'type': 'loss', 'content': 0.1517096906900406, 'timestamp': '2025-10-02 00:30:56.251043', 'step': 11224, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:56.326900', 'step': 11224, 'epoch': 2}
{'type': 'loss', 'content': 0.10820003598928452, 'timestamp': '2025-10-02 00:30:56.333178', 'step': 11225, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:30:56.417500', 'step': 11225, 'epoch': 2}
{'type': 'loss', 'content': 0.03669729828834534, 'timestamp': '2025-10-02 00:30:56.427813', 'step': 11226, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:30:56.497314', 'step': 11226, 'epoch': 2}
{'type': 'loss', 'content': 0.1142924576997757, 'timestamp': '2025-10-02 00:30:56.501718', 'step': 11227, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:56.563158', 'step': 11227, 'epoch': 2}
{'type': 'loss', 'content': 0.16906386613845825, 'timestamp': '2025-10-02 00:30:56.576962', 'step': 11228, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:56.632634', 'step': 11228, 'epoch': 2}
{'type': 'loss', 'content': 0.17768941819667816, 'timestamp': '2025-10-02 00:30:56.638849', 'step': 11229, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:30:56.710596', 'step': 11229, 'epoch': 2}
{'type': 'loss', 'content': 0.038578011095523834, 'timestamp': '2025-10-02 00:30:56.720828', 'step': 11230, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:56.781343', 'step': 11230, 'epoch': 2}
{'type': 'loss', 'content': 0.18787117302417755, 'timestamp': '2025-10-02 00:30:56.783906', 'step': 11231, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:56.848391', 'step': 11231, 'epoch': 2}
{'type': 'loss', 'content': 0.059781160205602646, 'timestamp': '2025-10-02 00:30:56.859915', 'step': 11232, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:30:56.921438', 'step': 11232, 'epoch': 2}
{'type': 'loss', 'content': 0.12568777799606323, 'timestamp': '2025-10-02 00:30:56.924542', 'step': 11233, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:56.992225', 'step': 11233, 'epoch': 2}
{'type': 'loss', 'content': 0.09400950372219086, 'timestamp': '2025-10-02 00:30:57.001816', 'step': 11234, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:57.061595', 'step': 11234, 'epoch': 2}
{'type': 'loss', 'content': 0.04924819990992546, 'timestamp': '2025-10-02 00:30:57.064972', 'step': 11235, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:30:57.130837', 'step': 11235, 'epoch': 2}
{'type': 'loss', 'content': 0.03719441220164299, 'timestamp': '2025-10-02 00:30:57.142305', 'step': 11236, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:57.210643', 'step': 11236, 'epoch': 2}
{'type': 'loss', 'content': 0.003808562643826008, 'timestamp': '2025-10-02 00:30:57.218639', 'step': 11237, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:57.298156', 'step': 11237, 'epoch': 2}
{'type': 'loss', 'content': 0.03661579266190529, 'timestamp': '2025-10-02 00:30:57.300817', 'step': 11238, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:57.369435', 'step': 11238, 'epoch': 2}
{'type': 'loss', 'content': 0.05786272510886192, 'timestamp': '2025-10-02 00:30:57.372057', 'step': 11239, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:57.425844', 'step': 11239, 'epoch': 2}
{'type': 'loss', 'content': 0.09059556573629379, 'timestamp': '2025-10-02 00:30:57.431844', 'step': 11240, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:57.486672', 'step': 11240, 'epoch': 2}
{'type': 'loss', 'content': 0.021338969469070435, 'timestamp': '2025-10-02 00:30:57.489075', 'step': 11241, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:57.544498', 'step': 11241, 'epoch': 2}
{'type': 'loss', 'content': 0.01841823384165764, 'timestamp': '2025-10-02 00:30:57.553861', 'step': 11242, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:57.608903', 'step': 11242, 'epoch': 2}
{'type': 'loss', 'content': 0.08416888862848282, 'timestamp': '2025-10-02 00:30:57.615063', 'step': 11243, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:57.673029', 'step': 11243, 'epoch': 2}
{'type': 'loss', 'content': 0.1563618928194046, 'timestamp': '2025-10-02 00:30:57.679111', 'step': 11244, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:57.737526', 'step': 11244, 'epoch': 2}
{'type': 'loss', 'content': 0.06289313733577728, 'timestamp': '2025-10-02 00:30:57.747309', 'step': 11245, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:57.802891', 'step': 11245, 'epoch': 2}
{'type': 'loss', 'content': 0.017320267856121063, 'timestamp': '2025-10-02 00:30:57.805333', 'step': 11246, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:30:57.859876', 'step': 11246, 'epoch': 2}
{'type': 'loss', 'content': 0.061151664704084396, 'timestamp': '2025-10-02 00:30:57.862241', 'step': 11247, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:57.919303', 'step': 11247, 'epoch': 2}
{'type': 'loss', 'content': 0.07865091413259506, 'timestamp': '2025-10-02 00:30:57.925229', 'step': 11248, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:30:57.978070', 'step': 11248, 'epoch': 2}
{'type': 'loss', 'content': 0.16287341713905334, 'timestamp': '2025-10-02 00:30:57.980830', 'step': 11249, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:30:58.034961', 'step': 11249, 'epoch': 2}
{'type': 'loss', 'content': 0.07410867512226105, 'timestamp': '2025-10-02 00:30:58.037646', 'step': 11250, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:30:58.099089', 'step': 11250, 'epoch': 2}
{'type': 'loss', 'content': 0.174991175532341, 'timestamp': '2025-10-02 00:30:58.101985', 'step': 11251, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:30:58.164332', 'step': 11251, 'epoch': 2}
{'type': 'loss', 'content': 0.04196900129318237, 'timestamp': '2025-10-02 00:30:58.175840', 'step': 11252, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:30:58.237301', 'step': 11252, 'epoch': 2}
{'type': 'loss', 'content': 0.01641734316945076, 'timestamp': '2025-10-02 00:30:58.248691', 'step': 11253, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:30:58.311906', 'step': 11253, 'epoch': 2}
{'type': 'loss', 'content': 0.011866528540849686, 'timestamp': '2025-10-02 00:30:58.322547', 'step': 11254, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:58.378183', 'step': 11254, 'epoch': 2}
{'type': 'loss', 'content': 0.07225288450717926, 'timestamp': '2025-10-02 00:30:58.387511', 'step': 11255, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:30:58.443651', 'step': 11255, 'epoch': 2}
{'type': 'loss', 'content': 0.06115148216485977, 'timestamp': '2025-10-02 00:30:58.450027', 'step': 11256, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:58.504421', 'step': 11256, 'epoch': 2}
{'type': 'loss', 'content': 0.12815575301647186, 'timestamp': '2025-10-02 00:30:58.518325', 'step': 11257, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:58.573590', 'step': 11257, 'epoch': 2}
{'type': 'loss', 'content': 0.08469930291175842, 'timestamp': '2025-10-02 00:30:58.576334', 'step': 11258, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:30:58.630235', 'step': 11258, 'epoch': 2}
{'type': 'loss', 'content': 0.08397698402404785, 'timestamp': '2025-10-02 00:30:58.633043', 'step': 11259, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:30:58.687483', 'step': 11259, 'epoch': 2}
{'type': 'loss', 'content': 0.16996438801288605, 'timestamp': '2025-10-02 00:30:58.693682', 'step': 11260, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:58.751757', 'step': 11260, 'epoch': 2}
{'type': 'loss', 'content': 0.07160460203886032, 'timestamp': '2025-10-02 00:30:58.755187', 'step': 11261, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:58.809527', 'step': 11261, 'epoch': 2}
{'type': 'loss', 'content': 0.1919195055961609, 'timestamp': '2025-10-02 00:30:58.812380', 'step': 11262, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:30:58.867674', 'step': 11262, 'epoch': 2}
{'type': 'loss', 'content': 0.025330394506454468, 'timestamp': '2025-10-02 00:30:58.870223', 'step': 11263, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:30:58.924962', 'step': 11263, 'epoch': 2}
{'type': 'loss', 'content': 0.09014983475208282, 'timestamp': '2025-10-02 00:30:58.931753', 'step': 11264, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:58.986390', 'step': 11264, 'epoch': 2}
{'type': 'loss', 'content': 0.21064364910125732, 'timestamp': '2025-10-02 00:30:58.988498', 'step': 11265, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:59.044618', 'step': 11265, 'epoch': 2}
{'type': 'loss', 'content': 0.04338281974196434, 'timestamp': '2025-10-02 00:30:59.052295', 'step': 11266, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:30:59.112795', 'step': 11266, 'epoch': 2}
{'type': 'loss', 'content': 0.028899215161800385, 'timestamp': '2025-10-02 00:30:59.122991', 'step': 11267, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:59.178090', 'step': 11267, 'epoch': 2}
{'type': 'loss', 'content': 0.3037380874156952, 'timestamp': '2025-10-02 00:30:59.185040', 'step': 11268, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:30:59.240003', 'step': 11268, 'epoch': 2}
{'type': 'loss', 'content': 0.14854739606380463, 'timestamp': '2025-10-02 00:30:59.244090', 'step': 11269, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:30:59.298892', 'step': 11269, 'epoch': 2}
{'type': 'loss', 'content': 0.058664046227931976, 'timestamp': '2025-10-02 00:30:59.301627', 'step': 11270, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:30:59.357210', 'step': 11270, 'epoch': 2}
{'type': 'loss', 'content': 0.10088849812746048, 'timestamp': '2025-10-02 00:30:59.360249', 'step': 11271, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:30:59.414998', 'step': 11271, 'epoch': 2}
{'type': 'loss', 'content': 0.13076460361480713, 'timestamp': '2025-10-02 00:30:59.420719', 'step': 11272, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:30:59.475911', 'step': 11272, 'epoch': 2}
{'type': 'loss', 'content': 0.02867577224969864, 'timestamp': '2025-10-02 00:30:59.486174', 'step': 11273, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:59.542889', 'step': 11273, 'epoch': 2}
{'type': 'loss', 'content': 0.18768206238746643, 'timestamp': '2025-10-02 00:30:59.545318', 'step': 11274, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:30:59.610802', 'step': 11274, 'epoch': 2}
{'type': 'loss', 'content': 0.037734437733888626, 'timestamp': '2025-10-02 00:30:59.620117', 'step': 11275, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:30:59.676492', 'step': 11275, 'epoch': 2}
{'type': 'loss', 'content': 0.09428627043962479, 'timestamp': '2025-10-02 00:30:59.685147', 'step': 11276, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:59.739806', 'step': 11276, 'epoch': 2}
{'type': 'loss', 'content': 0.021729836240410805, 'timestamp': '2025-10-02 00:30:59.742279', 'step': 11277, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:30:59.796813', 'step': 11277, 'epoch': 2}
{'type': 'loss', 'content': 0.016411883756518364, 'timestamp': '2025-10-02 00:30:59.799239', 'step': 11278, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:59.853980', 'step': 11278, 'epoch': 2}
{'type': 'loss', 'content': 0.14943177998065948, 'timestamp': '2025-10-02 00:30:59.856588', 'step': 11279, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:30:59.910606', 'step': 11279, 'epoch': 2}
{'type': 'loss', 'content': 0.17042185366153717, 'timestamp': '2025-10-02 00:30:59.917566', 'step': 11280, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:30:59.972391', 'step': 11280, 'epoch': 2}
{'type': 'loss', 'content': 0.04253032058477402, 'timestamp': '2025-10-02 00:30:59.980259', 'step': 11281, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:00.034508', 'step': 11281, 'epoch': 2}
{'type': 'loss', 'content': 0.20086443424224854, 'timestamp': '2025-10-02 00:31:00.037393', 'step': 11282, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:31:00.096765', 'step': 11282, 'epoch': 2}
{'type': 'loss', 'content': 0.051954712718725204, 'timestamp': '2025-10-02 00:31:00.106961', 'step': 11283, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:31:00.167479', 'step': 11283, 'epoch': 2}
{'type': 'loss', 'content': 0.00528726726770401, 'timestamp': '2025-10-02 00:31:00.178506', 'step': 11284, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:00.231342', 'step': 11284, 'epoch': 2}
{'type': 'loss', 'content': 0.22611309587955475, 'timestamp': '2025-10-02 00:31:00.233870', 'step': 11285, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:00.288594', 'step': 11285, 'epoch': 2}
{'type': 'loss', 'content': 0.019434602931141853, 'timestamp': '2025-10-02 00:31:00.294877', 'step': 11286, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:00.351723', 'step': 11286, 'epoch': 2}
{'type': 'loss', 'content': 0.13081294298171997, 'timestamp': '2025-10-02 00:31:00.354305', 'step': 11287, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:00.409285', 'step': 11287, 'epoch': 2}
{'type': 'loss', 'content': 0.05296137183904648, 'timestamp': '2025-10-02 00:31:00.419430', 'step': 11288, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:31:00.490352', 'step': 11288, 'epoch': 2}
{'type': 'loss', 'content': 0.04466792568564415, 'timestamp': '2025-10-02 00:31:00.504147', 'step': 11289, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:00.559250', 'step': 11289, 'epoch': 2}
{'type': 'loss', 'content': 0.1505960077047348, 'timestamp': '2025-10-02 00:31:00.561840', 'step': 11290, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:31:00.616614', 'step': 11290, 'epoch': 2}
{'type': 'loss', 'content': 0.09135352820158005, 'timestamp': '2025-10-02 00:31:00.619491', 'step': 11291, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:00.673927', 'step': 11291, 'epoch': 2}
{'type': 'loss', 'content': 0.025795741006731987, 'timestamp': '2025-10-02 00:31:00.682488', 'step': 11292, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:00.736507', 'step': 11292, 'epoch': 2}
{'type': 'loss', 'content': 0.13077636063098907, 'timestamp': '2025-10-02 00:31:00.739000', 'step': 11293, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:00.795237', 'step': 11293, 'epoch': 2}
{'type': 'loss', 'content': 0.0181413684040308, 'timestamp': '2025-10-02 00:31:00.801340', 'step': 11294, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:00.856069', 'step': 11294, 'epoch': 2}
{'type': 'loss', 'content': 0.1830706000328064, 'timestamp': '2025-10-02 00:31:00.858428', 'step': 11295, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:00.912250', 'step': 11295, 'epoch': 2}
{'type': 'loss', 'content': 0.04307388886809349, 'timestamp': '2025-10-02 00:31:00.919144', 'step': 11296, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:00.974271', 'step': 11296, 'epoch': 2}
{'type': 'loss', 'content': 0.12696176767349243, 'timestamp': '2025-10-02 00:31:00.976962', 'step': 11297, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:01.030679', 'step': 11297, 'epoch': 2}
{'type': 'loss', 'content': 0.09427863359451294, 'timestamp': '2025-10-02 00:31:01.033111', 'step': 11298, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:01.087163', 'step': 11298, 'epoch': 2}
{'type': 'loss', 'content': 0.2725420594215393, 'timestamp': '2025-10-02 00:31:01.089923', 'step': 11299, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:31:01.152186', 'step': 11299, 'epoch': 2}
{'type': 'loss', 'content': 0.08187395334243774, 'timestamp': '2025-10-02 00:31:01.163477', 'step': 11300, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:01.218633', 'step': 11300, 'epoch': 2}
{'type': 'loss', 'content': 0.08180036395788193, 'timestamp': '2025-10-02 00:31:01.221222', 'step': 11301, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:01.276307', 'step': 11301, 'epoch': 2}
{'type': 'loss', 'content': 0.031480904668569565, 'timestamp': '2025-10-02 00:31:01.284117', 'step': 11302, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:01.339280', 'step': 11302, 'epoch': 2}
{'type': 'loss', 'content': 0.27395498752593994, 'timestamp': '2025-10-02 00:31:01.342017', 'step': 11303, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:31:01.396632', 'step': 11303, 'epoch': 2}
{'type': 'loss', 'content': 0.09960222989320755, 'timestamp': '2025-10-02 00:31:01.403018', 'step': 11304, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:31:01.460998', 'step': 11304, 'epoch': 2}
{'type': 'loss', 'content': 0.05693262070417404, 'timestamp': '2025-10-02 00:31:01.471999', 'step': 11305, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:01.528135', 'step': 11305, 'epoch': 2}
{'type': 'loss', 'content': 0.20676426589488983, 'timestamp': '2025-10-02 00:31:01.531226', 'step': 11306, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:01.588030', 'step': 11306, 'epoch': 2}
{'type': 'loss', 'content': 0.1505063772201538, 'timestamp': '2025-10-02 00:31:01.591520', 'step': 11307, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:01.648218', 'step': 11307, 'epoch': 2}
{'type': 'loss', 'content': 0.17708474397659302, 'timestamp': '2025-10-02 00:31:01.654401', 'step': 11308, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:01.710013', 'step': 11308, 'epoch': 2}
{'type': 'loss', 'content': 0.18662025034427643, 'timestamp': '2025-10-02 00:31:01.716265', 'step': 11309, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:01.772147', 'step': 11309, 'epoch': 2}
{'type': 'loss', 'content': 0.08258303999900818, 'timestamp': '2025-10-02 00:31:01.775101', 'step': 11310, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:01.835315', 'step': 11310, 'epoch': 2}
{'type': 'loss', 'content': 0.10038456320762634, 'timestamp': '2025-10-02 00:31:01.837851', 'step': 11311, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:01.894041', 'step': 11311, 'epoch': 2}
{'type': 'loss', 'content': 0.14611414074897766, 'timestamp': '2025-10-02 00:31:01.900023', 'step': 11312, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:01.955665', 'step': 11312, 'epoch': 2}
{'type': 'loss', 'content': 0.02871943637728691, 'timestamp': '2025-10-02 00:31:01.963575', 'step': 11313, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:02.021252', 'step': 11313, 'epoch': 2}
{'type': 'loss', 'content': 0.026423688977956772, 'timestamp': '2025-10-02 00:31:02.030861', 'step': 11314, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:02.089847', 'step': 11314, 'epoch': 2}
{'type': 'loss', 'content': 0.021725207567214966, 'timestamp': '2025-10-02 00:31:02.099205', 'step': 11315, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:02.154480', 'step': 11315, 'epoch': 2}
{'type': 'loss', 'content': 0.3002373278141022, 'timestamp': '2025-10-02 00:31:02.161611', 'step': 11316, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:02.223649', 'step': 11316, 'epoch': 2}
{'type': 'loss', 'content': 0.09880249202251434, 'timestamp': '2025-10-02 00:31:02.226693', 'step': 11317, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:31:02.290392', 'step': 11317, 'epoch': 2}
{'type': 'loss', 'content': 0.03395364433526993, 'timestamp': '2025-10-02 00:31:02.300884', 'step': 11318, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:02.357860', 'step': 11318, 'epoch': 2}
{'type': 'loss', 'content': 0.07049254328012466, 'timestamp': '2025-10-02 00:31:02.361408', 'step': 11319, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:02.419669', 'step': 11319, 'epoch': 2}
{'type': 'loss', 'content': 0.05171152949333191, 'timestamp': '2025-10-02 00:31:02.426371', 'step': 11320, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:02.483644', 'step': 11320, 'epoch': 2}
{'type': 'loss', 'content': 0.1283663511276245, 'timestamp': '2025-10-02 00:31:02.486751', 'step': 11321, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:02.544606', 'step': 11321, 'epoch': 2}
{'type': 'loss', 'content': 0.1062009185552597, 'timestamp': '2025-10-02 00:31:02.548124', 'step': 11322, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:02.606277', 'step': 11322, 'epoch': 2}
{'type': 'loss', 'content': 0.1857098639011383, 'timestamp': '2025-10-02 00:31:02.611954', 'step': 11323, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:02.677700', 'step': 11323, 'epoch': 2}
{'type': 'loss', 'content': 0.05341807007789612, 'timestamp': '2025-10-02 00:31:02.683636', 'step': 11324, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:31:02.748161', 'step': 11324, 'epoch': 2}
{'type': 'loss', 'content': 0.04428659379482269, 'timestamp': '2025-10-02 00:31:02.759494', 'step': 11325, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:31:02.817356', 'step': 11325, 'epoch': 2}
{'type': 'loss', 'content': 0.198322132229805, 'timestamp': '2025-10-02 00:31:02.820271', 'step': 11326, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:31:02.884344', 'step': 11326, 'epoch': 2}
{'type': 'loss', 'content': 0.12570568919181824, 'timestamp': '2025-10-02 00:31:02.887380', 'step': 11327, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:02.943811', 'step': 11327, 'epoch': 2}
{'type': 'loss', 'content': 0.09687173366546631, 'timestamp': '2025-10-02 00:31:02.958745', 'step': 11328, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:03.017176', 'step': 11328, 'epoch': 2}
{'type': 'loss', 'content': 0.05031093955039978, 'timestamp': '2025-10-02 00:31:03.024988', 'step': 11329, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:03.082707', 'step': 11329, 'epoch': 2}
{'type': 'loss', 'content': 0.10261541604995728, 'timestamp': '2025-10-02 00:31:03.085761', 'step': 11330, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:03.142102', 'step': 11330, 'epoch': 2}
{'type': 'loss', 'content': 0.07717885822057724, 'timestamp': '2025-10-02 00:31:03.146528', 'step': 11331, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:03.206399', 'step': 11331, 'epoch': 2}
{'type': 'loss', 'content': 0.03737606853246689, 'timestamp': '2025-10-02 00:31:03.212780', 'step': 11332, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:03.268186', 'step': 11332, 'epoch': 2}
{'type': 'loss', 'content': 0.16173361241817474, 'timestamp': '2025-10-02 00:31:03.270942', 'step': 11333, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:03.329848', 'step': 11333, 'epoch': 2}
{'type': 'loss', 'content': 0.035825133323669434, 'timestamp': '2025-10-02 00:31:03.332774', 'step': 11334, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:03.388013', 'step': 11334, 'epoch': 2}
{'type': 'loss', 'content': 0.10928921401500702, 'timestamp': '2025-10-02 00:31:03.391096', 'step': 11335, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:03.445941', 'step': 11335, 'epoch': 2}
{'type': 'loss', 'content': 0.05156036838889122, 'timestamp': '2025-10-02 00:31:03.452753', 'step': 11336, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:03.507881', 'step': 11336, 'epoch': 2}
{'type': 'loss', 'content': 0.012530466541647911, 'timestamp': '2025-10-02 00:31:03.517596', 'step': 11337, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:31:03.572795', 'step': 11337, 'epoch': 2}
{'type': 'loss', 'content': 0.07449281960725784, 'timestamp': '2025-10-02 00:31:03.576087', 'step': 11338, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:03.631440', 'step': 11338, 'epoch': 2}
{'type': 'loss', 'content': 0.05088932067155838, 'timestamp': '2025-10-02 00:31:03.634025', 'step': 11339, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:03.688292', 'step': 11339, 'epoch': 2}
{'type': 'loss', 'content': 0.10451614111661911, 'timestamp': '2025-10-02 00:31:03.694263', 'step': 11340, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:03.748444', 'step': 11340, 'epoch': 2}
{'type': 'loss', 'content': 0.025326333940029144, 'timestamp': '2025-10-02 00:31:03.751551', 'step': 11341, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:03.806828', 'step': 11341, 'epoch': 2}
{'type': 'loss', 'content': 0.1417781263589859, 'timestamp': '2025-10-02 00:31:03.809690', 'step': 11342, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:31:03.865243', 'step': 11342, 'epoch': 2}
{'type': 'loss', 'content': 0.04820845648646355, 'timestamp': '2025-10-02 00:31:03.868200', 'step': 11343, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:03.924108', 'step': 11343, 'epoch': 2}
{'type': 'loss', 'content': 0.02269456349313259, 'timestamp': '2025-10-02 00:31:03.930285', 'step': 11344, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:03.984846', 'step': 11344, 'epoch': 2}
{'type': 'loss', 'content': 0.04961783438920975, 'timestamp': '2025-10-02 00:31:03.988042', 'step': 11345, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:04.042193', 'step': 11345, 'epoch': 2}
{'type': 'loss', 'content': 0.23932549357414246, 'timestamp': '2025-10-02 00:31:04.044585', 'step': 11346, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:04.100206', 'step': 11346, 'epoch': 2}
{'type': 'loss', 'content': 0.0743275135755539, 'timestamp': '2025-10-02 00:31:04.109583', 'step': 11347, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:04.164920', 'step': 11347, 'epoch': 2}
{'type': 'loss', 'content': 0.11926431208848953, 'timestamp': '2025-10-02 00:31:04.171329', 'step': 11348, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:04.227344', 'step': 11348, 'epoch': 2}
{'type': 'loss', 'content': 0.04949217289686203, 'timestamp': '2025-10-02 00:31:04.232922', 'step': 11349, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:04.290099', 'step': 11349, 'epoch': 2}
{'type': 'loss', 'content': 0.02440010942518711, 'timestamp': '2025-10-02 00:31:04.299621', 'step': 11350, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:04.356567', 'step': 11350, 'epoch': 2}
{'type': 'loss', 'content': 0.1139708012342453, 'timestamp': '2025-10-02 00:31:04.359275', 'step': 11351, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:04.415485', 'step': 11351, 'epoch': 2}
{'type': 'loss', 'content': 0.04637596011161804, 'timestamp': '2025-10-02 00:31:04.425626', 'step': 11352, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:04.480335', 'step': 11352, 'epoch': 2}
{'type': 'loss', 'content': 0.10096996277570724, 'timestamp': '2025-10-02 00:31:04.486446', 'step': 11353, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:31:04.549352', 'step': 11353, 'epoch': 2}
{'type': 'loss', 'content': 0.03291327878832817, 'timestamp': '2025-10-02 00:31:04.559856', 'step': 11354, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:04.616066', 'step': 11354, 'epoch': 2}
{'type': 'loss', 'content': 0.0749807059764862, 'timestamp': '2025-10-02 00:31:04.618696', 'step': 11355, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:04.675153', 'step': 11355, 'epoch': 2}
{'type': 'loss', 'content': 0.049135852605104446, 'timestamp': '2025-10-02 00:31:04.685065', 'step': 11356, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:31:04.744309', 'step': 11356, 'epoch': 2}
{'type': 'loss', 'content': 0.10367787629365921, 'timestamp': '2025-10-02 00:31:04.755292', 'step': 11357, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:04.811772', 'step': 11357, 'epoch': 2}
{'type': 'loss', 'content': 0.022620338946580887, 'timestamp': '2025-10-02 00:31:04.821185', 'step': 11358, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:04.876323', 'step': 11358, 'epoch': 2}
{'type': 'loss', 'content': 0.16248705983161926, 'timestamp': '2025-10-02 00:31:04.878684', 'step': 11359, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:04.934271', 'step': 11359, 'epoch': 2}
{'type': 'loss', 'content': 0.050182949751615524, 'timestamp': '2025-10-02 00:31:04.941206', 'step': 11360, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:04.997412', 'step': 11360, 'epoch': 2}
{'type': 'loss', 'content': 0.11159870028495789, 'timestamp': '2025-10-02 00:31:04.999809', 'step': 11361, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:31:05.054323', 'step': 11361, 'epoch': 2}
{'type': 'loss', 'content': 0.12341208755970001, 'timestamp': '2025-10-02 00:31:05.057757', 'step': 11362, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:05.113496', 'step': 11362, 'epoch': 2}
{'type': 'loss', 'content': 0.11003540456295013, 'timestamp': '2025-10-02 00:31:05.122814', 'step': 11363, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:31:05.182949', 'step': 11363, 'epoch': 2}
{'type': 'loss', 'content': 0.035322412848472595, 'timestamp': '2025-10-02 00:31:05.193935', 'step': 11364, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:05.248981', 'step': 11364, 'epoch': 2}
{'type': 'loss', 'content': 0.06313861906528473, 'timestamp': '2025-10-02 00:31:05.252363', 'step': 11365, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:31:05.310166', 'step': 11365, 'epoch': 2}
{'type': 'loss', 'content': 0.19411587715148926, 'timestamp': '2025-10-02 00:31:05.312673', 'step': 11366, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:05.367668', 'step': 11366, 'epoch': 2}
{'type': 'loss', 'content': 0.18586324155330658, 'timestamp': '2025-10-02 00:31:05.370484', 'step': 11367, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:05.429674', 'step': 11367, 'epoch': 2}
{'type': 'loss', 'content': 0.05477053299546242, 'timestamp': '2025-10-02 00:31:05.440016', 'step': 11368, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:05.494318', 'step': 11368, 'epoch': 2}
{'type': 'loss', 'content': 0.0772155225276947, 'timestamp': '2025-10-02 00:31:05.502138', 'step': 11369, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:05.557248', 'step': 11369, 'epoch': 2}
{'type': 'loss', 'content': 0.15679185092449188, 'timestamp': '2025-10-02 00:31:05.560120', 'step': 11370, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:05.615289', 'step': 11370, 'epoch': 2}
{'type': 'loss', 'content': 0.08320441842079163, 'timestamp': '2025-10-02 00:31:05.618330', 'step': 11371, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:05.675615', 'step': 11371, 'epoch': 2}
{'type': 'loss', 'content': 0.009112231433391571, 'timestamp': '2025-10-02 00:31:05.682330', 'step': 11372, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:05.737524', 'step': 11372, 'epoch': 2}
{'type': 'loss', 'content': 0.009991882368922234, 'timestamp': '2025-10-02 00:31:05.747261', 'step': 11373, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:31:05.802411', 'step': 11373, 'epoch': 2}
{'type': 'loss', 'content': 0.13234475255012512, 'timestamp': '2025-10-02 00:31:05.805707', 'step': 11374, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:05.867259', 'step': 11374, 'epoch': 2}
{'type': 'loss', 'content': 0.020621851086616516, 'timestamp': '2025-10-02 00:31:05.876832', 'step': 11375, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:05.932196', 'step': 11375, 'epoch': 2}
{'type': 'loss', 'content': 0.054135266691446304, 'timestamp': '2025-10-02 00:31:05.938681', 'step': 11376, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:31:05.997250', 'step': 11376, 'epoch': 2}
{'type': 'loss', 'content': 0.014968059957027435, 'timestamp': '2025-10-02 00:31:06.008243', 'step': 11377, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:06.063223', 'step': 11377, 'epoch': 2}
{'type': 'loss', 'content': 0.1328887641429901, 'timestamp': '2025-10-02 00:31:06.066461', 'step': 11378, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:06.122164', 'step': 11378, 'epoch': 2}
{'type': 'loss', 'content': 0.08037013560533524, 'timestamp': '2025-10-02 00:31:06.124713', 'step': 11379, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:06.181410', 'step': 11379, 'epoch': 2}
{'type': 'loss', 'content': 0.22193042933940887, 'timestamp': '2025-10-02 00:31:06.188330', 'step': 11380, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:06.242475', 'step': 11380, 'epoch': 2}
{'type': 'loss', 'content': 0.038949307054281235, 'timestamp': '2025-10-02 00:31:06.245544', 'step': 11381, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:06.301509', 'step': 11381, 'epoch': 2}
{'type': 'loss', 'content': 0.0550755113363266, 'timestamp': '2025-10-02 00:31:06.303977', 'step': 11382, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:06.358597', 'step': 11382, 'epoch': 2}
{'type': 'loss', 'content': 0.07212669402360916, 'timestamp': '2025-10-02 00:31:06.361337', 'step': 11383, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:06.416419', 'step': 11383, 'epoch': 2}
{'type': 'loss', 'content': 0.0035960422828793526, 'timestamp': '2025-10-02 00:31:06.422337', 'step': 11384, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:06.477350', 'step': 11384, 'epoch': 2}
{'type': 'loss', 'content': 0.09795738756656647, 'timestamp': '2025-10-02 00:31:06.487624', 'step': 11385, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:06.543842', 'step': 11385, 'epoch': 2}
{'type': 'loss', 'content': 0.062302250415086746, 'timestamp': '2025-10-02 00:31:06.546485', 'step': 11386, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:06.601230', 'step': 11386, 'epoch': 2}
{'type': 'loss', 'content': 0.12912867963314056, 'timestamp': '2025-10-02 00:31:06.603595', 'step': 11387, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:06.658152', 'step': 11387, 'epoch': 2}
{'type': 'loss', 'content': 0.05553263798356056, 'timestamp': '2025-10-02 00:31:06.668288', 'step': 11388, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:31:06.728993', 'step': 11388, 'epoch': 2}
{'type': 'loss', 'content': 0.024653995409607887, 'timestamp': '2025-10-02 00:31:06.740527', 'step': 11389, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:06.794928', 'step': 11389, 'epoch': 2}
{'type': 'loss', 'content': 0.05412556976079941, 'timestamp': '2025-10-02 00:31:06.801318', 'step': 11390, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:06.857034', 'step': 11390, 'epoch': 2}
{'type': 'loss', 'content': 0.057600248605012894, 'timestamp': '2025-10-02 00:31:06.864823', 'step': 11391, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:06.919052', 'step': 11391, 'epoch': 2}
{'type': 'loss', 'content': 0.09506852179765701, 'timestamp': '2025-10-02 00:31:06.925119', 'step': 11392, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:31:06.992019', 'step': 11392, 'epoch': 2}
{'type': 'loss', 'content': 0.014430089853703976, 'timestamp': '2025-10-02 00:31:07.004979', 'step': 11393, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:07.061055', 'step': 11393, 'epoch': 2}
{'type': 'loss', 'content': 0.12054131180047989, 'timestamp': '2025-10-02 00:31:07.063627', 'step': 11394, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:07.118829', 'step': 11394, 'epoch': 2}
{'type': 'loss', 'content': 0.05972727760672569, 'timestamp': '2025-10-02 00:31:07.124867', 'step': 11395, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:07.180064', 'step': 11395, 'epoch': 2}
{'type': 'loss', 'content': 0.014075261540710926, 'timestamp': '2025-10-02 00:31:07.186307', 'step': 11396, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:07.240989', 'step': 11396, 'epoch': 2}
{'type': 'loss', 'content': 0.03422045335173607, 'timestamp': '2025-10-02 00:31:07.251203', 'step': 11397, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:31:07.310668', 'step': 11397, 'epoch': 2}
{'type': 'loss', 'content': 0.017650919035077095, 'timestamp': '2025-10-02 00:31:07.320848', 'step': 11398, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:07.376377', 'step': 11398, 'epoch': 2}
{'type': 'loss', 'content': 0.04937003180384636, 'timestamp': '2025-10-02 00:31:07.378880', 'step': 11399, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:07.433098', 'step': 11399, 'epoch': 2}
{'type': 'loss', 'content': 0.0649639442563057, 'timestamp': '2025-10-02 00:31:07.439531', 'step': 11400, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:07.493602', 'step': 11400, 'epoch': 2}
{'type': 'loss', 'content': 0.10367005318403244, 'timestamp': '2025-10-02 00:31:07.497997', 'step': 11401, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:31:07.562804', 'step': 11401, 'epoch': 2}
{'type': 'loss', 'content': 0.004240015521645546, 'timestamp': '2025-10-02 00:31:07.573494', 'step': 11402, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:31:07.628181', 'step': 11402, 'epoch': 2}
{'type': 'loss', 'content': 0.12812282145023346, 'timestamp': '2025-10-02 00:31:07.630919', 'step': 11403, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:07.685369', 'step': 11403, 'epoch': 2}
{'type': 'loss', 'content': 0.20561029016971588, 'timestamp': '2025-10-02 00:31:07.691853', 'step': 11404, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:07.746239', 'step': 11404, 'epoch': 2}
{'type': 'loss', 'content': 0.03600727394223213, 'timestamp': '2025-10-02 00:31:07.752393', 'step': 11405, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:07.808391', 'step': 11405, 'epoch': 2}
{'type': 'loss', 'content': 0.044278230518102646, 'timestamp': '2025-10-02 00:31:07.816178', 'step': 11406, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:31:07.871410', 'step': 11406, 'epoch': 2}
{'type': 'loss', 'content': 0.07182882726192474, 'timestamp': '2025-10-02 00:31:07.874025', 'step': 11407, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:07.928596', 'step': 11407, 'epoch': 2}
{'type': 'loss', 'content': 0.029476169496774673, 'timestamp': '2025-10-02 00:31:07.934713', 'step': 11408, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:07.989287', 'step': 11408, 'epoch': 2}
{'type': 'loss', 'content': 0.045245781540870667, 'timestamp': '2025-10-02 00:31:07.995580', 'step': 11409, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:08.049804', 'step': 11409, 'epoch': 2}
{'type': 'loss', 'content': 0.0850287526845932, 'timestamp': '2025-10-02 00:31:08.056081', 'step': 11410, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:08.110993', 'step': 11410, 'epoch': 2}
{'type': 'loss', 'content': 0.11476516723632812, 'timestamp': '2025-10-02 00:31:08.114115', 'step': 11411, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:08.174481', 'step': 11411, 'epoch': 2}
{'type': 'loss', 'content': 0.038196902722120285, 'timestamp': '2025-10-02 00:31:08.184678', 'step': 11412, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:08.239356', 'step': 11412, 'epoch': 2}
{'type': 'loss', 'content': 0.08170189708471298, 'timestamp': '2025-10-02 00:31:08.244548', 'step': 11413, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:08.299084', 'step': 11413, 'epoch': 2}
{'type': 'loss', 'content': 0.041769515722990036, 'timestamp': '2025-10-02 00:31:08.308448', 'step': 11414, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:31:08.366182', 'step': 11414, 'epoch': 2}
{'type': 'loss', 'content': 0.08916014432907104, 'timestamp': '2025-10-02 00:31:08.369313', 'step': 11415, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:08.423351', 'step': 11415, 'epoch': 2}
{'type': 'loss', 'content': 0.0506233386695385, 'timestamp': '2025-10-02 00:31:08.430187', 'step': 11416, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:08.484909', 'step': 11416, 'epoch': 2}
{'type': 'loss', 'content': 0.002469840459525585, 'timestamp': '2025-10-02 00:31:08.492741', 'step': 11417, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:08.547074', 'step': 11417, 'epoch': 2}
{'type': 'loss', 'content': 0.11867666989564896, 'timestamp': '2025-10-02 00:31:08.550090', 'step': 11418, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:08.604786', 'step': 11418, 'epoch': 2}
{'type': 'loss', 'content': 0.15415401756763458, 'timestamp': '2025-10-02 00:31:08.612273', 'step': 11419, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:08.681660', 'step': 11419, 'epoch': 2}
{'type': 'loss', 'content': 0.10699034482240677, 'timestamp': '2025-10-02 00:31:08.688777', 'step': 11420, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:08.742325', 'step': 11420, 'epoch': 2}
{'type': 'loss', 'content': 0.11763452738523483, 'timestamp': '2025-10-02 00:31:08.744872', 'step': 11421, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:31:08.799925', 'step': 11421, 'epoch': 2}
{'type': 'loss', 'content': 0.13849042356014252, 'timestamp': '2025-10-02 00:31:08.802441', 'step': 11422, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:31:08.864773', 'step': 11422, 'epoch': 2}
{'type': 'loss', 'content': 0.028403911739587784, 'timestamp': '2025-10-02 00:31:08.875409', 'step': 11423, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:08.930260', 'step': 11423, 'epoch': 2}
{'type': 'loss', 'content': 0.10925016552209854, 'timestamp': '2025-10-02 00:31:08.936318', 'step': 11424, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:08.990866', 'step': 11424, 'epoch': 2}
{'type': 'loss', 'content': 0.08593881875276566, 'timestamp': '2025-10-02 00:31:08.998822', 'step': 11425, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:09.053707', 'step': 11425, 'epoch': 2}
{'type': 'loss', 'content': 0.051118139177560806, 'timestamp': '2025-10-02 00:31:09.056456', 'step': 11426, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:09.111649', 'step': 11426, 'epoch': 2}
{'type': 'loss', 'content': 0.14530755579471588, 'timestamp': '2025-10-02 00:31:09.121185', 'step': 11427, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:09.176801', 'step': 11427, 'epoch': 2}
{'type': 'loss', 'content': 0.018317745998501778, 'timestamp': '2025-10-02 00:31:09.183928', 'step': 11428, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:09.238759', 'step': 11428, 'epoch': 2}
{'type': 'loss', 'content': 0.010759970173239708, 'timestamp': '2025-10-02 00:31:09.246739', 'step': 11429, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:09.301168', 'step': 11429, 'epoch': 2}
{'type': 'loss', 'content': 0.061607398092746735, 'timestamp': '2025-10-02 00:31:09.303694', 'step': 11430, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:09.357769', 'step': 11430, 'epoch': 2}
{'type': 'loss', 'content': 0.09750541299581528, 'timestamp': '2025-10-02 00:31:09.360272', 'step': 11431, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:09.414492', 'step': 11431, 'epoch': 2}
{'type': 'loss', 'content': 0.012607631273567677, 'timestamp': '2025-10-02 00:31:09.423232', 'step': 11432, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:31:09.478363', 'step': 11432, 'epoch': 2}
{'type': 'loss', 'content': 0.08631185442209244, 'timestamp': '2025-10-02 00:31:09.481145', 'step': 11433, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:09.535712', 'step': 11433, 'epoch': 2}
{'type': 'loss', 'content': 0.0876171663403511, 'timestamp': '2025-10-02 00:31:09.541945', 'step': 11434, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:09.596417', 'step': 11434, 'epoch': 2}
{'type': 'loss', 'content': 0.1350874900817871, 'timestamp': '2025-10-02 00:31:09.598695', 'step': 11435, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:09.653209', 'step': 11435, 'epoch': 2}
{'type': 'loss', 'content': 0.188002347946167, 'timestamp': '2025-10-02 00:31:09.659623', 'step': 11436, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:09.713419', 'step': 11436, 'epoch': 2}
{'type': 'loss', 'content': 0.04950812831521034, 'timestamp': '2025-10-02 00:31:09.716324', 'step': 11437, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:31:09.776613', 'step': 11437, 'epoch': 2}
{'type': 'loss', 'content': 0.05156458541750908, 'timestamp': '2025-10-02 00:31:09.786791', 'step': 11438, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:09.840935', 'step': 11438, 'epoch': 2}
{'type': 'loss', 'content': 0.0870656818151474, 'timestamp': '2025-10-02 00:31:09.844281', 'step': 11439, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:09.898159', 'step': 11439, 'epoch': 2}
{'type': 'loss', 'content': 0.15311375260353088, 'timestamp': '2025-10-02 00:31:09.905138', 'step': 11440, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:09.959254', 'step': 11440, 'epoch': 2}
{'type': 'loss', 'content': 0.032458070665597916, 'timestamp': '2025-10-02 00:31:09.967225', 'step': 11441, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:10.021351', 'step': 11441, 'epoch': 2}
{'type': 'loss', 'content': 0.15996691584587097, 'timestamp': '2025-10-02 00:31:10.025117', 'step': 11442, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:10.081126', 'step': 11442, 'epoch': 2}
{'type': 'loss', 'content': 0.23286084830760956, 'timestamp': '2025-10-02 00:31:10.083987', 'step': 11443, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:10.138411', 'step': 11443, 'epoch': 2}
{'type': 'loss', 'content': 0.10996299982070923, 'timestamp': '2025-10-02 00:31:10.144755', 'step': 11444, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:31:10.206458', 'step': 11444, 'epoch': 2}
{'type': 'loss', 'content': 0.025358043611049652, 'timestamp': '2025-10-02 00:31:10.217785', 'step': 11445, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:10.274777', 'step': 11445, 'epoch': 2}
{'type': 'loss', 'content': 0.11172158271074295, 'timestamp': '2025-10-02 00:31:10.277410', 'step': 11446, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:10.334068', 'step': 11446, 'epoch': 2}
{'type': 'loss', 'content': 0.04166281968355179, 'timestamp': '2025-10-02 00:31:10.343419', 'step': 11447, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:10.399239', 'step': 11447, 'epoch': 2}
{'type': 'loss', 'content': 0.027888597920536995, 'timestamp': '2025-10-02 00:31:10.405488', 'step': 11448, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:10.460058', 'step': 11448, 'epoch': 2}
{'type': 'loss', 'content': 0.00242975284345448, 'timestamp': '2025-10-02 00:31:10.470118', 'step': 11449, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:10.525929', 'step': 11449, 'epoch': 2}
{'type': 'loss', 'content': 0.12140651047229767, 'timestamp': '2025-10-02 00:31:10.528544', 'step': 11450, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:10.583670', 'step': 11450, 'epoch': 2}
{'type': 'loss', 'content': 0.033134326338768005, 'timestamp': '2025-10-02 00:31:10.586571', 'step': 11451, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:10.642127', 'step': 11451, 'epoch': 2}
{'type': 'loss', 'content': 0.017621086910367012, 'timestamp': '2025-10-02 00:31:10.650834', 'step': 11452, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:10.740530', 'step': 11452, 'epoch': 2}
{'type': 'loss', 'content': 0.06954998522996902, 'timestamp': '2025-10-02 00:31:10.746885', 'step': 11453, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:10.804354', 'step': 11453, 'epoch': 2}
{'type': 'loss', 'content': 0.09222012013196945, 'timestamp': '2025-10-02 00:31:10.808225', 'step': 11454, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:10.914537', 'step': 11454, 'epoch': 2}
{'type': 'loss', 'content': 0.07961944490671158, 'timestamp': '2025-10-02 00:31:10.919328', 'step': 11455, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:10.978113', 'step': 11455, 'epoch': 2}
{'type': 'loss', 'content': 0.13822558522224426, 'timestamp': '2025-10-02 00:31:10.987717', 'step': 11456, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:31:11.070135', 'step': 11456, 'epoch': 2}
{'type': 'loss', 'content': 0.03233843296766281, 'timestamp': '2025-10-02 00:31:11.081492', 'step': 11457, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:11.141383', 'step': 11457, 'epoch': 2}
{'type': 'loss', 'content': 0.06727984547615051, 'timestamp': '2025-10-02 00:31:11.150763', 'step': 11458, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:11.239720', 'step': 11458, 'epoch': 2}
{'type': 'loss', 'content': 0.15823359787464142, 'timestamp': '2025-10-02 00:31:11.243979', 'step': 11459, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:11.317721', 'step': 11459, 'epoch': 2}
{'type': 'loss', 'content': 0.052928339689970016, 'timestamp': '2025-10-02 00:31:11.336286', 'step': 11460, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:31:11.411142', 'step': 11460, 'epoch': 2}
{'type': 'loss', 'content': 0.022712167352437973, 'timestamp': '2025-10-02 00:31:11.429109', 'step': 11461, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:11.509223', 'step': 11461, 'epoch': 2}
{'type': 'loss', 'content': 0.09630069881677628, 'timestamp': '2025-10-02 00:31:11.518603', 'step': 11462, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:31:11.601355', 'step': 11462, 'epoch': 2}
{'type': 'loss', 'content': 0.03610270842909813, 'timestamp': '2025-10-02 00:31:11.611804', 'step': 11463, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:11.680249', 'step': 11463, 'epoch': 2}
{'type': 'loss', 'content': 0.13987387716770172, 'timestamp': '2025-10-02 00:31:11.701053', 'step': 11464, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:11.801843', 'step': 11464, 'epoch': 2}
{'type': 'loss', 'content': 0.047847848385572433, 'timestamp': '2025-10-02 00:31:11.818209', 'step': 11465, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:31:11.885260', 'step': 11465, 'epoch': 2}
{'type': 'loss', 'content': 0.033369649201631546, 'timestamp': '2025-10-02 00:31:11.897765', 'step': 11466, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:11.986778', 'step': 11466, 'epoch': 2}
{'type': 'loss', 'content': 0.08440946787595749, 'timestamp': '2025-10-02 00:31:11.990807', 'step': 11467, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:12.079322', 'step': 11467, 'epoch': 2}
{'type': 'loss', 'content': 0.08370339870452881, 'timestamp': '2025-10-02 00:31:12.087412', 'step': 11468, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:12.165393', 'step': 11468, 'epoch': 2}
{'type': 'loss', 'content': 0.07930772751569748, 'timestamp': '2025-10-02 00:31:12.169597', 'step': 11469, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:31:12.247327', 'step': 11469, 'epoch': 2}
{'type': 'loss', 'content': 0.04404363036155701, 'timestamp': '2025-10-02 00:31:12.258002', 'step': 11470, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:12.332902', 'step': 11470, 'epoch': 2}
{'type': 'loss', 'content': 0.12548382580280304, 'timestamp': '2025-10-02 00:31:12.339141', 'step': 11471, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:12.400074', 'step': 11471, 'epoch': 2}
{'type': 'loss', 'content': 0.011977998539805412, 'timestamp': '2025-10-02 00:31:12.420994', 'step': 11472, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:12.479402', 'step': 11472, 'epoch': 2}
{'type': 'loss', 'content': 0.03810872882604599, 'timestamp': '2025-10-02 00:31:12.487101', 'step': 11473, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:12.546114', 'step': 11473, 'epoch': 2}
{'type': 'loss', 'content': 0.08184288442134857, 'timestamp': '2025-10-02 00:31:12.563212', 'step': 11474, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:12.632046', 'step': 11474, 'epoch': 2}
{'type': 'loss', 'content': 0.027981212362647057, 'timestamp': '2025-10-02 00:31:12.637073', 'step': 11475, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:31:12.713148', 'step': 11475, 'epoch': 2}
{'type': 'loss', 'content': 0.012261634692549706, 'timestamp': '2025-10-02 00:31:12.724443', 'step': 11476, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:12.783146', 'step': 11476, 'epoch': 2}
{'type': 'loss', 'content': 0.07156671583652496, 'timestamp': '2025-10-02 00:31:12.789554', 'step': 11477, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:12.850465', 'step': 11477, 'epoch': 2}
{'type': 'loss', 'content': 0.058295879513025284, 'timestamp': '2025-10-02 00:31:12.865950', 'step': 11478, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:31:12.953422', 'step': 11478, 'epoch': 2}
{'type': 'loss', 'content': 0.023732073605060577, 'timestamp': '2025-10-02 00:31:12.971108', 'step': 11479, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:13.029548', 'step': 11479, 'epoch': 2}
{'type': 'loss', 'content': 0.13166333734989166, 'timestamp': '2025-10-02 00:31:13.036924', 'step': 11480, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:13.126016', 'step': 11480, 'epoch': 2}
{'type': 'loss', 'content': 0.023272637277841568, 'timestamp': '2025-10-02 00:31:13.135807', 'step': 11481, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:13.205049', 'step': 11481, 'epoch': 2}
{'type': 'loss', 'content': 0.07866312563419342, 'timestamp': '2025-10-02 00:31:13.215685', 'step': 11482, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:31:13.297141', 'step': 11482, 'epoch': 2}
{'type': 'loss', 'content': 0.08855394273996353, 'timestamp': '2025-10-02 00:31:13.307337', 'step': 11483, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:13.365835', 'step': 11483, 'epoch': 2}
{'type': 'loss', 'content': 0.021451575681567192, 'timestamp': '2025-10-02 00:31:13.384201', 'step': 11484, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:13.460759', 'step': 11484, 'epoch': 2}
{'type': 'loss', 'content': 0.029714487493038177, 'timestamp': '2025-10-02 00:31:13.468788', 'step': 11485, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:13.540451', 'step': 11485, 'epoch': 2}
{'type': 'loss', 'content': 0.019785741344094276, 'timestamp': '2025-10-02 00:31:13.545509', 'step': 11486, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:13.609150', 'step': 11486, 'epoch': 2}
{'type': 'loss', 'content': 0.06815725564956665, 'timestamp': '2025-10-02 00:31:13.612595', 'step': 11487, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:13.687264', 'step': 11487, 'epoch': 2}
{'type': 'loss', 'content': 0.032110534608364105, 'timestamp': '2025-10-02 00:31:13.702585', 'step': 11488, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:31:13.774053', 'step': 11488, 'epoch': 2}
{'type': 'loss', 'content': 0.12204030901193619, 'timestamp': '2025-10-02 00:31:13.778112', 'step': 11489, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-10-02 00:31:13.879452', 'step': 11489, 'epoch': 2}
{'type': 'loss', 'content': 0.042775388807058334, 'timestamp': '2025-10-02 00:31:13.893130', 'step': 11490, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:13.951536', 'step': 11490, 'epoch': 2}
{'type': 'loss', 'content': 0.014717315323650837, 'timestamp': '2025-10-02 00:31:13.961630', 'step': 11491, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:14.038590', 'step': 11491, 'epoch': 2}
{'type': 'loss', 'content': 0.011194482445716858, 'timestamp': '2025-10-02 00:31:14.053906', 'step': 11492, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:14.129276', 'step': 11492, 'epoch': 2}
{'type': 'loss', 'content': 0.04645209386944771, 'timestamp': '2025-10-02 00:31:14.135512', 'step': 11493, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:14.193595', 'step': 11493, 'epoch': 2}
{'type': 'loss', 'content': 0.11667095124721527, 'timestamp': '2025-10-02 00:31:14.202013', 'step': 11494, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:14.284482', 'step': 11494, 'epoch': 2}
{'type': 'loss', 'content': 0.050505876541137695, 'timestamp': '2025-10-02 00:31:14.292931', 'step': 11495, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:14.351486', 'step': 11495, 'epoch': 2}
{'type': 'loss', 'content': 0.01949726603925228, 'timestamp': '2025-10-02 00:31:14.361607', 'step': 11496, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:14.424909', 'step': 11496, 'epoch': 2}
{'type': 'loss', 'content': 0.029676072299480438, 'timestamp': '2025-10-02 00:31:14.434686', 'step': 11497, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:14.514119', 'step': 11497, 'epoch': 2}
{'type': 'loss', 'content': 0.17327424883842468, 'timestamp': '2025-10-02 00:31:14.522534', 'step': 11498, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:14.588098', 'step': 11498, 'epoch': 2}
{'type': 'loss', 'content': 0.06551793962717056, 'timestamp': '2025-10-02 00:31:14.592112', 'step': 11499, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:14.650024', 'step': 11499, 'epoch': 2}
{'type': 'loss', 'content': 0.14583951234817505, 'timestamp': '2025-10-02 00:31:14.660354', 'step': 11500, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 11500', 'timestamp': '2025-10-02 00:31:15.216830', 'step': 11500, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:15.285533', 'step': 11500, 'epoch': 2}
{'type': 'loss', 'content': 0.06868347525596619, 'timestamp': '2025-10-02 00:31:15.297244', 'step': 11501, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:31:15.379481', 'step': 11501, 'epoch': 2}
{'type': 'loss', 'content': 0.05086647719144821, 'timestamp': '2025-10-02 00:31:15.389944', 'step': 11502, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:15.464875', 'step': 11502, 'epoch': 2}
{'type': 'loss', 'content': 0.17969998717308044, 'timestamp': '2025-10-02 00:31:15.469076', 'step': 11503, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:15.538849', 'step': 11503, 'epoch': 2}
{'type': 'loss', 'content': 0.09094437211751938, 'timestamp': '2025-10-02 00:31:15.553688', 'step': 11504, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:31:15.621570', 'step': 11504, 'epoch': 2}
{'type': 'loss', 'content': 0.114613376557827, 'timestamp': '2025-10-02 00:31:15.635244', 'step': 11505, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:31:15.720228', 'step': 11505, 'epoch': 2}
{'type': 'loss', 'content': 0.02224772237241268, 'timestamp': '2025-10-02 00:31:15.734408', 'step': 11506, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:31:15.809859', 'step': 11506, 'epoch': 2}
{'type': 'loss', 'content': 0.1391199231147766, 'timestamp': '2025-10-02 00:31:15.821045', 'step': 11507, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:15.890140', 'step': 11507, 'epoch': 2}
{'type': 'loss', 'content': 0.017473265528678894, 'timestamp': '2025-10-02 00:31:15.898675', 'step': 11508, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:15.968300', 'step': 11508, 'epoch': 2}
{'type': 'loss', 'content': 0.12281636148691177, 'timestamp': '2025-10-02 00:31:15.976048', 'step': 11509, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:31:16.046908', 'step': 11509, 'epoch': 2}
{'type': 'loss', 'content': 0.014614276587963104, 'timestamp': '2025-10-02 00:31:16.057386', 'step': 11510, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:31:16.120425', 'step': 11510, 'epoch': 2}
{'type': 'loss', 'content': 0.09116169065237045, 'timestamp': '2025-10-02 00:31:16.124865', 'step': 11511, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:31:16.202327', 'step': 11511, 'epoch': 2}
{'type': 'loss', 'content': 0.0360545888543129, 'timestamp': '2025-10-02 00:31:16.213585', 'step': 11512, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:31:16.289503', 'step': 11512, 'epoch': 2}
{'type': 'loss', 'content': 0.18043524026870728, 'timestamp': '2025-10-02 00:31:16.302374', 'step': 11513, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:16.369107', 'step': 11513, 'epoch': 2}
{'type': 'loss', 'content': 0.1465703248977661, 'timestamp': '2025-10-02 00:31:16.380284', 'step': 11514, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:16.435776', 'step': 11514, 'epoch': 2}
{'type': 'loss', 'content': 0.029257265850901604, 'timestamp': '2025-10-02 00:31:16.439831', 'step': 11515, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:16.497309', 'step': 11515, 'epoch': 2}
{'type': 'loss', 'content': 0.10319510847330093, 'timestamp': '2025-10-02 00:31:16.512333', 'step': 11516, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:16.579626', 'step': 11516, 'epoch': 2}
{'type': 'loss', 'content': 0.06930053979158401, 'timestamp': '2025-10-02 00:31:16.585749', 'step': 11517, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:16.646750', 'step': 11517, 'epoch': 2}
{'type': 'loss', 'content': 0.06958639621734619, 'timestamp': '2025-10-02 00:31:16.652893', 'step': 11518, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:16.717624', 'step': 11518, 'epoch': 2}
{'type': 'loss', 'content': 0.1032472550868988, 'timestamp': '2025-10-02 00:31:16.721539', 'step': 11519, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:16.785795', 'step': 11519, 'epoch': 2}
{'type': 'loss', 'content': 0.07181891798973083, 'timestamp': '2025-10-02 00:31:16.795910', 'step': 11520, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:16.868617', 'step': 11520, 'epoch': 2}
{'type': 'loss', 'content': 0.03797886520624161, 'timestamp': '2025-10-02 00:31:16.878979', 'step': 11521, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:16.935064', 'step': 11521, 'epoch': 2}
{'type': 'loss', 'content': 0.08212120831012726, 'timestamp': '2025-10-02 00:31:16.938588', 'step': 11522, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:31:17.017493', 'step': 11522, 'epoch': 2}
{'type': 'loss', 'content': 0.05986470356583595, 'timestamp': '2025-10-02 00:31:17.027953', 'step': 11523, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:17.091466', 'step': 11523, 'epoch': 2}
{'type': 'loss', 'content': 0.054810330271720886, 'timestamp': '2025-10-02 00:31:17.105850', 'step': 11524, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:17.162472', 'step': 11524, 'epoch': 2}
{'type': 'loss', 'content': 0.037543490529060364, 'timestamp': '2025-10-02 00:31:17.172811', 'step': 11525, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:17.241800', 'step': 11525, 'epoch': 2}
{'type': 'loss', 'content': 0.03896588832139969, 'timestamp': '2025-10-02 00:31:17.244914', 'step': 11526, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:17.308489', 'step': 11526, 'epoch': 2}
{'type': 'loss', 'content': 0.08407795429229736, 'timestamp': '2025-10-02 00:31:17.312041', 'step': 11527, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:31:17.373265', 'step': 11527, 'epoch': 2}
{'type': 'loss', 'content': 0.05531831085681915, 'timestamp': '2025-10-02 00:31:17.387007', 'step': 11528, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:17.445606', 'step': 11528, 'epoch': 2}
{'type': 'loss', 'content': 0.1307249516248703, 'timestamp': '2025-10-02 00:31:17.455146', 'step': 11529, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:17.531997', 'step': 11529, 'epoch': 2}
{'type': 'loss', 'content': 0.04015939682722092, 'timestamp': '2025-10-02 00:31:17.542144', 'step': 11530, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:17.605277', 'step': 11530, 'epoch': 2}
{'type': 'loss', 'content': 0.015117506496608257, 'timestamp': '2025-10-02 00:31:17.611440', 'step': 11531, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:31:17.681531', 'step': 11531, 'epoch': 2}
{'type': 'loss', 'content': 0.021016519516706467, 'timestamp': '2025-10-02 00:31:17.695113', 'step': 11532, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:17.768000', 'step': 11532, 'epoch': 2}
{'type': 'loss', 'content': 0.004932434298098087, 'timestamp': '2025-10-02 00:31:17.778803', 'step': 11533, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:17.843907', 'step': 11533, 'epoch': 2}
{'type': 'loss', 'content': 0.06927437335252762, 'timestamp': '2025-10-02 00:31:17.851755', 'step': 11534, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:17.917721', 'step': 11534, 'epoch': 2}
{'type': 'loss', 'content': 0.15393535792827606, 'timestamp': '2025-10-02 00:31:17.929962', 'step': 11535, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:18.003890', 'step': 11535, 'epoch': 2}
{'type': 'loss', 'content': 0.15345874428749084, 'timestamp': '2025-10-02 00:31:18.010390', 'step': 11536, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:18.072025', 'step': 11536, 'epoch': 2}
{'type': 'loss', 'content': 0.08718002587556839, 'timestamp': '2025-10-02 00:31:18.083568', 'step': 11537, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:18.158679', 'step': 11537, 'epoch': 2}
{'type': 'loss', 'content': 0.05984911695122719, 'timestamp': '2025-10-02 00:31:18.168047', 'step': 11538, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:18.253408', 'step': 11538, 'epoch': 2}
{'type': 'loss', 'content': 0.13957585394382477, 'timestamp': '2025-10-02 00:31:18.258351', 'step': 11539, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:18.323725', 'step': 11539, 'epoch': 2}
{'type': 'loss', 'content': 0.03865797817707062, 'timestamp': '2025-10-02 00:31:18.338134', 'step': 11540, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:31:18.403566', 'step': 11540, 'epoch': 2}
{'type': 'loss', 'content': 0.05242553725838661, 'timestamp': '2025-10-02 00:31:18.407084', 'step': 11541, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:18.475220', 'step': 11541, 'epoch': 2}
{'type': 'loss', 'content': 0.02152777463197708, 'timestamp': '2025-10-02 00:31:18.483204', 'step': 11542, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:18.540868', 'step': 11542, 'epoch': 2}
{'type': 'loss', 'content': 0.07069781422615051, 'timestamp': '2025-10-02 00:31:18.551412', 'step': 11543, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:18.611733', 'step': 11543, 'epoch': 2}
{'type': 'loss', 'content': 0.04274303838610649, 'timestamp': '2025-10-02 00:31:18.622287', 'step': 11544, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:18.682571', 'step': 11544, 'epoch': 2}
{'type': 'loss', 'content': 0.06141462177038193, 'timestamp': '2025-10-02 00:31:18.686632', 'step': 11545, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:18.752947', 'step': 11545, 'epoch': 2}
{'type': 'loss', 'content': 0.10874083638191223, 'timestamp': '2025-10-02 00:31:18.761962', 'step': 11546, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:18.829249', 'step': 11546, 'epoch': 2}
{'type': 'loss', 'content': 0.009619032964110374, 'timestamp': '2025-10-02 00:31:18.842451', 'step': 11547, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:18.916205', 'step': 11547, 'epoch': 2}
{'type': 'loss', 'content': 0.22184379398822784, 'timestamp': '2025-10-02 00:31:18.934490', 'step': 11548, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:19.010037', 'step': 11548, 'epoch': 2}
{'type': 'loss', 'content': 0.06971883028745651, 'timestamp': '2025-10-02 00:31:19.017802', 'step': 11549, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:19.097830', 'step': 11549, 'epoch': 2}
{'type': 'loss', 'content': 0.031547315418720245, 'timestamp': '2025-10-02 00:31:19.104233', 'step': 11550, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:31:19.171135', 'step': 11550, 'epoch': 2}
{'type': 'loss', 'content': 0.06666538864374161, 'timestamp': '2025-10-02 00:31:19.181317', 'step': 11551, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:19.264010', 'step': 11551, 'epoch': 2}
{'type': 'loss', 'content': 0.09110987186431885, 'timestamp': '2025-10-02 00:31:19.271253', 'step': 11552, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:19.332167', 'step': 11552, 'epoch': 2}
{'type': 'loss', 'content': 0.06292235106229782, 'timestamp': '2025-10-02 00:31:19.340160', 'step': 11553, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:19.416542', 'step': 11553, 'epoch': 2}
{'type': 'loss', 'content': 0.11772675812244415, 'timestamp': '2025-10-02 00:31:19.427408', 'step': 11554, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:31:19.517781', 'step': 11554, 'epoch': 2}
{'type': 'loss', 'content': 0.050246819853782654, 'timestamp': '2025-10-02 00:31:19.528252', 'step': 11555, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:19.589124', 'step': 11555, 'epoch': 2}
{'type': 'loss', 'content': 0.029444411396980286, 'timestamp': '2025-10-02 00:31:19.599440', 'step': 11556, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:19.655160', 'step': 11556, 'epoch': 2}
{'type': 'loss', 'content': 0.020525259897112846, 'timestamp': '2025-10-02 00:31:19.658228', 'step': 11557, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:19.714252', 'step': 11557, 'epoch': 2}
{'type': 'loss', 'content': 0.0644027441740036, 'timestamp': '2025-10-02 00:31:19.717038', 'step': 11558, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:19.780396', 'step': 11558, 'epoch': 2}
{'type': 'loss', 'content': 0.04583102464675903, 'timestamp': '2025-10-02 00:31:19.785578', 'step': 11559, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:31:19.852488', 'step': 11559, 'epoch': 2}
{'type': 'loss', 'content': 0.0608181431889534, 'timestamp': '2025-10-02 00:31:19.863485', 'step': 11560, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:19.922417', 'step': 11560, 'epoch': 2}
{'type': 'loss', 'content': 0.17825888097286224, 'timestamp': '2025-10-02 00:31:19.927501', 'step': 11561, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:31:19.989599', 'step': 11561, 'epoch': 2}
{'type': 'loss', 'content': 0.10525217652320862, 'timestamp': '2025-10-02 00:31:19.997688', 'step': 11562, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:20.073296', 'step': 11562, 'epoch': 2}
{'type': 'loss', 'content': 0.12999103963375092, 'timestamp': '2025-10-02 00:31:20.083463', 'step': 11563, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:20.153248', 'step': 11563, 'epoch': 2}
{'type': 'loss', 'content': 0.040083713829517365, 'timestamp': '2025-10-02 00:31:20.161335', 'step': 11564, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:31:20.220328', 'step': 11564, 'epoch': 2}
{'type': 'loss', 'content': 0.1377129852771759, 'timestamp': '2025-10-02 00:31:20.224122', 'step': 11565, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:20.284472', 'step': 11565, 'epoch': 2}
{'type': 'loss', 'content': 0.06394842267036438, 'timestamp': '2025-10-02 00:31:20.289955', 'step': 11566, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:20.347953', 'step': 11566, 'epoch': 2}
{'type': 'loss', 'content': 0.06082914024591446, 'timestamp': '2025-10-02 00:31:20.353609', 'step': 11567, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:31:20.429209', 'step': 11567, 'epoch': 2}
{'type': 'loss', 'content': 0.03426015004515648, 'timestamp': '2025-10-02 00:31:20.440454', 'step': 11568, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:31:20.522158', 'step': 11568, 'epoch': 2}
{'type': 'loss', 'content': 0.07746714353561401, 'timestamp': '2025-10-02 00:31:20.528562', 'step': 11569, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:20.598287', 'step': 11569, 'epoch': 2}
{'type': 'loss', 'content': 0.0522272065281868, 'timestamp': '2025-10-02 00:31:20.604345', 'step': 11570, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:20.674358', 'step': 11570, 'epoch': 2}
{'type': 'loss', 'content': 0.0934569463133812, 'timestamp': '2025-10-02 00:31:20.679228', 'step': 11571, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:20.739221', 'step': 11571, 'epoch': 2}
{'type': 'loss', 'content': 0.07597674429416656, 'timestamp': '2025-10-02 00:31:20.745905', 'step': 11572, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:20.807921', 'step': 11572, 'epoch': 2}
{'type': 'loss', 'content': 0.08704753965139389, 'timestamp': '2025-10-02 00:31:20.815279', 'step': 11573, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:20.880198', 'step': 11573, 'epoch': 2}
{'type': 'loss', 'content': 0.14641599357128143, 'timestamp': '2025-10-02 00:31:20.892748', 'step': 11574, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:20.951844', 'step': 11574, 'epoch': 2}
{'type': 'loss', 'content': 0.04374867305159569, 'timestamp': '2025-10-02 00:31:20.955826', 'step': 11575, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:21.035507', 'step': 11575, 'epoch': 2}
{'type': 'loss', 'content': 0.060330960899591446, 'timestamp': '2025-10-02 00:31:21.044342', 'step': 11576, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:21.108492', 'step': 11576, 'epoch': 2}
{'type': 'loss', 'content': 0.10660721361637115, 'timestamp': '2025-10-02 00:31:21.113045', 'step': 11577, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:21.179276', 'step': 11577, 'epoch': 2}
{'type': 'loss', 'content': 0.05586167797446251, 'timestamp': '2025-10-02 00:31:21.187209', 'step': 11578, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:21.259298', 'step': 11578, 'epoch': 2}
{'type': 'loss', 'content': 0.07551272958517075, 'timestamp': '2025-10-02 00:31:21.263742', 'step': 11579, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:21.324897', 'step': 11579, 'epoch': 2}
{'type': 'loss', 'content': 0.054818589240312576, 'timestamp': '2025-10-02 00:31:21.333287', 'step': 11580, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:21.395227', 'step': 11580, 'epoch': 2}
{'type': 'loss', 'content': 0.027725230902433395, 'timestamp': '2025-10-02 00:31:21.411801', 'step': 11581, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:21.480463', 'step': 11581, 'epoch': 2}
{'type': 'loss', 'content': 0.052534010261297226, 'timestamp': '2025-10-02 00:31:21.486542', 'step': 11582, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:21.546891', 'step': 11582, 'epoch': 2}
{'type': 'loss', 'content': 0.04074238985776901, 'timestamp': '2025-10-02 00:31:21.561750', 'step': 11583, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:21.620363', 'step': 11583, 'epoch': 2}
{'type': 'loss', 'content': 0.07586587220430374, 'timestamp': '2025-10-02 00:31:21.627028', 'step': 11584, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:21.683443', 'step': 11584, 'epoch': 2}
{'type': 'loss', 'content': 0.04481589421629906, 'timestamp': '2025-10-02 00:31:21.693301', 'step': 11585, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:21.749495', 'step': 11585, 'epoch': 2}
{'type': 'loss', 'content': 0.14943091571331024, 'timestamp': '2025-10-02 00:31:21.752720', 'step': 11586, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:21.823862', 'step': 11586, 'epoch': 2}
{'type': 'loss', 'content': 0.11396121978759766, 'timestamp': '2025-10-02 00:31:21.833416', 'step': 11587, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:31:21.906241', 'step': 11587, 'epoch': 2}
{'type': 'loss', 'content': 0.018831538036465645, 'timestamp': '2025-10-02 00:31:21.919492', 'step': 11588, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:31:21.990853', 'step': 11588, 'epoch': 2}
{'type': 'loss', 'content': 0.08879595249891281, 'timestamp': '2025-10-02 00:31:22.002236', 'step': 11589, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:22.060381', 'step': 11589, 'epoch': 2}
{'type': 'loss', 'content': 0.0784425288438797, 'timestamp': '2025-10-02 00:31:22.063959', 'step': 11590, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:22.131702', 'step': 11590, 'epoch': 2}
{'type': 'loss', 'content': 0.04705626517534256, 'timestamp': '2025-10-02 00:31:22.139568', 'step': 11591, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:31:22.206239', 'step': 11591, 'epoch': 2}
{'type': 'loss', 'content': 0.08524181693792343, 'timestamp': '2025-10-02 00:31:22.212898', 'step': 11592, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:22.268172', 'step': 11592, 'epoch': 2}
{'type': 'loss', 'content': 0.17097856104373932, 'timestamp': '2025-10-02 00:31:22.271385', 'step': 11593, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:22.337222', 'step': 11593, 'epoch': 2}
{'type': 'loss', 'content': 0.06274190545082092, 'timestamp': '2025-10-02 00:31:22.345107', 'step': 11594, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:22.408293', 'step': 11594, 'epoch': 2}
{'type': 'loss', 'content': 0.07524159550666809, 'timestamp': '2025-10-02 00:31:22.417820', 'step': 11595, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:22.474478', 'step': 11595, 'epoch': 2}
{'type': 'loss', 'content': 0.028869669884443283, 'timestamp': '2025-10-02 00:31:22.482763', 'step': 11596, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:22.550579', 'step': 11596, 'epoch': 2}
{'type': 'loss', 'content': 0.026288295164704323, 'timestamp': '2025-10-02 00:31:22.560827', 'step': 11597, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:22.623157', 'step': 11597, 'epoch': 2}
{'type': 'loss', 'content': 0.14844365417957306, 'timestamp': '2025-10-02 00:31:22.626488', 'step': 11598, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:22.687024', 'step': 11598, 'epoch': 2}
{'type': 'loss', 'content': 0.05753707140684128, 'timestamp': '2025-10-02 00:31:22.695036', 'step': 11599, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:22.753394', 'step': 11599, 'epoch': 2}
{'type': 'loss', 'content': 0.050614871084690094, 'timestamp': '2025-10-02 00:31:22.764543', 'step': 11600, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:22.820377', 'step': 11600, 'epoch': 2}
{'type': 'loss', 'content': 0.13592654466629028, 'timestamp': '2025-10-02 00:31:22.823355', 'step': 11601, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:22.880372', 'step': 11601, 'epoch': 2}
{'type': 'loss', 'content': 0.06831970065832138, 'timestamp': '2025-10-02 00:31:22.889309', 'step': 11602, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:22.946046', 'step': 11602, 'epoch': 2}
{'type': 'loss', 'content': 0.11671094596385956, 'timestamp': '2025-10-02 00:31:22.949267', 'step': 11603, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:23.009655', 'step': 11603, 'epoch': 2}
{'type': 'loss', 'content': 0.04933737963438034, 'timestamp': '2025-10-02 00:31:23.016905', 'step': 11604, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:23.074877', 'step': 11604, 'epoch': 2}
{'type': 'loss', 'content': 0.10361427813768387, 'timestamp': '2025-10-02 00:31:23.084922', 'step': 11605, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:23.146319', 'step': 11605, 'epoch': 2}
{'type': 'loss', 'content': 0.06239921599626541, 'timestamp': '2025-10-02 00:31:23.154212', 'step': 11606, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:23.218537', 'step': 11606, 'epoch': 2}
{'type': 'loss', 'content': 0.056114714592695236, 'timestamp': '2025-10-02 00:31:23.221686', 'step': 11607, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:23.280391', 'step': 11607, 'epoch': 2}
{'type': 'loss', 'content': 0.04206793010234833, 'timestamp': '2025-10-02 00:31:23.290743', 'step': 11608, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:23.363652', 'step': 11608, 'epoch': 2}
{'type': 'loss', 'content': 0.05184070020914078, 'timestamp': '2025-10-02 00:31:23.369943', 'step': 11609, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:23.428168', 'step': 11609, 'epoch': 2}
{'type': 'loss', 'content': 0.06130022183060646, 'timestamp': '2025-10-02 00:31:23.436050', 'step': 11610, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:31:23.498407', 'step': 11610, 'epoch': 2}
{'type': 'loss', 'content': 0.1436089277267456, 'timestamp': '2025-10-02 00:31:23.507959', 'step': 11611, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:23.565377', 'step': 11611, 'epoch': 2}
{'type': 'loss', 'content': 0.07865753769874573, 'timestamp': '2025-10-02 00:31:23.575741', 'step': 11612, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:23.630627', 'step': 11612, 'epoch': 2}
{'type': 'loss', 'content': 0.11976888030767441, 'timestamp': '2025-10-02 00:31:23.634158', 'step': 11613, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:23.698928', 'step': 11613, 'epoch': 2}
{'type': 'loss', 'content': 0.055313266813755035, 'timestamp': '2025-10-02 00:31:23.706824', 'step': 11614, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:23.761087', 'step': 11614, 'epoch': 2}
{'type': 'loss', 'content': 0.04496131092309952, 'timestamp': '2025-10-02 00:31:23.768756', 'step': 11615, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:23.824113', 'step': 11615, 'epoch': 2}
{'type': 'loss', 'content': 0.054129693657159805, 'timestamp': '2025-10-02 00:31:23.831128', 'step': 11616, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:23.892877', 'step': 11616, 'epoch': 2}
{'type': 'loss', 'content': 0.07490881532430649, 'timestamp': '2025-10-02 00:31:23.895712', 'step': 11617, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:23.949086', 'step': 11617, 'epoch': 2}
{'type': 'loss', 'content': 0.0656135305762291, 'timestamp': '2025-10-02 00:31:23.956948', 'step': 11618, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:24.013241', 'step': 11618, 'epoch': 2}
{'type': 'loss', 'content': 0.03671411797404289, 'timestamp': '2025-10-02 00:31:24.022610', 'step': 11619, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:24.077585', 'step': 11619, 'epoch': 2}
{'type': 'loss', 'content': 0.04816485941410065, 'timestamp': '2025-10-02 00:31:24.084383', 'step': 11620, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:24.138367', 'step': 11620, 'epoch': 2}
{'type': 'loss', 'content': 0.06483094394207001, 'timestamp': '2025-10-02 00:31:24.141073', 'step': 11621, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:24.196927', 'step': 11621, 'epoch': 2}
{'type': 'loss', 'content': 0.031277820467948914, 'timestamp': '2025-10-02 00:31:24.206520', 'step': 11622, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:24.261112', 'step': 11622, 'epoch': 2}
{'type': 'loss', 'content': 0.027364112436771393, 'timestamp': '2025-10-02 00:31:24.263818', 'step': 11623, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:24.318488', 'step': 11623, 'epoch': 2}
{'type': 'loss', 'content': 0.08818546682596207, 'timestamp': '2025-10-02 00:31:24.328598', 'step': 11624, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:24.383817', 'step': 11624, 'epoch': 2}
{'type': 'loss', 'content': 0.07740873843431473, 'timestamp': '2025-10-02 00:31:24.387172', 'step': 11625, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:24.441649', 'step': 11625, 'epoch': 2}
{'type': 'loss', 'content': 0.1001567617058754, 'timestamp': '2025-10-02 00:31:24.444419', 'step': 11626, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:24.498644', 'step': 11626, 'epoch': 2}
{'type': 'loss', 'content': 0.07697183638811111, 'timestamp': '2025-10-02 00:31:24.504881', 'step': 11627, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:24.560021', 'step': 11627, 'epoch': 2}
{'type': 'loss', 'content': 0.05749325826764107, 'timestamp': '2025-10-02 00:31:24.566201', 'step': 11628, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:24.621242', 'step': 11628, 'epoch': 2}
{'type': 'loss', 'content': 0.08561622351408005, 'timestamp': '2025-10-02 00:31:24.623809', 'step': 11629, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:24.679774', 'step': 11629, 'epoch': 2}
{'type': 'loss', 'content': 0.04890279099345207, 'timestamp': '2025-10-02 00:31:24.689172', 'step': 11630, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:24.742845', 'step': 11630, 'epoch': 2}
{'type': 'loss', 'content': 0.17736180126667023, 'timestamp': '2025-10-02 00:31:24.745675', 'step': 11631, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:24.799899', 'step': 11631, 'epoch': 2}
{'type': 'loss', 'content': 0.08555822819471359, 'timestamp': '2025-10-02 00:31:24.806232', 'step': 11632, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:24.860024', 'step': 11632, 'epoch': 2}
{'type': 'loss', 'content': 0.04436100646853447, 'timestamp': '2025-10-02 00:31:24.862655', 'step': 11633, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:24.917649', 'step': 11633, 'epoch': 2}
{'type': 'loss', 'content': 0.07310959696769714, 'timestamp': '2025-10-02 00:31:24.920182', 'step': 11634, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:31:24.974348', 'step': 11634, 'epoch': 2}
{'type': 'loss', 'content': 0.04722354933619499, 'timestamp': '2025-10-02 00:31:24.977079', 'step': 11635, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:31:25.039289', 'step': 11635, 'epoch': 2}
{'type': 'loss', 'content': 0.06876765191555023, 'timestamp': '2025-10-02 00:31:25.050513', 'step': 11636, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:25.105202', 'step': 11636, 'epoch': 2}
{'type': 'loss', 'content': 0.1369314193725586, 'timestamp': '2025-10-02 00:31:25.109003', 'step': 11637, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:25.163731', 'step': 11637, 'epoch': 2}
{'type': 'loss', 'content': 0.07578843832015991, 'timestamp': '2025-10-02 00:31:25.166473', 'step': 11638, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:25.220680', 'step': 11638, 'epoch': 2}
{'type': 'loss', 'content': 0.04678303003311157, 'timestamp': '2025-10-02 00:31:25.223423', 'step': 11639, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:25.277499', 'step': 11639, 'epoch': 2}
{'type': 'loss', 'content': 0.07950400561094284, 'timestamp': '2025-10-02 00:31:25.283758', 'step': 11640, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:25.338560', 'step': 11640, 'epoch': 2}
{'type': 'loss', 'content': 0.09103266894817352, 'timestamp': '2025-10-02 00:31:25.342013', 'step': 11641, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:31:25.396483', 'step': 11641, 'epoch': 2}
{'type': 'loss', 'content': 0.06471433490514755, 'timestamp': '2025-10-02 00:31:25.398911', 'step': 11642, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:25.456090', 'step': 11642, 'epoch': 2}
{'type': 'loss', 'content': 0.058849480003118515, 'timestamp': '2025-10-02 00:31:25.465629', 'step': 11643, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:25.520449', 'step': 11643, 'epoch': 2}
{'type': 'loss', 'content': 0.05650290101766586, 'timestamp': '2025-10-02 00:31:25.529022', 'step': 11644, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:25.584023', 'step': 11644, 'epoch': 2}
{'type': 'loss', 'content': 0.016582289710640907, 'timestamp': '2025-10-02 00:31:25.590284', 'step': 11645, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:25.645886', 'step': 11645, 'epoch': 2}
{'type': 'loss', 'content': 0.020807316526770592, 'timestamp': '2025-10-02 00:31:25.648801', 'step': 11646, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:25.703192', 'step': 11646, 'epoch': 2}
{'type': 'loss', 'content': 0.045313503593206406, 'timestamp': '2025-10-02 00:31:25.709343', 'step': 11647, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:25.764180', 'step': 11647, 'epoch': 2}
{'type': 'loss', 'content': 0.040263477712869644, 'timestamp': '2025-10-02 00:31:25.772563', 'step': 11648, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:25.849742', 'step': 11648, 'epoch': 2}
{'type': 'loss', 'content': 0.07941101491451263, 'timestamp': '2025-10-02 00:31:25.852177', 'step': 11649, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:25.906424', 'step': 11649, 'epoch': 2}
{'type': 'loss', 'content': 0.05161161348223686, 'timestamp': '2025-10-02 00:31:25.909949', 'step': 11650, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:25.967860', 'step': 11650, 'epoch': 2}
{'type': 'loss', 'content': 0.047973256558179855, 'timestamp': '2025-10-02 00:31:25.972149', 'step': 11651, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:26.029483', 'step': 11651, 'epoch': 2}
{'type': 'loss', 'content': 0.05714498832821846, 'timestamp': '2025-10-02 00:31:26.035490', 'step': 11652, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:26.091386', 'step': 11652, 'epoch': 2}
{'type': 'loss', 'content': 0.04463885724544525, 'timestamp': '2025-10-02 00:31:26.094782', 'step': 11653, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:26.151640', 'step': 11653, 'epoch': 2}
{'type': 'loss', 'content': 0.11601965874433517, 'timestamp': '2025-10-02 00:31:26.154312', 'step': 11654, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:26.210592', 'step': 11654, 'epoch': 2}
{'type': 'loss', 'content': 0.21655544638633728, 'timestamp': '2025-10-02 00:31:26.214041', 'step': 11655, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:26.270596', 'step': 11655, 'epoch': 2}
{'type': 'loss', 'content': 0.10837385058403015, 'timestamp': '2025-10-02 00:31:26.277405', 'step': 11656, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:26.331162', 'step': 11656, 'epoch': 2}
{'type': 'loss', 'content': 0.018727635964751244, 'timestamp': '2025-10-02 00:31:26.333527', 'step': 11657, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:31:26.404683', 'step': 11657, 'epoch': 2}
{'type': 'loss', 'content': 0.027890056371688843, 'timestamp': '2025-10-02 00:31:26.416639', 'step': 11658, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:26.474708', 'step': 11658, 'epoch': 2}
{'type': 'loss', 'content': 0.052576396614313126, 'timestamp': '2025-10-02 00:31:26.484077', 'step': 11659, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:31:26.540933', 'step': 11659, 'epoch': 2}
{'type': 'loss', 'content': 0.09964340925216675, 'timestamp': '2025-10-02 00:31:26.547841', 'step': 11660, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:26.603255', 'step': 11660, 'epoch': 2}
{'type': 'loss', 'content': 0.1573808193206787, 'timestamp': '2025-10-02 00:31:26.606338', 'step': 11661, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:26.667167', 'step': 11661, 'epoch': 2}
{'type': 'loss', 'content': 0.07745376974344254, 'timestamp': '2025-10-02 00:31:26.670679', 'step': 11662, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:26.725471', 'step': 11662, 'epoch': 2}
{'type': 'loss', 'content': 0.04231743887066841, 'timestamp': '2025-10-02 00:31:26.731780', 'step': 11663, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:31:26.805573', 'step': 11663, 'epoch': 2}
{'type': 'loss', 'content': 0.017984135076403618, 'timestamp': '2025-10-02 00:31:26.818962', 'step': 11664, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:26.874491', 'step': 11664, 'epoch': 2}
{'type': 'loss', 'content': 0.08798675239086151, 'timestamp': '2025-10-02 00:31:26.882322', 'step': 11665, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:31:26.937896', 'step': 11665, 'epoch': 2}
{'type': 'loss', 'content': 0.10287868976593018, 'timestamp': '2025-10-02 00:31:26.940397', 'step': 11666, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:26.996113', 'step': 11666, 'epoch': 2}
{'type': 'loss', 'content': 0.1695563644170761, 'timestamp': '2025-10-02 00:31:26.999470', 'step': 11667, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:27.056585', 'step': 11667, 'epoch': 2}
{'type': 'loss', 'content': 0.04340843856334686, 'timestamp': '2025-10-02 00:31:27.065178', 'step': 11668, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:27.120895', 'step': 11668, 'epoch': 2}
{'type': 'loss', 'content': 0.07752031832933426, 'timestamp': '2025-10-02 00:31:27.128783', 'step': 11669, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:27.186205', 'step': 11669, 'epoch': 2}
{'type': 'loss', 'content': 0.17806018888950348, 'timestamp': '2025-10-02 00:31:27.193841', 'step': 11670, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:27.249893', 'step': 11670, 'epoch': 2}
{'type': 'loss', 'content': 0.055182017385959625, 'timestamp': '2025-10-02 00:31:27.253731', 'step': 11671, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:31:27.309600', 'step': 11671, 'epoch': 2}
{'type': 'loss', 'content': 0.14704999327659607, 'timestamp': '2025-10-02 00:31:27.317157', 'step': 11672, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:31:27.380363', 'step': 11672, 'epoch': 2}
{'type': 'loss', 'content': 0.026789706200361252, 'timestamp': '2025-10-02 00:31:27.392189', 'step': 11673, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:27.447335', 'step': 11673, 'epoch': 2}
{'type': 'loss', 'content': 0.0216099563986063, 'timestamp': '2025-10-02 00:31:27.455460', 'step': 11674, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:27.512142', 'step': 11674, 'epoch': 2}
{'type': 'loss', 'content': 0.03431599959731102, 'timestamp': '2025-10-02 00:31:27.515446', 'step': 11675, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:27.572264', 'step': 11675, 'epoch': 2}
{'type': 'loss', 'content': 0.08361871540546417, 'timestamp': '2025-10-02 00:31:27.579179', 'step': 11676, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:31:27.634693', 'step': 11676, 'epoch': 2}
{'type': 'loss', 'content': 0.08628340810537338, 'timestamp': '2025-10-02 00:31:27.637302', 'step': 11677, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:27.695979', 'step': 11677, 'epoch': 2}
{'type': 'loss', 'content': 0.06020911782979965, 'timestamp': '2025-10-02 00:31:27.699055', 'step': 11678, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:27.757788', 'step': 11678, 'epoch': 2}
{'type': 'loss', 'content': 0.046428877860307693, 'timestamp': '2025-10-02 00:31:27.767382', 'step': 11679, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:31:27.821963', 'step': 11679, 'epoch': 2}
{'type': 'loss', 'content': 0.16447176039218903, 'timestamp': '2025-10-02 00:31:27.827961', 'step': 11680, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:27.882253', 'step': 11680, 'epoch': 2}
{'type': 'loss', 'content': 0.08504549413919449, 'timestamp': '2025-10-02 00:31:27.885237', 'step': 11681, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:31:27.946853', 'step': 11681, 'epoch': 2}
{'type': 'loss', 'content': 0.056034330278635025, 'timestamp': '2025-10-02 00:31:27.957357', 'step': 11682, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:28.012160', 'step': 11682, 'epoch': 2}
{'type': 'loss', 'content': 0.14192301034927368, 'timestamp': '2025-10-02 00:31:28.014913', 'step': 11683, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:28.068881', 'step': 11683, 'epoch': 2}
{'type': 'loss', 'content': 0.11859258264303207, 'timestamp': '2025-10-02 00:31:28.074842', 'step': 11684, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:28.128630', 'step': 11684, 'epoch': 2}
{'type': 'loss', 'content': 0.04313911125063896, 'timestamp': '2025-10-02 00:31:28.131594', 'step': 11685, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:28.185981', 'step': 11685, 'epoch': 2}
{'type': 'loss', 'content': 0.11260427534580231, 'timestamp': '2025-10-02 00:31:28.189067', 'step': 11686, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:28.243258', 'step': 11686, 'epoch': 2}
{'type': 'loss', 'content': 0.11773849278688431, 'timestamp': '2025-10-02 00:31:28.246284', 'step': 11687, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:28.301042', 'step': 11687, 'epoch': 2}
{'type': 'loss', 'content': 0.2508246898651123, 'timestamp': '2025-10-02 00:31:28.307589', 'step': 11688, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:28.361724', 'step': 11688, 'epoch': 2}
{'type': 'loss', 'content': 0.07172661274671555, 'timestamp': '2025-10-02 00:31:28.372000', 'step': 11689, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:28.426644', 'step': 11689, 'epoch': 2}
{'type': 'loss', 'content': 0.02767324447631836, 'timestamp': '2025-10-02 00:31:28.436002', 'step': 11690, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:28.490390', 'step': 11690, 'epoch': 2}
{'type': 'loss', 'content': 0.10356231778860092, 'timestamp': '2025-10-02 00:31:28.493067', 'step': 11691, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:28.547165', 'step': 11691, 'epoch': 2}
{'type': 'loss', 'content': 0.3717203140258789, 'timestamp': '2025-10-02 00:31:28.553238', 'step': 11692, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:28.608047', 'step': 11692, 'epoch': 2}
{'type': 'loss', 'content': 0.08296111226081848, 'timestamp': '2025-10-02 00:31:28.610885', 'step': 11693, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:28.665825', 'step': 11693, 'epoch': 2}
{'type': 'loss', 'content': 0.14212243258953094, 'timestamp': '2025-10-02 00:31:28.669017', 'step': 11694, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:28.723433', 'step': 11694, 'epoch': 2}
{'type': 'loss', 'content': 0.04080531746149063, 'timestamp': '2025-10-02 00:31:28.725685', 'step': 11695, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:28.779843', 'step': 11695, 'epoch': 2}
{'type': 'loss', 'content': 0.07063030451536179, 'timestamp': '2025-10-02 00:31:28.785683', 'step': 11696, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:28.839690', 'step': 11696, 'epoch': 2}
{'type': 'loss', 'content': 0.02977042831480503, 'timestamp': '2025-10-02 00:31:28.842388', 'step': 11697, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:28.898236', 'step': 11697, 'epoch': 2}
{'type': 'loss', 'content': 0.008700387552380562, 'timestamp': '2025-10-02 00:31:28.907626', 'step': 11698, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:28.962157', 'step': 11698, 'epoch': 2}
{'type': 'loss', 'content': 0.11239238828420639, 'timestamp': '2025-10-02 00:31:28.969916', 'step': 11699, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:29.025235', 'step': 11699, 'epoch': 2}
{'type': 'loss', 'content': 0.1013011634349823, 'timestamp': '2025-10-02 00:31:29.031272', 'step': 11700, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:29.084686', 'step': 11700, 'epoch': 2}
{'type': 'loss', 'content': 0.19312863051891327, 'timestamp': '2025-10-02 00:31:29.087646', 'step': 11701, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:31:29.151248', 'step': 11701, 'epoch': 2}
{'type': 'loss', 'content': 0.05767140910029411, 'timestamp': '2025-10-02 00:31:29.161946', 'step': 11702, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:29.216571', 'step': 11702, 'epoch': 2}
{'type': 'loss', 'content': 0.07846313714981079, 'timestamp': '2025-10-02 00:31:29.219544', 'step': 11703, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:29.273827', 'step': 11703, 'epoch': 2}
{'type': 'loss', 'content': 0.14909027516841888, 'timestamp': '2025-10-02 00:31:29.282551', 'step': 11704, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:31:29.337118', 'step': 11704, 'epoch': 2}
{'type': 'loss', 'content': 0.08206726610660553, 'timestamp': '2025-10-02 00:31:29.340079', 'step': 11705, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:29.395202', 'step': 11705, 'epoch': 2}
{'type': 'loss', 'content': 0.035105638206005096, 'timestamp': '2025-10-02 00:31:29.402999', 'step': 11706, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:29.458089', 'step': 11706, 'epoch': 2}
{'type': 'loss', 'content': 0.035543292760849, 'timestamp': '2025-10-02 00:31:29.464031', 'step': 11707, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:29.518811', 'step': 11707, 'epoch': 2}
{'type': 'loss', 'content': 0.0591256320476532, 'timestamp': '2025-10-02 00:31:29.527445', 'step': 11708, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:29.582074', 'step': 11708, 'epoch': 2}
{'type': 'loss', 'content': 0.17728184163570404, 'timestamp': '2025-10-02 00:31:29.584828', 'step': 11709, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:29.639012', 'step': 11709, 'epoch': 2}
{'type': 'loss', 'content': 0.11915598064661026, 'timestamp': '2025-10-02 00:31:29.642020', 'step': 11710, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:31:29.704516', 'step': 11710, 'epoch': 2}
{'type': 'loss', 'content': 0.02908404916524887, 'timestamp': '2025-10-02 00:31:29.715060', 'step': 11711, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:29.770907', 'step': 11711, 'epoch': 2}
{'type': 'loss', 'content': 0.09738615155220032, 'timestamp': '2025-10-02 00:31:29.777980', 'step': 11712, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:31:29.831316', 'step': 11712, 'epoch': 2}
{'type': 'loss', 'content': 0.09335792809724808, 'timestamp': '2025-10-02 00:31:29.834623', 'step': 11713, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:29.889759', 'step': 11713, 'epoch': 2}
{'type': 'loss', 'content': 0.04337858408689499, 'timestamp': '2025-10-02 00:31:29.892553', 'step': 11714, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:31:29.967681', 'step': 11714, 'epoch': 2}
{'type': 'loss', 'content': 0.03486338257789612, 'timestamp': '2025-10-02 00:31:29.980914', 'step': 11715, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:30.035268', 'step': 11715, 'epoch': 2}
{'type': 'loss', 'content': 0.08940267562866211, 'timestamp': '2025-10-02 00:31:30.041590', 'step': 11716, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:30.096885', 'step': 11716, 'epoch': 2}
{'type': 'loss', 'content': 0.03998973220586777, 'timestamp': '2025-10-02 00:31:30.099671', 'step': 11717, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:31:30.155847', 'step': 11717, 'epoch': 2}
{'type': 'loss', 'content': 0.11932030320167542, 'timestamp': '2025-10-02 00:31:30.159388', 'step': 11718, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:30.214442', 'step': 11718, 'epoch': 2}
{'type': 'loss', 'content': 0.05689500644803047, 'timestamp': '2025-10-02 00:31:30.217914', 'step': 11719, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:30.274766', 'step': 11719, 'epoch': 2}
{'type': 'loss', 'content': 0.041732605546712875, 'timestamp': '2025-10-02 00:31:30.284890', 'step': 11720, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:30.340789', 'step': 11720, 'epoch': 2}
{'type': 'loss', 'content': 0.0164957195520401, 'timestamp': '2025-10-02 00:31:30.350724', 'step': 11721, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:30.438507', 'step': 11721, 'epoch': 2}
{'type': 'loss', 'content': 0.004043626133352518, 'timestamp': '2025-10-02 00:31:30.459848', 'step': 11722, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:30.555296', 'step': 11722, 'epoch': 2}
{'type': 'loss', 'content': 0.08603642880916595, 'timestamp': '2025-10-02 00:31:30.560875', 'step': 11723, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:30.633380', 'step': 11723, 'epoch': 2}
{'type': 'loss', 'content': 0.11520560830831528, 'timestamp': '2025-10-02 00:31:30.643551', 'step': 11724, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:30.721538', 'step': 11724, 'epoch': 2}
{'type': 'loss', 'content': 0.03230888769030571, 'timestamp': '2025-10-02 00:31:30.728858', 'step': 11725, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:31:30.788683', 'step': 11725, 'epoch': 2}
{'type': 'loss', 'content': 0.171335369348526, 'timestamp': '2025-10-02 00:31:30.809668', 'step': 11726, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:30.884307', 'step': 11726, 'epoch': 2}
{'type': 'loss', 'content': 0.10070764273405075, 'timestamp': '2025-10-02 00:31:30.888834', 'step': 11727, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:30.949200', 'step': 11727, 'epoch': 2}
{'type': 'loss', 'content': 0.07222531735897064, 'timestamp': '2025-10-02 00:31:30.967839', 'step': 11728, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:31.041341', 'step': 11728, 'epoch': 2}
{'type': 'loss', 'content': 0.08095923066139221, 'timestamp': '2025-10-02 00:31:31.045978', 'step': 11729, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:31.117414', 'step': 11729, 'epoch': 2}
{'type': 'loss', 'content': 0.17280255258083344, 'timestamp': '2025-10-02 00:31:31.121302', 'step': 11730, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:31.195935', 'step': 11730, 'epoch': 2}
{'type': 'loss', 'content': 0.19349484145641327, 'timestamp': '2025-10-02 00:31:31.199375', 'step': 11731, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:31.266860', 'step': 11731, 'epoch': 2}
{'type': 'loss', 'content': 0.0550897940993309, 'timestamp': '2025-10-02 00:31:31.284755', 'step': 11732, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:31.365655', 'step': 11732, 'epoch': 2}
{'type': 'loss', 'content': 0.08413903415203094, 'timestamp': '2025-10-02 00:31:31.380743', 'step': 11733, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:31.449642', 'step': 11733, 'epoch': 2}
{'type': 'loss', 'content': 0.09825620800256729, 'timestamp': '2025-10-02 00:31:31.454476', 'step': 11734, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:31.514418', 'step': 11734, 'epoch': 2}
{'type': 'loss', 'content': 0.05682341009378433, 'timestamp': '2025-10-02 00:31:31.519290', 'step': 11735, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:31.585944', 'step': 11735, 'epoch': 2}
{'type': 'loss', 'content': 0.11525102704763412, 'timestamp': '2025-10-02 00:31:31.603179', 'step': 11736, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:31.673330', 'step': 11736, 'epoch': 2}
{'type': 'loss', 'content': 0.06867530196905136, 'timestamp': '2025-10-02 00:31:31.678294', 'step': 11737, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:31:31.747474', 'step': 11737, 'epoch': 2}
{'type': 'loss', 'content': 0.06273633986711502, 'timestamp': '2025-10-02 00:31:31.758351', 'step': 11738, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:31:31.833683', 'step': 11738, 'epoch': 2}
{'type': 'loss', 'content': 0.07262466847896576, 'timestamp': '2025-10-02 00:31:31.838760', 'step': 11739, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:31.899149', 'step': 11739, 'epoch': 2}
{'type': 'loss', 'content': 0.060175567865371704, 'timestamp': '2025-10-02 00:31:31.919024', 'step': 11740, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:31:31.985914', 'step': 11740, 'epoch': 2}
{'type': 'loss', 'content': 0.07958760857582092, 'timestamp': '2025-10-02 00:31:32.008561', 'step': 11741, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:32.091588', 'step': 11741, 'epoch': 2}
{'type': 'loss', 'content': 0.1493251770734787, 'timestamp': '2025-10-02 00:31:32.097114', 'step': 11742, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:32.166047', 'step': 11742, 'epoch': 2}
{'type': 'loss', 'content': 0.10281296819448471, 'timestamp': '2025-10-02 00:31:32.182942', 'step': 11743, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:32.252326', 'step': 11743, 'epoch': 2}
{'type': 'loss', 'content': 0.05474386364221573, 'timestamp': '2025-10-02 00:31:32.268218', 'step': 11744, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:32.324173', 'step': 11744, 'epoch': 2}
{'type': 'loss', 'content': 0.07497814297676086, 'timestamp': '2025-10-02 00:31:32.328455', 'step': 11745, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:31:32.411992', 'step': 11745, 'epoch': 2}
{'type': 'loss', 'content': 0.03306233882904053, 'timestamp': '2025-10-02 00:31:32.422690', 'step': 11746, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:32.494243', 'step': 11746, 'epoch': 2}
{'type': 'loss', 'content': 0.14512498676776886, 'timestamp': '2025-10-02 00:31:32.499133', 'step': 11747, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:32.570208', 'step': 11747, 'epoch': 2}
{'type': 'loss', 'content': 0.09321118891239166, 'timestamp': '2025-10-02 00:31:32.585731', 'step': 11748, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:32.658598', 'step': 11748, 'epoch': 2}
{'type': 'loss', 'content': 0.2014014720916748, 'timestamp': '2025-10-02 00:31:32.663016', 'step': 11749, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:32.723276', 'step': 11749, 'epoch': 2}
{'type': 'loss', 'content': 0.043519217520952225, 'timestamp': '2025-10-02 00:31:32.726649', 'step': 11750, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:32.794636', 'step': 11750, 'epoch': 2}
{'type': 'loss', 'content': 0.02662627585232258, 'timestamp': '2025-10-02 00:31:32.803936', 'step': 11751, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:31:32.869374', 'step': 11751, 'epoch': 2}
{'type': 'loss', 'content': 0.21554015576839447, 'timestamp': '2025-10-02 00:31:32.876805', 'step': 11752, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:32.953871', 'step': 11752, 'epoch': 2}
{'type': 'loss', 'content': 0.06421513855457306, 'timestamp': '2025-10-02 00:31:32.957364', 'step': 11753, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:33.013979', 'step': 11753, 'epoch': 2}
{'type': 'loss', 'content': 0.03868449851870537, 'timestamp': '2025-10-02 00:31:33.016906', 'step': 11754, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:33.078310', 'step': 11754, 'epoch': 2}
{'type': 'loss', 'content': 0.07246726006269455, 'timestamp': '2025-10-02 00:31:33.081236', 'step': 11755, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:33.143915', 'step': 11755, 'epoch': 2}
{'type': 'loss', 'content': 0.07453742623329163, 'timestamp': '2025-10-02 00:31:33.156839', 'step': 11756, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:33.214005', 'step': 11756, 'epoch': 2}
{'type': 'loss', 'content': 0.08221810311079025, 'timestamp': '2025-10-02 00:31:33.224289', 'step': 11757, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:33.280864', 'step': 11757, 'epoch': 2}
{'type': 'loss', 'content': 0.0784284919500351, 'timestamp': '2025-10-02 00:31:33.285447', 'step': 11758, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:31:33.349578', 'step': 11758, 'epoch': 2}
{'type': 'loss', 'content': 0.012721368111670017, 'timestamp': '2025-10-02 00:31:33.359731', 'step': 11759, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:33.429454', 'step': 11759, 'epoch': 2}
{'type': 'loss', 'content': 0.05541666969656944, 'timestamp': '2025-10-02 00:31:33.437329', 'step': 11760, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:33.493016', 'step': 11760, 'epoch': 2}
{'type': 'loss', 'content': 0.09944845736026764, 'timestamp': '2025-10-02 00:31:33.497417', 'step': 11761, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:31:33.553886', 'step': 11761, 'epoch': 2}
{'type': 'loss', 'content': 0.09572295099496841, 'timestamp': '2025-10-02 00:31:33.556993', 'step': 11762, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:33.616286', 'step': 11762, 'epoch': 2}
{'type': 'loss', 'content': 0.03529021516442299, 'timestamp': '2025-10-02 00:31:33.623958', 'step': 11763, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:33.680263', 'step': 11763, 'epoch': 2}
{'type': 'loss', 'content': 0.08655907213687897, 'timestamp': '2025-10-02 00:31:33.687396', 'step': 11764, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:33.754486', 'step': 11764, 'epoch': 2}
{'type': 'loss', 'content': 0.030803976580500603, 'timestamp': '2025-10-02 00:31:33.757207', 'step': 11765, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:31:33.833331', 'step': 11765, 'epoch': 2}
{'type': 'loss', 'content': 0.024615909904241562, 'timestamp': '2025-10-02 00:31:33.846037', 'step': 11766, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:33.905154', 'step': 11766, 'epoch': 2}
{'type': 'loss', 'content': 0.08497420698404312, 'timestamp': '2025-10-02 00:31:33.915232', 'step': 11767, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:33.977426', 'step': 11767, 'epoch': 2}
{'type': 'loss', 'content': 0.026625527068972588, 'timestamp': '2025-10-02 00:31:33.984425', 'step': 11768, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:34.042946', 'step': 11768, 'epoch': 2}
{'type': 'loss', 'content': 0.036662179976701736, 'timestamp': '2025-10-02 00:31:34.046874', 'step': 11769, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:34.103037', 'step': 11769, 'epoch': 2}
{'type': 'loss', 'content': 0.07711175084114075, 'timestamp': '2025-10-02 00:31:34.106937', 'step': 11770, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:34.169375', 'step': 11770, 'epoch': 2}
{'type': 'loss', 'content': 0.06026317551732063, 'timestamp': '2025-10-02 00:31:34.173921', 'step': 11771, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:34.238064', 'step': 11771, 'epoch': 2}
{'type': 'loss', 'content': 0.1160818487405777, 'timestamp': '2025-10-02 00:31:34.244470', 'step': 11772, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:34.304777', 'step': 11772, 'epoch': 2}
{'type': 'loss', 'content': 0.03167816251516342, 'timestamp': '2025-10-02 00:31:34.312518', 'step': 11773, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:34.371984', 'step': 11773, 'epoch': 2}
{'type': 'loss', 'content': 0.055764339864254, 'timestamp': '2025-10-02 00:31:34.378108', 'step': 11774, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:31:34.435767', 'step': 11774, 'epoch': 2}
{'type': 'loss', 'content': 0.12468080967664719, 'timestamp': '2025-10-02 00:31:34.442697', 'step': 11775, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:34.512513', 'step': 11775, 'epoch': 2}
{'type': 'loss', 'content': 0.08185228705406189, 'timestamp': '2025-10-02 00:31:34.522849', 'step': 11776, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:34.577897', 'step': 11776, 'epoch': 2}
{'type': 'loss', 'content': 0.09181871265172958, 'timestamp': '2025-10-02 00:31:34.582145', 'step': 11777, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:34.640801', 'step': 11777, 'epoch': 2}
{'type': 'loss', 'content': 0.003454289399087429, 'timestamp': '2025-10-02 00:31:34.643588', 'step': 11778, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:34.698611', 'step': 11778, 'epoch': 2}
{'type': 'loss', 'content': 0.12331300973892212, 'timestamp': '2025-10-02 00:31:34.707680', 'step': 11779, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:34.768310', 'step': 11779, 'epoch': 2}
{'type': 'loss', 'content': 0.04469982162117958, 'timestamp': '2025-10-02 00:31:34.774236', 'step': 11780, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:34.836124', 'step': 11780, 'epoch': 2}
{'type': 'loss', 'content': 0.13002122938632965, 'timestamp': '2025-10-02 00:31:34.839290', 'step': 11781, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:34.897421', 'step': 11781, 'epoch': 2}
{'type': 'loss', 'content': 0.05075205862522125, 'timestamp': '2025-10-02 00:31:34.899790', 'step': 11782, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:34.957683', 'step': 11782, 'epoch': 2}
{'type': 'loss', 'content': 0.048854634165763855, 'timestamp': '2025-10-02 00:31:34.961821', 'step': 11783, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:35.028846', 'step': 11783, 'epoch': 2}
{'type': 'loss', 'content': 0.05086221918463707, 'timestamp': '2025-10-02 00:31:35.036357', 'step': 11784, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:31:35.101851', 'step': 11784, 'epoch': 2}
{'type': 'loss', 'content': 0.023962682113051414, 'timestamp': '2025-10-02 00:31:35.105374', 'step': 11785, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:35.163642', 'step': 11785, 'epoch': 2}
{'type': 'loss', 'content': 0.14602893590927124, 'timestamp': '2025-10-02 00:31:35.168410', 'step': 11786, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:35.226019', 'step': 11786, 'epoch': 2}
{'type': 'loss', 'content': 0.043430741876363754, 'timestamp': '2025-10-02 00:31:35.233742', 'step': 11787, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:31:35.304009', 'step': 11787, 'epoch': 2}
{'type': 'loss', 'content': 0.016142789274454117, 'timestamp': '2025-10-02 00:31:35.315286', 'step': 11788, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:35.376526', 'step': 11788, 'epoch': 2}
{'type': 'loss', 'content': 0.09747179597616196, 'timestamp': '2025-10-02 00:31:35.380572', 'step': 11789, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:35.449940', 'step': 11789, 'epoch': 2}
{'type': 'loss', 'content': 0.09444916248321533, 'timestamp': '2025-10-02 00:31:35.454123', 'step': 11790, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:35.523365', 'step': 11790, 'epoch': 2}
{'type': 'loss', 'content': 0.07932404428720474, 'timestamp': '2025-10-02 00:31:35.529347', 'step': 11791, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:35.592752', 'step': 11791, 'epoch': 2}
{'type': 'loss', 'content': 0.07580237835645676, 'timestamp': '2025-10-02 00:31:35.601437', 'step': 11792, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:31:35.662892', 'step': 11792, 'epoch': 2}
{'type': 'loss', 'content': 0.10533000528812408, 'timestamp': '2025-10-02 00:31:35.665642', 'step': 11793, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:35.740829', 'step': 11793, 'epoch': 2}
{'type': 'loss', 'content': 0.06540434062480927, 'timestamp': '2025-10-02 00:31:35.747013', 'step': 11794, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:35.815599', 'step': 11794, 'epoch': 2}
{'type': 'loss', 'content': 0.08101353794336319, 'timestamp': '2025-10-02 00:31:35.824602', 'step': 11795, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:35.894821', 'step': 11795, 'epoch': 2}
{'type': 'loss', 'content': 0.14557510614395142, 'timestamp': '2025-10-02 00:31:35.901921', 'step': 11796, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:31:35.969378', 'step': 11796, 'epoch': 2}
{'type': 'loss', 'content': 0.1452532857656479, 'timestamp': '2025-10-02 00:31:35.973383', 'step': 11797, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:36.034760', 'step': 11797, 'epoch': 2}
{'type': 'loss', 'content': 0.04907098785042763, 'timestamp': '2025-10-02 00:31:36.044095', 'step': 11798, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:36.113129', 'step': 11798, 'epoch': 2}
{'type': 'loss', 'content': 0.039442937821149826, 'timestamp': '2025-10-02 00:31:36.117035', 'step': 11799, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:31:36.182702', 'step': 11799, 'epoch': 2}
{'type': 'loss', 'content': 0.04443438723683357, 'timestamp': '2025-10-02 00:31:36.193755', 'step': 11800, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:36.249043', 'step': 11800, 'epoch': 2}
{'type': 'loss', 'content': 0.14860565960407257, 'timestamp': '2025-10-02 00:31:36.259121', 'step': 11801, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:36.331467', 'step': 11801, 'epoch': 2}
{'type': 'loss', 'content': 0.07822021842002869, 'timestamp': '2025-10-02 00:31:36.337506', 'step': 11802, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:36.395843', 'step': 11802, 'epoch': 2}
{'type': 'loss', 'content': 0.1357758343219757, 'timestamp': '2025-10-02 00:31:36.398953', 'step': 11803, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:36.467705', 'step': 11803, 'epoch': 2}
{'type': 'loss', 'content': 0.016330571845173836, 'timestamp': '2025-10-02 00:31:36.474580', 'step': 11804, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:36.547147', 'step': 11804, 'epoch': 2}
{'type': 'loss', 'content': 0.017358392477035522, 'timestamp': '2025-10-02 00:31:36.557932', 'step': 11805, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:36.627212', 'step': 11805, 'epoch': 2}
{'type': 'loss', 'content': 0.05256173014640808, 'timestamp': '2025-10-02 00:31:36.637573', 'step': 11806, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:36.696497', 'step': 11806, 'epoch': 2}
{'type': 'loss', 'content': 0.07381405681371689, 'timestamp': '2025-10-02 00:31:36.707928', 'step': 11807, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:36.783685', 'step': 11807, 'epoch': 2}
{'type': 'loss', 'content': 0.027041619643568993, 'timestamp': '2025-10-02 00:31:36.791287', 'step': 11808, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:36.854018', 'step': 11808, 'epoch': 2}
{'type': 'loss', 'content': 0.08303175866603851, 'timestamp': '2025-10-02 00:31:36.856722', 'step': 11809, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:36.917391', 'step': 11809, 'epoch': 2}
{'type': 'loss', 'content': 0.053226374089717865, 'timestamp': '2025-10-02 00:31:36.931259', 'step': 11810, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:36.996194', 'step': 11810, 'epoch': 2}
{'type': 'loss', 'content': 0.14082224667072296, 'timestamp': '2025-10-02 00:31:36.998954', 'step': 11811, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:37.062074', 'step': 11811, 'epoch': 2}
{'type': 'loss', 'content': 0.10571552813053131, 'timestamp': '2025-10-02 00:31:37.074255', 'step': 11812, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:37.134010', 'step': 11812, 'epoch': 2}
{'type': 'loss', 'content': 0.07863584160804749, 'timestamp': '2025-10-02 00:31:37.144016', 'step': 11813, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:31:37.224059', 'step': 11813, 'epoch': 2}
{'type': 'loss', 'content': 0.052515946328639984, 'timestamp': '2025-10-02 00:31:37.233398', 'step': 11814, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:31:37.299502', 'step': 11814, 'epoch': 2}
{'type': 'loss', 'content': 0.07162318378686905, 'timestamp': '2025-10-02 00:31:37.302890', 'step': 11815, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:31:37.370423', 'step': 11815, 'epoch': 2}
{'type': 'loss', 'content': 0.04958278313279152, 'timestamp': '2025-10-02 00:31:37.393225', 'step': 11816, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:31:37.476127', 'step': 11816, 'epoch': 2}
{'type': 'loss', 'content': 0.03290632367134094, 'timestamp': '2025-10-02 00:31:37.479282', 'step': 11817, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:31:37.551058', 'step': 11817, 'epoch': 2}
{'type': 'loss', 'content': 0.20033520460128784, 'timestamp': '2025-10-02 00:31:37.561308', 'step': 11818, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:31:37.635264', 'step': 11818, 'epoch': 2}
{'type': 'loss', 'content': 0.06085461005568504, 'timestamp': '2025-10-02 00:31:37.647591', 'step': 11819, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:31:37.718252', 'step': 11819, 'epoch': 2}
{'type': 'loss', 'content': 0.06911839544773102, 'timestamp': '2025-10-02 00:31:37.725258', 'step': 11820, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:31:37.805647', 'step': 11820, 'epoch': 2}
{'type': 'loss', 'content': 0.08459246158599854, 'timestamp': '2025-10-02 00:31:37.813180', 'step': 11821, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:31:37.891714', 'step': 11821, 'epoch': 2}
{'type': 'loss', 'content': 0.03474823385477066, 'timestamp': '2025-10-02 00:31:37.897505', 'step': 11822, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:31:37.977748', 'step': 11822, 'epoch': 2}
{'type': 'loss', 'content': 0.21124888956546783, 'timestamp': '2025-10-02 00:31:37.982962', 'step': 11823, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:31:38.067529', 'step': 11823, 'epoch': 2}
{'type': 'loss', 'content': 0.004738082177937031, 'timestamp': '2025-10-02 00:31:38.081637', 'step': 11824, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:38.161938', 'step': 11824, 'epoch': 2}
{'type': 'loss', 'content': 0.12831732630729675, 'timestamp': '2025-10-02 00:31:38.167517', 'step': 11825, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:31:38.238046', 'step': 11825, 'epoch': 2}
{'type': 'loss', 'content': 0.13840462267398834, 'timestamp': '2025-10-02 00:31:38.242854', 'step': 11826, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:31:38.303596', 'step': 11826, 'epoch': 2}
{'type': 'loss', 'content': 0.10756528377532959, 'timestamp': '2025-10-02 00:31:38.308141', 'step': 11827, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:31:38.368374', 'step': 11827, 'epoch': 2}
{'type': 'loss', 'content': 0.07994911074638367, 'timestamp': '2025-10-02 00:31:38.381681', 'step': 11828, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:31:38.458192', 'step': 11828, 'epoch': 2}
{'type': 'loss', 'content': 0.11384741216897964, 'timestamp': '2025-10-02 00:31:38.461244', 'step': 11829, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:31:38.528603', 'step': 11829, 'epoch': 2}
{'type': 'loss', 'content': 0.0736825242638588, 'timestamp': '2025-10-02 00:31:38.531598', 'step': 11830, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:31:38.589707', 'step': 11830, 'epoch': 2}
{'type': 'loss', 'content': 0.0792001262307167, 'timestamp': '2025-10-02 00:31:38.599266', 'step': 11831, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:31:38.657504', 'step': 11831, 'epoch': 2}
{'type': 'loss', 'content': 0.08551274240016937, 'timestamp': '2025-10-02 00:31:38.665028', 'step': 11832, 'epoch': 2}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:32:05.542960', 'step': 11832, 'epoch': 2}
{'type': 'pplx', 'content': 109.50340842156312, 'timestamp': '2025-10-02 00:32:05.547367', 'step': 11832, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:05.605126', 'step': 11832, 'epoch': 2}
{'type': 'loss', 'content': 0.04657219350337982, 'timestamp': '2025-10-02 00:32:05.609751', 'step': 11833, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:32:05.665606', 'step': 11833, 'epoch': 2}
{'type': 'loss', 'content': 0.10614665597677231, 'timestamp': '2025-10-02 00:32:05.668312', 'step': 11834, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:32:05.736944', 'step': 11834, 'epoch': 2}
{'type': 'loss', 'content': 0.034453216940164566, 'timestamp': '2025-10-02 00:32:05.748934', 'step': 11835, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:05.805246', 'step': 11835, 'epoch': 2}
{'type': 'loss', 'content': 0.0636795163154602, 'timestamp': '2025-10-02 00:32:05.815571', 'step': 11836, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:05.869530', 'step': 11836, 'epoch': 2}
{'type': 'loss', 'content': 0.059592414647340775, 'timestamp': '2025-10-02 00:32:05.872635', 'step': 11837, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:05.927497', 'step': 11837, 'epoch': 2}
{'type': 'loss', 'content': 0.07342541962862015, 'timestamp': '2025-10-02 00:32:05.930518', 'step': 11838, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:05.986102', 'step': 11838, 'epoch': 2}
{'type': 'loss', 'content': 0.09145653992891312, 'timestamp': '2025-10-02 00:32:05.988826', 'step': 11839, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:06.045495', 'step': 11839, 'epoch': 2}
{'type': 'loss', 'content': 0.045622438192367554, 'timestamp': '2025-10-02 00:32:06.053875', 'step': 11840, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:06.108333', 'step': 11840, 'epoch': 2}
{'type': 'loss', 'content': 0.021815845742821693, 'timestamp': '2025-10-02 00:32:06.116231', 'step': 11841, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:06.171842', 'step': 11841, 'epoch': 2}
{'type': 'loss', 'content': 0.050799332559108734, 'timestamp': '2025-10-02 00:32:06.174446', 'step': 11842, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:06.233755', 'step': 11842, 'epoch': 2}
{'type': 'loss', 'content': 0.03343639895319939, 'timestamp': '2025-10-02 00:32:06.243927', 'step': 11843, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:06.298638', 'step': 11843, 'epoch': 2}
{'type': 'loss', 'content': 0.03497592732310295, 'timestamp': '2025-10-02 00:32:06.304668', 'step': 11844, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:06.358605', 'step': 11844, 'epoch': 2}
{'type': 'loss', 'content': 0.0835488811135292, 'timestamp': '2025-10-02 00:32:06.361005', 'step': 11845, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:06.421381', 'step': 11845, 'epoch': 2}
{'type': 'loss', 'content': 0.02166811190545559, 'timestamp': '2025-10-02 00:32:06.431613', 'step': 11846, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:06.486374', 'step': 11846, 'epoch': 2}
{'type': 'loss', 'content': 0.21751366555690765, 'timestamp': '2025-10-02 00:32:06.489117', 'step': 11847, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:06.542993', 'step': 11847, 'epoch': 2}
{'type': 'loss', 'content': 0.13587363064289093, 'timestamp': '2025-10-02 00:32:06.551481', 'step': 11848, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:06.605038', 'step': 11848, 'epoch': 2}
{'type': 'loss', 'content': 0.07767146080732346, 'timestamp': '2025-10-02 00:32:06.614790', 'step': 11849, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:32:06.683314', 'step': 11849, 'epoch': 2}
{'type': 'loss', 'content': 0.04773431643843651, 'timestamp': '2025-10-02 00:32:06.694162', 'step': 11850, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:06.748673', 'step': 11850, 'epoch': 2}
{'type': 'loss', 'content': 0.1293623000383377, 'timestamp': '2025-10-02 00:32:06.751432', 'step': 11851, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:06.806210', 'step': 11851, 'epoch': 2}
{'type': 'loss', 'content': 0.029017820954322815, 'timestamp': '2025-10-02 00:32:06.813262', 'step': 11852, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:06.869409', 'step': 11852, 'epoch': 2}
{'type': 'loss', 'content': 0.07679957151412964, 'timestamp': '2025-10-02 00:32:06.872600', 'step': 11853, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:06.929252', 'step': 11853, 'epoch': 2}
{'type': 'loss', 'content': 0.03482652083039284, 'timestamp': '2025-10-02 00:32:06.935558', 'step': 11854, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:32:07.000679', 'step': 11854, 'epoch': 2}
{'type': 'loss', 'content': 0.04189174994826317, 'timestamp': '2025-10-02 00:32:07.011168', 'step': 11855, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:07.070396', 'step': 11855, 'epoch': 2}
{'type': 'loss', 'content': 0.10110198706388474, 'timestamp': '2025-10-02 00:32:07.077342', 'step': 11856, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:07.133360', 'step': 11856, 'epoch': 2}
{'type': 'loss', 'content': 0.03916516155004501, 'timestamp': '2025-10-02 00:32:07.135841', 'step': 11857, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:07.190920', 'step': 11857, 'epoch': 2}
{'type': 'loss', 'content': 0.036831069737672806, 'timestamp': '2025-10-02 00:32:07.193311', 'step': 11858, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:07.246644', 'step': 11858, 'epoch': 2}
{'type': 'loss', 'content': 0.07872503250837326, 'timestamp': '2025-10-02 00:32:07.250446', 'step': 11859, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:32:07.315245', 'step': 11859, 'epoch': 2}
{'type': 'loss', 'content': 0.01572185382246971, 'timestamp': '2025-10-02 00:32:07.326644', 'step': 11860, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:07.382853', 'step': 11860, 'epoch': 2}
{'type': 'loss', 'content': 0.16385811567306519, 'timestamp': '2025-10-02 00:32:07.385835', 'step': 11861, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:07.441136', 'step': 11861, 'epoch': 2}
{'type': 'loss', 'content': 0.08125939965248108, 'timestamp': '2025-10-02 00:32:07.444115', 'step': 11862, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:07.501371', 'step': 11862, 'epoch': 2}
{'type': 'loss', 'content': 0.13966771960258484, 'timestamp': '2025-10-02 00:32:07.504482', 'step': 11863, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:32:07.561144', 'step': 11863, 'epoch': 2}
{'type': 'loss', 'content': 0.13015520572662354, 'timestamp': '2025-10-02 00:32:07.568131', 'step': 11864, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:07.624619', 'step': 11864, 'epoch': 2}
{'type': 'loss', 'content': 0.15289737284183502, 'timestamp': '2025-10-02 00:32:07.627816', 'step': 11865, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:07.682959', 'step': 11865, 'epoch': 2}
{'type': 'loss', 'content': 0.15824812650680542, 'timestamp': '2025-10-02 00:32:07.685487', 'step': 11866, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:07.743308', 'step': 11866, 'epoch': 2}
{'type': 'loss', 'content': 0.09478267282247543, 'timestamp': '2025-10-02 00:32:07.746553', 'step': 11867, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:32:07.819841', 'step': 11867, 'epoch': 2}
{'type': 'loss', 'content': 0.01075334195047617, 'timestamp': '2025-10-02 00:32:07.832657', 'step': 11868, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:07.890276', 'step': 11868, 'epoch': 2}
{'type': 'loss', 'content': 0.07977531850337982, 'timestamp': '2025-10-02 00:32:07.894217', 'step': 11869, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:07.952426', 'step': 11869, 'epoch': 2}
{'type': 'loss', 'content': 0.13341379165649414, 'timestamp': '2025-10-02 00:32:07.955728', 'step': 11870, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:08.012827', 'step': 11870, 'epoch': 2}
{'type': 'loss', 'content': 0.13318386673927307, 'timestamp': '2025-10-02 00:32:08.016331', 'step': 11871, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:08.072150', 'step': 11871, 'epoch': 2}
{'type': 'loss', 'content': 0.08876488357782364, 'timestamp': '2025-10-02 00:32:08.079504', 'step': 11872, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:08.135497', 'step': 11872, 'epoch': 2}
{'type': 'loss', 'content': 0.027611739933490753, 'timestamp': '2025-10-02 00:32:08.139118', 'step': 11873, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:08.195932', 'step': 11873, 'epoch': 2}
{'type': 'loss', 'content': 0.0920192152261734, 'timestamp': '2025-10-02 00:32:08.199347', 'step': 11874, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:08.258389', 'step': 11874, 'epoch': 2}
{'type': 'loss', 'content': 0.023113727569580078, 'timestamp': '2025-10-02 00:32:08.267758', 'step': 11875, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:08.324651', 'step': 11875, 'epoch': 2}
{'type': 'loss', 'content': 0.08086512982845306, 'timestamp': '2025-10-02 00:32:08.331619', 'step': 11876, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:08.386584', 'step': 11876, 'epoch': 2}
{'type': 'loss', 'content': 0.04052116721868515, 'timestamp': '2025-10-02 00:32:08.392693', 'step': 11877, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:08.449671', 'step': 11877, 'epoch': 2}
{'type': 'loss', 'content': 0.034173354506492615, 'timestamp': '2025-10-02 00:32:08.459177', 'step': 11878, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:08.517656', 'step': 11878, 'epoch': 2}
{'type': 'loss', 'content': 0.12081817537546158, 'timestamp': '2025-10-02 00:32:08.520620', 'step': 11879, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:08.577602', 'step': 11879, 'epoch': 2}
{'type': 'loss', 'content': 0.22044001519680023, 'timestamp': '2025-10-02 00:32:08.583738', 'step': 11880, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:32:08.647848', 'step': 11880, 'epoch': 2}
{'type': 'loss', 'content': 0.005800626240670681, 'timestamp': '2025-10-02 00:32:08.659583', 'step': 11881, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:08.714786', 'step': 11881, 'epoch': 2}
{'type': 'loss', 'content': 0.046076126396656036, 'timestamp': '2025-10-02 00:32:08.722598', 'step': 11882, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:08.778885', 'step': 11882, 'epoch': 2}
{'type': 'loss', 'content': 0.10076107084751129, 'timestamp': '2025-10-02 00:32:08.781350', 'step': 11883, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:08.837593', 'step': 11883, 'epoch': 2}
{'type': 'loss', 'content': 0.017276186496019363, 'timestamp': '2025-10-02 00:32:08.847740', 'step': 11884, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:08.900902', 'step': 11884, 'epoch': 2}
{'type': 'loss', 'content': 0.19731244444847107, 'timestamp': '2025-10-02 00:32:08.903556', 'step': 11885, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:08.958220', 'step': 11885, 'epoch': 2}
{'type': 'loss', 'content': 0.0940171480178833, 'timestamp': '2025-10-02 00:32:08.964508', 'step': 11886, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:09.020260', 'step': 11886, 'epoch': 2}
{'type': 'loss', 'content': 0.11110266298055649, 'timestamp': '2025-10-02 00:32:09.022884', 'step': 11887, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:09.077588', 'step': 11887, 'epoch': 2}
{'type': 'loss', 'content': 0.06010773777961731, 'timestamp': '2025-10-02 00:32:09.083992', 'step': 11888, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:09.139075', 'step': 11888, 'epoch': 2}
{'type': 'loss', 'content': 0.02125503681600094, 'timestamp': '2025-10-02 00:32:09.141763', 'step': 11889, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:09.196784', 'step': 11889, 'epoch': 2}
{'type': 'loss', 'content': 0.051835253834724426, 'timestamp': '2025-10-02 00:32:09.199621', 'step': 11890, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:09.254768', 'step': 11890, 'epoch': 2}
{'type': 'loss', 'content': 0.10449068248271942, 'timestamp': '2025-10-02 00:32:09.257240', 'step': 11891, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:09.313087', 'step': 11891, 'epoch': 2}
{'type': 'loss', 'content': 0.13058051466941833, 'timestamp': '2025-10-02 00:32:09.319159', 'step': 11892, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:09.374150', 'step': 11892, 'epoch': 2}
{'type': 'loss', 'content': 0.048998426645994186, 'timestamp': '2025-10-02 00:32:09.376278', 'step': 11893, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:09.432315', 'step': 11893, 'epoch': 2}
{'type': 'loss', 'content': 0.13077495992183685, 'timestamp': '2025-10-02 00:32:09.435274', 'step': 11894, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:09.490789', 'step': 11894, 'epoch': 2}
{'type': 'loss', 'content': 0.06320972740650177, 'timestamp': '2025-10-02 00:32:09.500136', 'step': 11895, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:09.560607', 'step': 11895, 'epoch': 2}
{'type': 'loss', 'content': 0.03247484937310219, 'timestamp': '2025-10-02 00:32:09.571569', 'step': 11896, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:09.626254', 'step': 11896, 'epoch': 2}
{'type': 'loss', 'content': 0.19483277201652527, 'timestamp': '2025-10-02 00:32:09.628958', 'step': 11897, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:09.684067', 'step': 11897, 'epoch': 2}
{'type': 'loss', 'content': 0.04187187924981117, 'timestamp': '2025-10-02 00:32:09.686461', 'step': 11898, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:09.740553', 'step': 11898, 'epoch': 2}
{'type': 'loss', 'content': 0.19510674476623535, 'timestamp': '2025-10-02 00:32:09.742881', 'step': 11899, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:09.797652', 'step': 11899, 'epoch': 2}
{'type': 'loss', 'content': 0.06837979704141617, 'timestamp': '2025-10-02 00:32:09.803895', 'step': 11900, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:32:09.871660', 'step': 11900, 'epoch': 2}
{'type': 'loss', 'content': 0.03737318515777588, 'timestamp': '2025-10-02 00:32:09.885046', 'step': 11901, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:09.940112', 'step': 11901, 'epoch': 2}
{'type': 'loss', 'content': 0.03403584286570549, 'timestamp': '2025-10-02 00:32:09.949448', 'step': 11902, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:10.004525', 'step': 11902, 'epoch': 2}
{'type': 'loss', 'content': 0.2045675367116928, 'timestamp': '2025-10-02 00:32:10.007470', 'step': 11903, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:10.062220', 'step': 11903, 'epoch': 2}
{'type': 'loss', 'content': 0.2853805720806122, 'timestamp': '2025-10-02 00:32:10.068487', 'step': 11904, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:10.122867', 'step': 11904, 'epoch': 2}
{'type': 'loss', 'content': 0.06653161346912384, 'timestamp': '2025-10-02 00:32:10.132767', 'step': 11905, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:32:10.202133', 'step': 11905, 'epoch': 2}
{'type': 'loss', 'content': 0.018141109496355057, 'timestamp': '2025-10-02 00:32:10.214093', 'step': 11906, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:10.274508', 'step': 11906, 'epoch': 2}
{'type': 'loss', 'content': 0.05662995204329491, 'timestamp': '2025-10-02 00:32:10.284675', 'step': 11907, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:10.339039', 'step': 11907, 'epoch': 2}
{'type': 'loss', 'content': 0.05684525519609451, 'timestamp': '2025-10-02 00:32:10.347609', 'step': 11908, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:10.402128', 'step': 11908, 'epoch': 2}
{'type': 'loss', 'content': 0.10772660374641418, 'timestamp': '2025-10-02 00:32:10.405345', 'step': 11909, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:10.460439', 'step': 11909, 'epoch': 2}
{'type': 'loss', 'content': 0.09891477227210999, 'timestamp': '2025-10-02 00:32:10.462835', 'step': 11910, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:10.518055', 'step': 11910, 'epoch': 2}
{'type': 'loss', 'content': 0.17099955677986145, 'timestamp': '2025-10-02 00:32:10.521341', 'step': 11911, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:32:10.575995', 'step': 11911, 'epoch': 2}
{'type': 'loss', 'content': 0.0841335654258728, 'timestamp': '2025-10-02 00:32:10.582875', 'step': 11912, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:10.637311', 'step': 11912, 'epoch': 2}
{'type': 'loss', 'content': 0.010668028146028519, 'timestamp': '2025-10-02 00:32:10.641143', 'step': 11913, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:10.696493', 'step': 11913, 'epoch': 2}
{'type': 'loss', 'content': 0.09655735641717911, 'timestamp': '2025-10-02 00:32:10.699116', 'step': 11914, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:32:10.753808', 'step': 11914, 'epoch': 2}
{'type': 'loss', 'content': 0.16037164628505707, 'timestamp': '2025-10-02 00:32:10.756388', 'step': 11915, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:10.811926', 'step': 11915, 'epoch': 2}
{'type': 'loss', 'content': 0.11610550433397293, 'timestamp': '2025-10-02 00:32:10.822263', 'step': 11916, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:10.877217', 'step': 11916, 'epoch': 2}
{'type': 'loss', 'content': 0.059272751212120056, 'timestamp': '2025-10-02 00:32:10.879568', 'step': 11917, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:10.939615', 'step': 11917, 'epoch': 2}
{'type': 'loss', 'content': 0.06193290278315544, 'timestamp': '2025-10-02 00:32:10.949778', 'step': 11918, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:11.008815', 'step': 11918, 'epoch': 2}
{'type': 'loss', 'content': 0.07544226199388504, 'timestamp': '2025-10-02 00:32:11.011592', 'step': 11919, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:11.066315', 'step': 11919, 'epoch': 2}
{'type': 'loss', 'content': 0.10299849510192871, 'timestamp': '2025-10-02 00:32:11.072674', 'step': 11920, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:11.127201', 'step': 11920, 'epoch': 2}
{'type': 'loss', 'content': 0.04680781066417694, 'timestamp': '2025-10-02 00:32:11.137447', 'step': 11921, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:32:11.195093', 'step': 11921, 'epoch': 2}
{'type': 'loss', 'content': 0.11348584294319153, 'timestamp': '2025-10-02 00:32:11.197864', 'step': 11922, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:11.254415', 'step': 11922, 'epoch': 2}
{'type': 'loss', 'content': 0.12403909116983414, 'timestamp': '2025-10-02 00:32:11.257049', 'step': 11923, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:11.311668', 'step': 11923, 'epoch': 2}
{'type': 'loss', 'content': 0.047446444630622864, 'timestamp': '2025-10-02 00:32:11.318562', 'step': 11924, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:11.372191', 'step': 11924, 'epoch': 2}
{'type': 'loss', 'content': 0.11178530007600784, 'timestamp': '2025-10-02 00:32:11.381972', 'step': 11925, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:11.457299', 'step': 11925, 'epoch': 2}
{'type': 'loss', 'content': 0.16526490449905396, 'timestamp': '2025-10-02 00:32:11.464806', 'step': 11926, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:11.533727', 'step': 11926, 'epoch': 2}
{'type': 'loss', 'content': 0.028089502826333046, 'timestamp': '2025-10-02 00:32:11.536236', 'step': 11927, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:32:11.614866', 'step': 11927, 'epoch': 2}
{'type': 'loss', 'content': 0.06966695934534073, 'timestamp': '2025-10-02 00:32:11.622140', 'step': 11928, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:11.680711', 'step': 11928, 'epoch': 2}
{'type': 'loss', 'content': 0.08614997565746307, 'timestamp': '2025-10-02 00:32:11.687630', 'step': 11929, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:32:11.766764', 'step': 11929, 'epoch': 2}
{'type': 'loss', 'content': 0.008231882937252522, 'timestamp': '2025-10-02 00:32:11.780028', 'step': 11930, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:11.853723', 'step': 11930, 'epoch': 2}
{'type': 'loss', 'content': 0.02407601848244667, 'timestamp': '2025-10-02 00:32:11.859971', 'step': 11931, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:11.930764', 'step': 11931, 'epoch': 2}
{'type': 'loss', 'content': 0.014751967042684555, 'timestamp': '2025-10-02 00:32:11.941770', 'step': 11932, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:12.000218', 'step': 11932, 'epoch': 2}
{'type': 'loss', 'content': 0.19523470103740692, 'timestamp': '2025-10-02 00:32:12.010373', 'step': 11933, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:32:12.076049', 'step': 11933, 'epoch': 2}
{'type': 'loss', 'content': 0.04902058467268944, 'timestamp': '2025-10-02 00:32:12.078762', 'step': 11934, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:12.140551', 'step': 11934, 'epoch': 2}
{'type': 'loss', 'content': 0.06241947412490845, 'timestamp': '2025-10-02 00:32:12.146349', 'step': 11935, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:32:12.223053', 'step': 11935, 'epoch': 2}
{'type': 'loss', 'content': 0.03364400565624237, 'timestamp': '2025-10-02 00:32:12.234520', 'step': 11936, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:12.300177', 'step': 11936, 'epoch': 2}
{'type': 'loss', 'content': 0.013421176001429558, 'timestamp': '2025-10-02 00:32:12.303009', 'step': 11937, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:12.379575', 'step': 11937, 'epoch': 2}
{'type': 'loss', 'content': 0.013752719387412071, 'timestamp': '2025-10-02 00:32:12.389796', 'step': 11938, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:12.447889', 'step': 11938, 'epoch': 2}
{'type': 'loss', 'content': 0.019934261217713356, 'timestamp': '2025-10-02 00:32:12.457227', 'step': 11939, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:12.520538', 'step': 11939, 'epoch': 2}
{'type': 'loss', 'content': 0.08479362726211548, 'timestamp': '2025-10-02 00:32:12.527111', 'step': 11940, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:12.584232', 'step': 11940, 'epoch': 2}
{'type': 'loss', 'content': 0.07679257541894913, 'timestamp': '2025-10-02 00:32:12.593951', 'step': 11941, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:32:12.658531', 'step': 11941, 'epoch': 2}
{'type': 'loss', 'content': 0.14877311885356903, 'timestamp': '2025-10-02 00:32:12.666599', 'step': 11942, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:12.733198', 'step': 11942, 'epoch': 2}
{'type': 'loss', 'content': 0.014136002399027348, 'timestamp': '2025-10-02 00:32:12.737087', 'step': 11943, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:12.794578', 'step': 11943, 'epoch': 2}
{'type': 'loss', 'content': 0.020404009148478508, 'timestamp': '2025-10-02 00:32:12.804589', 'step': 11944, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:12.866266', 'step': 11944, 'epoch': 2}
{'type': 'loss', 'content': 0.0467953085899353, 'timestamp': '2025-10-02 00:32:12.874134', 'step': 11945, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:12.942654', 'step': 11945, 'epoch': 2}
{'type': 'loss', 'content': 0.021054986864328384, 'timestamp': '2025-10-02 00:32:12.952818', 'step': 11946, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:13.011451', 'step': 11946, 'epoch': 2}
{'type': 'loss', 'content': 0.04538754001259804, 'timestamp': '2025-10-02 00:32:13.018572', 'step': 11947, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:13.091806', 'step': 11947, 'epoch': 2}
{'type': 'loss', 'content': 0.0942964181303978, 'timestamp': '2025-10-02 00:32:13.098265', 'step': 11948, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:13.165172', 'step': 11948, 'epoch': 2}
{'type': 'loss', 'content': 0.11935267597436905, 'timestamp': '2025-10-02 00:32:13.167673', 'step': 11949, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:32:13.236497', 'step': 11949, 'epoch': 2}
{'type': 'loss', 'content': 0.022263459861278534, 'timestamp': '2025-10-02 00:32:13.247113', 'step': 11950, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:13.301691', 'step': 11950, 'epoch': 2}
{'type': 'loss', 'content': 0.13813723623752594, 'timestamp': '2025-10-02 00:32:13.305003', 'step': 11951, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:32:13.359728', 'step': 11951, 'epoch': 2}
{'type': 'loss', 'content': 0.05890169367194176, 'timestamp': '2025-10-02 00:32:13.373159', 'step': 11952, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:32:13.429038', 'step': 11952, 'epoch': 2}
{'type': 'loss', 'content': 0.051840320229530334, 'timestamp': '2025-10-02 00:32:13.431605', 'step': 11953, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:13.488177', 'step': 11953, 'epoch': 2}
{'type': 'loss', 'content': 0.042085181921720505, 'timestamp': '2025-10-02 00:32:13.497718', 'step': 11954, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:13.552651', 'step': 11954, 'epoch': 2}
{'type': 'loss', 'content': 0.026388883590698242, 'timestamp': '2025-10-02 00:32:13.561997', 'step': 11955, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:13.617153', 'step': 11955, 'epoch': 2}
{'type': 'loss', 'content': 0.033758241683244705, 'timestamp': '2025-10-02 00:32:13.624069', 'step': 11956, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:13.678747', 'step': 11956, 'epoch': 2}
{'type': 'loss', 'content': 0.07504019886255264, 'timestamp': '2025-10-02 00:32:13.681266', 'step': 11957, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:13.738117', 'step': 11957, 'epoch': 2}
{'type': 'loss', 'content': 0.02644313871860504, 'timestamp': '2025-10-02 00:32:13.747625', 'step': 11958, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:13.802278', 'step': 11958, 'epoch': 2}
{'type': 'loss', 'content': 0.09109210222959518, 'timestamp': '2025-10-02 00:32:13.811620', 'step': 11959, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:13.867159', 'step': 11959, 'epoch': 2}
{'type': 'loss', 'content': 0.0975320041179657, 'timestamp': '2025-10-02 00:32:13.873517', 'step': 11960, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:32:13.927591', 'step': 11960, 'epoch': 2}
{'type': 'loss', 'content': 0.07707897573709488, 'timestamp': '2025-10-02 00:32:13.930608', 'step': 11961, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:13.986140', 'step': 11961, 'epoch': 2}
{'type': 'loss', 'content': 0.057236094027757645, 'timestamp': '2025-10-02 00:32:13.993921', 'step': 11962, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:14.047890', 'step': 11962, 'epoch': 2}
{'type': 'loss', 'content': 0.1693870723247528, 'timestamp': '2025-10-02 00:32:14.051087', 'step': 11963, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:32:14.114877', 'step': 11963, 'epoch': 2}
{'type': 'loss', 'content': 0.01759537309408188, 'timestamp': '2025-10-02 00:32:14.126346', 'step': 11964, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:14.179760', 'step': 11964, 'epoch': 2}
{'type': 'loss', 'content': 0.049806658178567886, 'timestamp': '2025-10-02 00:32:14.182476', 'step': 11965, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:14.239043', 'step': 11965, 'epoch': 2}
{'type': 'loss', 'content': 0.12045972794294357, 'timestamp': '2025-10-02 00:32:14.248594', 'step': 11966, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:14.305499', 'step': 11966, 'epoch': 2}
{'type': 'loss', 'content': 0.05532674491405487, 'timestamp': '2025-10-02 00:32:14.311827', 'step': 11967, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:32:14.366266', 'step': 11967, 'epoch': 2}
{'type': 'loss', 'content': 0.14416015148162842, 'timestamp': '2025-10-02 00:32:14.372369', 'step': 11968, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:14.425825', 'step': 11968, 'epoch': 2}
{'type': 'loss', 'content': 0.2342308908700943, 'timestamp': '2025-10-02 00:32:14.428041', 'step': 11969, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:14.482329', 'step': 11969, 'epoch': 2}
{'type': 'loss', 'content': 0.0907251313328743, 'timestamp': '2025-10-02 00:32:14.484863', 'step': 11970, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:32:14.540029', 'step': 11970, 'epoch': 2}
{'type': 'loss', 'content': 0.06350333243608475, 'timestamp': '2025-10-02 00:32:14.542789', 'step': 11971, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:14.598017', 'step': 11971, 'epoch': 2}
{'type': 'loss', 'content': 0.023232722654938698, 'timestamp': '2025-10-02 00:32:14.605496', 'step': 11972, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:32:14.659675', 'step': 11972, 'epoch': 2}
{'type': 'loss', 'content': 0.2207276076078415, 'timestamp': '2025-10-02 00:32:14.662360', 'step': 11973, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:14.717485', 'step': 11973, 'epoch': 2}
{'type': 'loss', 'content': 0.10942521691322327, 'timestamp': '2025-10-02 00:32:14.723725', 'step': 11974, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:32:14.778346', 'step': 11974, 'epoch': 2}
{'type': 'loss', 'content': 0.182151198387146, 'timestamp': '2025-10-02 00:32:14.781189', 'step': 11975, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:14.835997', 'step': 11975, 'epoch': 2}
{'type': 'loss', 'content': 0.0802214965224266, 'timestamp': '2025-10-02 00:32:14.842369', 'step': 11976, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:14.896955', 'step': 11976, 'epoch': 2}
{'type': 'loss', 'content': 0.019200274720788002, 'timestamp': '2025-10-02 00:32:14.906694', 'step': 11977, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:14.963387', 'step': 11977, 'epoch': 2}
{'type': 'loss', 'content': 0.058103419840335846, 'timestamp': '2025-10-02 00:32:14.972941', 'step': 11978, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:15.028109', 'step': 11978, 'epoch': 2}
{'type': 'loss', 'content': 0.08908065408468246, 'timestamp': '2025-10-02 00:32:15.030567', 'step': 11979, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:15.085744', 'step': 11979, 'epoch': 2}
{'type': 'loss', 'content': 0.14699891209602356, 'timestamp': '2025-10-02 00:32:15.091643', 'step': 11980, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:15.146347', 'step': 11980, 'epoch': 2}
{'type': 'loss', 'content': 0.06955911219120026, 'timestamp': '2025-10-02 00:32:15.148741', 'step': 11981, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:15.203736', 'step': 11981, 'epoch': 2}
{'type': 'loss', 'content': 0.06542876362800598, 'timestamp': '2025-10-02 00:32:15.206569', 'step': 11982, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:15.261423', 'step': 11982, 'epoch': 2}
{'type': 'loss', 'content': 0.03462778776884079, 'timestamp': '2025-10-02 00:32:15.264707', 'step': 11983, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:15.320711', 'step': 11983, 'epoch': 2}
{'type': 'loss', 'content': 0.14174780249595642, 'timestamp': '2025-10-02 00:32:15.326837', 'step': 11984, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:15.381609', 'step': 11984, 'epoch': 2}
{'type': 'loss', 'content': 0.0302739255130291, 'timestamp': '2025-10-02 00:32:15.389467', 'step': 11985, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:15.444993', 'step': 11985, 'epoch': 2}
{'type': 'loss', 'content': 0.05812770873308182, 'timestamp': '2025-10-02 00:32:15.451311', 'step': 11986, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:15.506895', 'step': 11986, 'epoch': 2}
{'type': 'loss', 'content': 0.03937143459916115, 'timestamp': '2025-10-02 00:32:15.514652', 'step': 11987, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:15.571142', 'step': 11987, 'epoch': 2}
{'type': 'loss', 'content': 0.04091683775186539, 'timestamp': '2025-10-02 00:32:15.577158', 'step': 11988, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:15.631265', 'step': 11988, 'epoch': 2}
{'type': 'loss', 'content': 0.12083527445793152, 'timestamp': '2025-10-02 00:32:15.633830', 'step': 11989, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:15.688964', 'step': 11989, 'epoch': 2}
{'type': 'loss', 'content': 0.04472217336297035, 'timestamp': '2025-10-02 00:32:15.698329', 'step': 11990, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:15.752962', 'step': 11990, 'epoch': 2}
{'type': 'loss', 'content': 0.23648016154766083, 'timestamp': '2025-10-02 00:32:15.755360', 'step': 11991, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:15.809598', 'step': 11991, 'epoch': 2}
{'type': 'loss', 'content': 0.045002974569797516, 'timestamp': '2025-10-02 00:32:15.818153', 'step': 11992, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:15.874817', 'step': 11992, 'epoch': 2}
{'type': 'loss', 'content': 0.011173991486430168, 'timestamp': '2025-10-02 00:32:15.881113', 'step': 11993, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:32:15.942737', 'step': 11993, 'epoch': 2}
{'type': 'loss', 'content': 0.04371289536356926, 'timestamp': '2025-10-02 00:32:15.953236', 'step': 11994, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:16.008454', 'step': 11994, 'epoch': 2}
{'type': 'loss', 'content': 0.023293334990739822, 'timestamp': '2025-10-02 00:32:16.011253', 'step': 11995, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:16.065644', 'step': 11995, 'epoch': 2}
{'type': 'loss', 'content': 0.17178820073604584, 'timestamp': '2025-10-02 00:32:16.071905', 'step': 11996, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:16.126478', 'step': 11996, 'epoch': 2}
{'type': 'loss', 'content': 0.06613868474960327, 'timestamp': '2025-10-02 00:32:16.129510', 'step': 11997, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:16.184093', 'step': 11997, 'epoch': 2}
{'type': 'loss', 'content': 0.08779609203338623, 'timestamp': '2025-10-02 00:32:16.186784', 'step': 11998, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:16.241647', 'step': 11998, 'epoch': 2}
{'type': 'loss', 'content': 0.10252639651298523, 'timestamp': '2025-10-02 00:32:16.244308', 'step': 11999, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:32:16.299335', 'step': 11999, 'epoch': 2}
{'type': 'loss', 'content': 0.11852353066205978, 'timestamp': '2025-10-02 00:32:16.305760', 'step': 12000, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 12000', 'timestamp': '2025-10-02 00:32:16.799660', 'step': 12000, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:16.864187', 'step': 12000, 'epoch': 2}
{'type': 'loss', 'content': 0.05495256185531616, 'timestamp': '2025-10-02 00:32:16.868054', 'step': 12001, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:16.923830', 'step': 12001, 'epoch': 2}
{'type': 'loss', 'content': 0.07485178858041763, 'timestamp': '2025-10-02 00:32:16.929281', 'step': 12002, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:16.985418', 'step': 12002, 'epoch': 2}
{'type': 'loss', 'content': 0.03208370506763458, 'timestamp': '2025-10-02 00:32:16.988526', 'step': 12003, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:17.044193', 'step': 12003, 'epoch': 2}
{'type': 'loss', 'content': 0.1542736440896988, 'timestamp': '2025-10-02 00:32:17.051021', 'step': 12004, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:17.106886', 'step': 12004, 'epoch': 2}
{'type': 'loss', 'content': 0.04056825488805771, 'timestamp': '2025-10-02 00:32:17.112883', 'step': 12005, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:17.169121', 'step': 12005, 'epoch': 2}
{'type': 'loss', 'content': 0.10538524389266968, 'timestamp': '2025-10-02 00:32:17.171642', 'step': 12006, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:17.227425', 'step': 12006, 'epoch': 2}
{'type': 'loss', 'content': 0.03279335796833038, 'timestamp': '2025-10-02 00:32:17.230019', 'step': 12007, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:17.286405', 'step': 12007, 'epoch': 2}
{'type': 'loss', 'content': 0.02952788956463337, 'timestamp': '2025-10-02 00:32:17.293673', 'step': 12008, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:32:17.348190', 'step': 12008, 'epoch': 2}
{'type': 'loss', 'content': 0.2328399121761322, 'timestamp': '2025-10-02 00:32:17.351031', 'step': 12009, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:17.406951', 'step': 12009, 'epoch': 2}
{'type': 'loss', 'content': 0.04055029898881912, 'timestamp': '2025-10-02 00:32:17.409718', 'step': 12010, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:17.465501', 'step': 12010, 'epoch': 2}
{'type': 'loss', 'content': 0.04485469311475754, 'timestamp': '2025-10-02 00:32:17.467928', 'step': 12011, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:17.522492', 'step': 12011, 'epoch': 2}
{'type': 'loss', 'content': 0.09315603971481323, 'timestamp': '2025-10-02 00:32:17.531518', 'step': 12012, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:17.585861', 'step': 12012, 'epoch': 2}
{'type': 'loss', 'content': 0.13741445541381836, 'timestamp': '2025-10-02 00:32:17.588281', 'step': 12013, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:17.642938', 'step': 12013, 'epoch': 2}
{'type': 'loss', 'content': 0.042436834424734116, 'timestamp': '2025-10-02 00:32:17.650579', 'step': 12014, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:32:17.705064', 'step': 12014, 'epoch': 2}
{'type': 'loss', 'content': 0.18121130764484406, 'timestamp': '2025-10-02 00:32:17.708053', 'step': 12015, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:32:17.770897', 'step': 12015, 'epoch': 2}
{'type': 'loss', 'content': 0.025548061355948448, 'timestamp': '2025-10-02 00:32:17.782163', 'step': 12016, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:32:17.836990', 'step': 12016, 'epoch': 2}
{'type': 'loss', 'content': 0.07113751024007797, 'timestamp': '2025-10-02 00:32:17.841113', 'step': 12017, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:17.895793', 'step': 12017, 'epoch': 2}
{'type': 'loss', 'content': 0.16234812140464783, 'timestamp': '2025-10-02 00:32:17.898904', 'step': 12018, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:17.956161', 'step': 12018, 'epoch': 2}
{'type': 'loss', 'content': 0.053457822650671005, 'timestamp': '2025-10-02 00:32:17.965679', 'step': 12019, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:18.024849', 'step': 12019, 'epoch': 2}
{'type': 'loss', 'content': 0.0842469185590744, 'timestamp': '2025-10-02 00:32:18.035841', 'step': 12020, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:18.091238', 'step': 12020, 'epoch': 2}
{'type': 'loss', 'content': 0.046159762889146805, 'timestamp': '2025-10-02 00:32:18.097549', 'step': 12021, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:18.152318', 'step': 12021, 'epoch': 2}
{'type': 'loss', 'content': 0.08305321633815765, 'timestamp': '2025-10-02 00:32:18.155168', 'step': 12022, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:18.210704', 'step': 12022, 'epoch': 2}
{'type': 'loss', 'content': 0.09146382659673691, 'timestamp': '2025-10-02 00:32:18.216730', 'step': 12023, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:18.271219', 'step': 12023, 'epoch': 2}
{'type': 'loss', 'content': 0.0486161895096302, 'timestamp': '2025-10-02 00:32:18.278230', 'step': 12024, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:18.334584', 'step': 12024, 'epoch': 2}
{'type': 'loss', 'content': 0.15288233757019043, 'timestamp': '2025-10-02 00:32:18.338052', 'step': 12025, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:18.396177', 'step': 12025, 'epoch': 2}
{'type': 'loss', 'content': 0.06582047790288925, 'timestamp': '2025-10-02 00:32:18.405479', 'step': 12026, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:18.459913', 'step': 12026, 'epoch': 2}
{'type': 'loss', 'content': 0.09475480020046234, 'timestamp': '2025-10-02 00:32:18.462297', 'step': 12027, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:18.517059', 'step': 12027, 'epoch': 2}
{'type': 'loss', 'content': 0.09852240979671478, 'timestamp': '2025-10-02 00:32:18.523371', 'step': 12028, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:18.579192', 'step': 12028, 'epoch': 2}
{'type': 'loss', 'content': 0.0457632876932621, 'timestamp': '2025-10-02 00:32:18.585273', 'step': 12029, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:32:18.639677', 'step': 12029, 'epoch': 2}
{'type': 'loss', 'content': 0.06522563099861145, 'timestamp': '2025-10-02 00:32:18.642176', 'step': 12030, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:18.697689', 'step': 12030, 'epoch': 2}
{'type': 'loss', 'content': 0.05490512400865555, 'timestamp': '2025-10-02 00:32:18.700591', 'step': 12031, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:18.755199', 'step': 12031, 'epoch': 2}
{'type': 'loss', 'content': 0.09881933033466339, 'timestamp': '2025-10-02 00:32:18.761958', 'step': 12032, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:18.816040', 'step': 12032, 'epoch': 2}
{'type': 'loss', 'content': 0.02958819456398487, 'timestamp': '2025-10-02 00:32:18.818919', 'step': 12033, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:32:18.874273', 'step': 12033, 'epoch': 2}
{'type': 'loss', 'content': 0.07038627564907074, 'timestamp': '2025-10-02 00:32:18.877333', 'step': 12034, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:32:18.932351', 'step': 12034, 'epoch': 2}
{'type': 'loss', 'content': 0.08713015168905258, 'timestamp': '2025-10-02 00:32:18.934986', 'step': 12035, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:18.989594', 'step': 12035, 'epoch': 2}
{'type': 'loss', 'content': 0.06861186027526855, 'timestamp': '2025-10-02 00:32:18.995847', 'step': 12036, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:19.049642', 'step': 12036, 'epoch': 2}
{'type': 'loss', 'content': 0.11001948267221451, 'timestamp': '2025-10-02 00:32:19.052229', 'step': 12037, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:19.107297', 'step': 12037, 'epoch': 2}
{'type': 'loss', 'content': 0.06634483486413956, 'timestamp': '2025-10-02 00:32:19.109876', 'step': 12038, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:32:19.164411', 'step': 12038, 'epoch': 2}
{'type': 'loss', 'content': 0.12986116111278534, 'timestamp': '2025-10-02 00:32:19.167350', 'step': 12039, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:32:19.223487', 'step': 12039, 'epoch': 2}
{'type': 'loss', 'content': 0.01301957480609417, 'timestamp': '2025-10-02 00:32:19.229306', 'step': 12040, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:19.284704', 'step': 12040, 'epoch': 2}
{'type': 'loss', 'content': 0.06463764607906342, 'timestamp': '2025-10-02 00:32:19.287417', 'step': 12041, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:19.344346', 'step': 12041, 'epoch': 2}
{'type': 'loss', 'content': 0.03885192051529884, 'timestamp': '2025-10-02 00:32:19.353741', 'step': 12042, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:19.414562', 'step': 12042, 'epoch': 2}
{'type': 'loss', 'content': 0.04718438908457756, 'timestamp': '2025-10-02 00:32:19.424109', 'step': 12043, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:19.479750', 'step': 12043, 'epoch': 2}
{'type': 'loss', 'content': 0.03617075830698013, 'timestamp': '2025-10-02 00:32:19.486572', 'step': 12044, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:19.540943', 'step': 12044, 'epoch': 2}
{'type': 'loss', 'content': 0.034376952797174454, 'timestamp': '2025-10-02 00:32:19.550732', 'step': 12045, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:19.607165', 'step': 12045, 'epoch': 2}
{'type': 'loss', 'content': 0.07188741117715836, 'timestamp': '2025-10-02 00:32:19.610089', 'step': 12046, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:19.665485', 'step': 12046, 'epoch': 2}
{'type': 'loss', 'content': 0.053249236196279526, 'timestamp': '2025-10-02 00:32:19.668742', 'step': 12047, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:19.724507', 'step': 12047, 'epoch': 2}
{'type': 'loss', 'content': 0.08206348121166229, 'timestamp': '2025-10-02 00:32:19.730294', 'step': 12048, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:19.783639', 'step': 12048, 'epoch': 2}
{'type': 'loss', 'content': 0.19189099967479706, 'timestamp': '2025-10-02 00:32:19.786341', 'step': 12049, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:19.842492', 'step': 12049, 'epoch': 2}
{'type': 'loss', 'content': 0.07107148319482803, 'timestamp': '2025-10-02 00:32:19.851824', 'step': 12050, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:19.908057', 'step': 12050, 'epoch': 2}
{'type': 'loss', 'content': 0.029410474002361298, 'timestamp': '2025-10-02 00:32:19.911817', 'step': 12051, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:19.967907', 'step': 12051, 'epoch': 2}
{'type': 'loss', 'content': 0.05277516320347786, 'timestamp': '2025-10-02 00:32:19.974602', 'step': 12052, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:20.027954', 'step': 12052, 'epoch': 2}
{'type': 'loss', 'content': 0.1410825401544571, 'timestamp': '2025-10-02 00:32:20.031013', 'step': 12053, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:20.086696', 'step': 12053, 'epoch': 2}
{'type': 'loss', 'content': 0.04121042415499687, 'timestamp': '2025-10-02 00:32:20.094481', 'step': 12054, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:20.150706', 'step': 12054, 'epoch': 2}
{'type': 'loss', 'content': 0.06893184781074524, 'timestamp': '2025-10-02 00:32:20.153444', 'step': 12055, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:20.207854', 'step': 12055, 'epoch': 2}
{'type': 'loss', 'content': 0.05175352096557617, 'timestamp': '2025-10-02 00:32:20.214161', 'step': 12056, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:20.270114', 'step': 12056, 'epoch': 2}
{'type': 'loss', 'content': 0.06118441000580788, 'timestamp': '2025-10-02 00:32:20.273564', 'step': 12057, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:20.328136', 'step': 12057, 'epoch': 2}
{'type': 'loss', 'content': 0.05495288968086243, 'timestamp': '2025-10-02 00:32:20.335912', 'step': 12058, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:20.390197', 'step': 12058, 'epoch': 2}
{'type': 'loss', 'content': 0.030618848279118538, 'timestamp': '2025-10-02 00:32:20.392676', 'step': 12059, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:20.448218', 'step': 12059, 'epoch': 2}
{'type': 'loss', 'content': 0.06489197164773941, 'timestamp': '2025-10-02 00:32:20.455146', 'step': 12060, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:20.509658', 'step': 12060, 'epoch': 2}
{'type': 'loss', 'content': 0.08550997823476791, 'timestamp': '2025-10-02 00:32:20.513045', 'step': 12061, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:20.567736', 'step': 12061, 'epoch': 2}
{'type': 'loss', 'content': 0.062392689287662506, 'timestamp': '2025-10-02 00:32:20.570469', 'step': 12062, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:20.625952', 'step': 12062, 'epoch': 2}
{'type': 'loss', 'content': 0.033643629401922226, 'timestamp': '2025-10-02 00:32:20.635356', 'step': 12063, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:20.692587', 'step': 12063, 'epoch': 2}
{'type': 'loss', 'content': 0.039270009845495224, 'timestamp': '2025-10-02 00:32:20.702910', 'step': 12064, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:20.757722', 'step': 12064, 'epoch': 2}
{'type': 'loss', 'content': 0.0068726916797459126, 'timestamp': '2025-10-02 00:32:20.765562', 'step': 12065, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:20.820301', 'step': 12065, 'epoch': 2}
{'type': 'loss', 'content': 0.019870329648256302, 'timestamp': '2025-10-02 00:32:20.826557', 'step': 12066, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:20.886428', 'step': 12066, 'epoch': 2}
{'type': 'loss', 'content': 0.03059515915811062, 'timestamp': '2025-10-02 00:32:20.896644', 'step': 12067, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:20.952557', 'step': 12067, 'epoch': 2}
{'type': 'loss', 'content': 0.05561681091785431, 'timestamp': '2025-10-02 00:32:20.958607', 'step': 12068, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:32:21.027580', 'step': 12068, 'epoch': 2}
{'type': 'loss', 'content': 0.01511288806796074, 'timestamp': '2025-10-02 00:32:21.040966', 'step': 12069, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:21.096128', 'step': 12069, 'epoch': 2}
{'type': 'loss', 'content': 0.11771213263273239, 'timestamp': '2025-10-02 00:32:21.099254', 'step': 12070, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:21.154319', 'step': 12070, 'epoch': 2}
{'type': 'loss', 'content': 0.12751717865467072, 'timestamp': '2025-10-02 00:32:21.156910', 'step': 12071, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:21.211845', 'step': 12071, 'epoch': 2}
{'type': 'loss', 'content': 0.008578275330364704, 'timestamp': '2025-10-02 00:32:21.218739', 'step': 12072, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:32:21.281471', 'step': 12072, 'epoch': 2}
{'type': 'loss', 'content': 0.028880884870886803, 'timestamp': '2025-10-02 00:32:21.293260', 'step': 12073, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:21.349642', 'step': 12073, 'epoch': 2}
{'type': 'loss', 'content': 0.05771340802311897, 'timestamp': '2025-10-02 00:32:21.352647', 'step': 12074, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:21.410640', 'step': 12074, 'epoch': 2}
{'type': 'loss', 'content': 0.1430521160364151, 'timestamp': '2025-10-02 00:32:21.413794', 'step': 12075, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:21.469289', 'step': 12075, 'epoch': 2}
{'type': 'loss', 'content': 0.12610743939876556, 'timestamp': '2025-10-02 00:32:21.475574', 'step': 12076, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:21.529481', 'step': 12076, 'epoch': 2}
{'type': 'loss', 'content': 0.0742744579911232, 'timestamp': '2025-10-02 00:32:21.539352', 'step': 12077, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:21.594847', 'step': 12077, 'epoch': 2}
{'type': 'loss', 'content': 0.0789826288819313, 'timestamp': '2025-10-02 00:32:21.597378', 'step': 12078, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:32:21.660802', 'step': 12078, 'epoch': 2}
{'type': 'loss', 'content': 0.05655965209007263, 'timestamp': '2025-10-02 00:32:21.671623', 'step': 12079, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:21.726916', 'step': 12079, 'epoch': 2}
{'type': 'loss', 'content': 0.07742328196763992, 'timestamp': '2025-10-02 00:32:21.734576', 'step': 12080, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:21.788667', 'step': 12080, 'epoch': 2}
{'type': 'loss', 'content': 0.13358531892299652, 'timestamp': '2025-10-02 00:32:21.792055', 'step': 12081, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:21.846928', 'step': 12081, 'epoch': 2}
{'type': 'loss', 'content': 0.03432956337928772, 'timestamp': '2025-10-02 00:32:21.849523', 'step': 12082, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:21.904745', 'step': 12082, 'epoch': 2}
{'type': 'loss', 'content': 0.07317090779542923, 'timestamp': '2025-10-02 00:32:21.907843', 'step': 12083, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:21.962652', 'step': 12083, 'epoch': 2}
{'type': 'loss', 'content': 0.0097622936591506, 'timestamp': '2025-10-02 00:32:21.969704', 'step': 12084, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-10-02 00:32:22.050004', 'step': 12084, 'epoch': 2}
{'type': 'loss', 'content': 0.031627554446458817, 'timestamp': '2025-10-02 00:32:22.066286', 'step': 12085, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:22.121206', 'step': 12085, 'epoch': 2}
{'type': 'loss', 'content': 0.08381777256727219, 'timestamp': '2025-10-02 00:32:22.123801', 'step': 12086, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:32:22.181013', 'step': 12086, 'epoch': 2}
{'type': 'loss', 'content': 0.07567860931158066, 'timestamp': '2025-10-02 00:32:22.184218', 'step': 12087, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:22.243860', 'step': 12087, 'epoch': 2}
{'type': 'loss', 'content': 0.0930178239941597, 'timestamp': '2025-10-02 00:32:22.254841', 'step': 12088, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:22.310219', 'step': 12088, 'epoch': 2}
{'type': 'loss', 'content': 0.11253933608531952, 'timestamp': '2025-10-02 00:32:22.312719', 'step': 12089, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:22.368618', 'step': 12089, 'epoch': 2}
{'type': 'loss', 'content': 0.09975700080394745, 'timestamp': '2025-10-02 00:32:22.371700', 'step': 12090, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:22.437382', 'step': 12090, 'epoch': 2}
{'type': 'loss', 'content': 0.03637809678912163, 'timestamp': '2025-10-02 00:32:22.440614', 'step': 12091, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:22.495525', 'step': 12091, 'epoch': 2}
{'type': 'loss', 'content': 0.06725028902292252, 'timestamp': '2025-10-02 00:32:22.501951', 'step': 12092, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:32:22.556637', 'step': 12092, 'epoch': 2}
{'type': 'loss', 'content': 0.11939989030361176, 'timestamp': '2025-10-02 00:32:22.559275', 'step': 12093, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:22.614094', 'step': 12093, 'epoch': 2}
{'type': 'loss', 'content': 0.1442558914422989, 'timestamp': '2025-10-02 00:32:22.621892', 'step': 12094, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:22.677025', 'step': 12094, 'epoch': 2}
{'type': 'loss', 'content': 0.044931259006261826, 'timestamp': '2025-10-02 00:32:22.684672', 'step': 12095, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:22.740353', 'step': 12095, 'epoch': 2}
{'type': 'loss', 'content': 0.04990537837147713, 'timestamp': '2025-10-02 00:32:22.748881', 'step': 12096, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:22.803304', 'step': 12096, 'epoch': 2}
{'type': 'loss', 'content': 0.05884089693427086, 'timestamp': '2025-10-02 00:32:22.813013', 'step': 12097, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:22.868020', 'step': 12097, 'epoch': 2}
{'type': 'loss', 'content': 0.008804135955870152, 'timestamp': '2025-10-02 00:32:22.870973', 'step': 12098, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:32:22.934307', 'step': 12098, 'epoch': 2}
{'type': 'loss', 'content': 0.021222131326794624, 'timestamp': '2025-10-02 00:32:22.944948', 'step': 12099, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:23.001653', 'step': 12099, 'epoch': 2}
{'type': 'loss', 'content': 0.026124190539121628, 'timestamp': '2025-10-02 00:32:23.008752', 'step': 12100, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:23.062987', 'step': 12100, 'epoch': 2}
{'type': 'loss', 'content': 0.04153376817703247, 'timestamp': '2025-10-02 00:32:23.065548', 'step': 12101, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:23.125897', 'step': 12101, 'epoch': 2}
{'type': 'loss', 'content': 0.056846220046281815, 'timestamp': '2025-10-02 00:32:23.136085', 'step': 12102, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:32:23.209209', 'step': 12102, 'epoch': 2}
{'type': 'loss', 'content': 0.02028009295463562, 'timestamp': '2025-10-02 00:32:23.221851', 'step': 12103, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:23.277465', 'step': 12103, 'epoch': 2}
{'type': 'loss', 'content': 0.09211746603250504, 'timestamp': '2025-10-02 00:32:23.284104', 'step': 12104, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:32:23.346095', 'step': 12104, 'epoch': 2}
{'type': 'loss', 'content': 0.020601719617843628, 'timestamp': '2025-10-02 00:32:23.357641', 'step': 12105, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:23.413534', 'step': 12105, 'epoch': 2}
{'type': 'loss', 'content': 0.02668900042772293, 'timestamp': '2025-10-02 00:32:23.419702', 'step': 12106, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:23.475995', 'step': 12106, 'epoch': 2}
{'type': 'loss', 'content': 0.08635612577199936, 'timestamp': '2025-10-02 00:32:23.485386', 'step': 12107, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:23.541211', 'step': 12107, 'epoch': 2}
{'type': 'loss', 'content': 0.049612466245889664, 'timestamp': '2025-10-02 00:32:23.547540', 'step': 12108, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:23.606002', 'step': 12108, 'epoch': 2}
{'type': 'loss', 'content': 0.019971134141087532, 'timestamp': '2025-10-02 00:32:23.609591', 'step': 12109, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:23.664262', 'step': 12109, 'epoch': 2}
{'type': 'loss', 'content': 0.09121747314929962, 'timestamp': '2025-10-02 00:32:23.667111', 'step': 12110, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:23.723915', 'step': 12110, 'epoch': 2}
{'type': 'loss', 'content': 0.034164730459451675, 'timestamp': '2025-10-02 00:32:23.729594', 'step': 12111, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:23.791630', 'step': 12111, 'epoch': 2}
{'type': 'loss', 'content': 0.07563690096139908, 'timestamp': '2025-10-02 00:32:23.801780', 'step': 12112, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:23.857771', 'step': 12112, 'epoch': 2}
{'type': 'loss', 'content': 0.02615678496658802, 'timestamp': '2025-10-02 00:32:23.863699', 'step': 12113, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:23.935802', 'step': 12113, 'epoch': 2}
{'type': 'loss', 'content': 0.072225421667099, 'timestamp': '2025-10-02 00:32:23.942020', 'step': 12114, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:23.998358', 'step': 12114, 'epoch': 2}
{'type': 'loss', 'content': 0.17868353426456451, 'timestamp': '2025-10-02 00:32:24.001928', 'step': 12115, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:24.066115', 'step': 12115, 'epoch': 2}
{'type': 'loss', 'content': 0.029525164514780045, 'timestamp': '2025-10-02 00:32:24.077067', 'step': 12116, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:24.140635', 'step': 12116, 'epoch': 2}
{'type': 'loss', 'content': 0.03741248697042465, 'timestamp': '2025-10-02 00:32:24.151607', 'step': 12117, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:24.209131', 'step': 12117, 'epoch': 2}
{'type': 'loss', 'content': 0.05425934121012688, 'timestamp': '2025-10-02 00:32:24.211710', 'step': 12118, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:24.267735', 'step': 12118, 'epoch': 2}
{'type': 'loss', 'content': 0.11804252117872238, 'timestamp': '2025-10-02 00:32:24.274113', 'step': 12119, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:24.335301', 'step': 12119, 'epoch': 2}
{'type': 'loss', 'content': 0.019631585106253624, 'timestamp': '2025-10-02 00:32:24.342750', 'step': 12120, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:24.400314', 'step': 12120, 'epoch': 2}
{'type': 'loss', 'content': 0.05275796726346016, 'timestamp': '2025-10-02 00:32:24.403508', 'step': 12121, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:24.457285', 'step': 12121, 'epoch': 2}
{'type': 'loss', 'content': 0.09782969951629639, 'timestamp': '2025-10-02 00:32:24.464729', 'step': 12122, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:24.528878', 'step': 12122, 'epoch': 2}
{'type': 'loss', 'content': 0.09558500349521637, 'timestamp': '2025-10-02 00:32:24.531761', 'step': 12123, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:24.591292', 'step': 12123, 'epoch': 2}
{'type': 'loss', 'content': 0.1664174646139145, 'timestamp': '2025-10-02 00:32:24.599100', 'step': 12124, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:24.654076', 'step': 12124, 'epoch': 2}
{'type': 'loss', 'content': 0.06049894168972969, 'timestamp': '2025-10-02 00:32:24.658152', 'step': 12125, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:24.724863', 'step': 12125, 'epoch': 2}
{'type': 'loss', 'content': 0.014330615289509296, 'timestamp': '2025-10-02 00:32:24.732450', 'step': 12126, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:24.798421', 'step': 12126, 'epoch': 2}
{'type': 'loss', 'content': 0.03789990395307541, 'timestamp': '2025-10-02 00:32:24.807965', 'step': 12127, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:24.866327', 'step': 12127, 'epoch': 2}
{'type': 'loss', 'content': 0.04102012887597084, 'timestamp': '2025-10-02 00:32:24.872647', 'step': 12128, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:24.928430', 'step': 12128, 'epoch': 2}
{'type': 'loss', 'content': 0.1406107246875763, 'timestamp': '2025-10-02 00:32:24.933904', 'step': 12129, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:32:24.998636', 'step': 12129, 'epoch': 2}
{'type': 'loss', 'content': 0.018397940322756767, 'timestamp': '2025-10-02 00:32:25.009114', 'step': 12130, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:25.065873', 'step': 12130, 'epoch': 2}
{'type': 'loss', 'content': 0.1205444410443306, 'timestamp': '2025-10-02 00:32:25.069129', 'step': 12131, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:25.128941', 'step': 12131, 'epoch': 2}
{'type': 'loss', 'content': 0.03704528138041496, 'timestamp': '2025-10-02 00:32:25.136819', 'step': 12132, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:25.196123', 'step': 12132, 'epoch': 2}
{'type': 'loss', 'content': 0.03860040754079819, 'timestamp': '2025-10-02 00:32:25.200492', 'step': 12133, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:32:25.281607', 'step': 12133, 'epoch': 2}
{'type': 'loss', 'content': 0.016994791105389595, 'timestamp': '2025-10-02 00:32:25.292098', 'step': 12134, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:25.354364', 'step': 12134, 'epoch': 2}
{'type': 'loss', 'content': 0.11470147967338562, 'timestamp': '2025-10-02 00:32:25.358052', 'step': 12135, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:25.414947', 'step': 12135, 'epoch': 2}
{'type': 'loss', 'content': 0.06686905771493912, 'timestamp': '2025-10-02 00:32:25.422094', 'step': 12136, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:25.480698', 'step': 12136, 'epoch': 2}
{'type': 'loss', 'content': 0.02709949016571045, 'timestamp': '2025-10-02 00:32:25.490147', 'step': 12137, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:25.547271', 'step': 12137, 'epoch': 2}
{'type': 'loss', 'content': 0.03637689724564552, 'timestamp': '2025-10-02 00:32:25.550118', 'step': 12138, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:25.609296', 'step': 12138, 'epoch': 2}
{'type': 'loss', 'content': 0.14959649741649628, 'timestamp': '2025-10-02 00:32:25.612122', 'step': 12139, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:25.673366', 'step': 12139, 'epoch': 2}
{'type': 'loss', 'content': 0.023220131173729897, 'timestamp': '2025-10-02 00:32:25.683667', 'step': 12140, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:25.740143', 'step': 12140, 'epoch': 2}
{'type': 'loss', 'content': 0.019302714616060257, 'timestamp': '2025-10-02 00:32:25.743477', 'step': 12141, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:25.800522', 'step': 12141, 'epoch': 2}
{'type': 'loss', 'content': 0.09582047909498215, 'timestamp': '2025-10-02 00:32:25.804460', 'step': 12142, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:25.862257', 'step': 12142, 'epoch': 2}
{'type': 'loss', 'content': 0.07173044979572296, 'timestamp': '2025-10-02 00:32:25.871828', 'step': 12143, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:25.929771', 'step': 12143, 'epoch': 2}
{'type': 'loss', 'content': 0.2054387629032135, 'timestamp': '2025-10-02 00:32:25.935788', 'step': 12144, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:25.993631', 'step': 12144, 'epoch': 2}
{'type': 'loss', 'content': 0.019673576578497887, 'timestamp': '2025-10-02 00:32:26.003296', 'step': 12145, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:26.060162', 'step': 12145, 'epoch': 2}
{'type': 'loss', 'content': 0.01807817816734314, 'timestamp': '2025-10-02 00:32:26.069530', 'step': 12146, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:32:26.138665', 'step': 12146, 'epoch': 2}
{'type': 'loss', 'content': 0.06778654456138611, 'timestamp': '2025-10-02 00:32:26.150635', 'step': 12147, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:26.206123', 'step': 12147, 'epoch': 2}
{'type': 'loss', 'content': 0.03673870489001274, 'timestamp': '2025-10-02 00:32:26.212347', 'step': 12148, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:26.267015', 'step': 12148, 'epoch': 2}
{'type': 'loss', 'content': 0.030436109751462936, 'timestamp': '2025-10-02 00:32:26.270997', 'step': 12149, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:32:26.327887', 'step': 12149, 'epoch': 2}
{'type': 'loss', 'content': 0.10350938141345978, 'timestamp': '2025-10-02 00:32:26.330496', 'step': 12150, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:26.385650', 'step': 12150, 'epoch': 2}
{'type': 'loss', 'content': 0.08802328258752823, 'timestamp': '2025-10-02 00:32:26.392962', 'step': 12151, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:26.448049', 'step': 12151, 'epoch': 2}
{'type': 'loss', 'content': 0.12839098274707794, 'timestamp': '2025-10-02 00:32:26.454448', 'step': 12152, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:26.509089', 'step': 12152, 'epoch': 2}
{'type': 'loss', 'content': 0.08645334094762802, 'timestamp': '2025-10-02 00:32:26.519334', 'step': 12153, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:26.575116', 'step': 12153, 'epoch': 2}
{'type': 'loss', 'content': 0.06174120679497719, 'timestamp': '2025-10-02 00:32:26.580929', 'step': 12154, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:26.636361', 'step': 12154, 'epoch': 2}
{'type': 'loss', 'content': 0.04430960863828659, 'timestamp': '2025-10-02 00:32:26.642463', 'step': 12155, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:26.702586', 'step': 12155, 'epoch': 2}
{'type': 'loss', 'content': 0.10028392821550369, 'timestamp': '2025-10-02 00:32:26.713529', 'step': 12156, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:26.768064', 'step': 12156, 'epoch': 2}
{'type': 'loss', 'content': 0.05255363881587982, 'timestamp': '2025-10-02 00:32:26.778332', 'step': 12157, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:26.833045', 'step': 12157, 'epoch': 2}
{'type': 'loss', 'content': 0.04986773431301117, 'timestamp': '2025-10-02 00:32:26.840488', 'step': 12158, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:26.896156', 'step': 12158, 'epoch': 2}
{'type': 'loss', 'content': 0.0854795053601265, 'timestamp': '2025-10-02 00:32:26.898702', 'step': 12159, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:26.953217', 'step': 12159, 'epoch': 2}
{'type': 'loss', 'content': 0.060583341866731644, 'timestamp': '2025-10-02 00:32:26.960576', 'step': 12160, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:27.015695', 'step': 12160, 'epoch': 2}
{'type': 'loss', 'content': 0.09673494845628738, 'timestamp': '2025-10-02 00:32:27.025067', 'step': 12161, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:27.080484', 'step': 12161, 'epoch': 2}
{'type': 'loss', 'content': 0.06295367330312729, 'timestamp': '2025-10-02 00:32:27.083226', 'step': 12162, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:32:27.138382', 'step': 12162, 'epoch': 2}
{'type': 'loss', 'content': 0.06377732753753662, 'timestamp': '2025-10-02 00:32:27.140984', 'step': 12163, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:27.196184', 'step': 12163, 'epoch': 2}
{'type': 'loss', 'content': 0.13956907391548157, 'timestamp': '2025-10-02 00:32:27.202691', 'step': 12164, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:27.258580', 'step': 12164, 'epoch': 2}
{'type': 'loss', 'content': 0.03067348524928093, 'timestamp': '2025-10-02 00:32:27.268817', 'step': 12165, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:27.323929', 'step': 12165, 'epoch': 2}
{'type': 'loss', 'content': 0.0876486673951149, 'timestamp': '2025-10-02 00:32:27.326473', 'step': 12166, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:27.381113', 'step': 12166, 'epoch': 2}
{'type': 'loss', 'content': 0.09721115976572037, 'timestamp': '2025-10-02 00:32:27.383809', 'step': 12167, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:32:27.455233', 'step': 12167, 'epoch': 2}
{'type': 'loss', 'content': 0.04290083795785904, 'timestamp': '2025-10-02 00:32:27.468646', 'step': 12168, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:27.522468', 'step': 12168, 'epoch': 2}
{'type': 'loss', 'content': 0.0637473464012146, 'timestamp': '2025-10-02 00:32:27.525096', 'step': 12169, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:27.581335', 'step': 12169, 'epoch': 2}
{'type': 'loss', 'content': 0.04410102590918541, 'timestamp': '2025-10-02 00:32:27.590835', 'step': 12170, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:27.645799', 'step': 12170, 'epoch': 2}
{'type': 'loss', 'content': 0.12700910866260529, 'timestamp': '2025-10-02 00:32:27.648947', 'step': 12171, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:27.705486', 'step': 12171, 'epoch': 2}
{'type': 'loss', 'content': 0.04374377056956291, 'timestamp': '2025-10-02 00:32:27.711801', 'step': 12172, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:32:27.773282', 'step': 12172, 'epoch': 2}
{'type': 'loss', 'content': 0.07182798534631729, 'timestamp': '2025-10-02 00:32:27.784818', 'step': 12173, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:27.840856', 'step': 12173, 'epoch': 2}
{'type': 'loss', 'content': 0.06473226845264435, 'timestamp': '2025-10-02 00:32:27.848409', 'step': 12174, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:32:27.904048', 'step': 12174, 'epoch': 2}
{'type': 'loss', 'content': 0.07382974773645401, 'timestamp': '2025-10-02 00:32:27.906829', 'step': 12175, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:27.961540', 'step': 12175, 'epoch': 2}
{'type': 'loss', 'content': 0.1125408187508583, 'timestamp': '2025-10-02 00:32:27.968218', 'step': 12176, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:28.022661', 'step': 12176, 'epoch': 2}
{'type': 'loss', 'content': 0.13599078357219696, 'timestamp': '2025-10-02 00:32:28.025605', 'step': 12177, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:28.080538', 'step': 12177, 'epoch': 2}
{'type': 'loss', 'content': 0.07460293173789978, 'timestamp': '2025-10-02 00:32:28.087954', 'step': 12178, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:28.144137', 'step': 12178, 'epoch': 2}
{'type': 'loss', 'content': 0.15340645611286163, 'timestamp': '2025-10-02 00:32:28.146610', 'step': 12179, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:28.203029', 'step': 12179, 'epoch': 2}
{'type': 'loss', 'content': 0.05016573145985603, 'timestamp': '2025-10-02 00:32:28.213385', 'step': 12180, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:28.267596', 'step': 12180, 'epoch': 2}
{'type': 'loss', 'content': 0.08566134423017502, 'timestamp': '2025-10-02 00:32:28.270403', 'step': 12181, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:28.325191', 'step': 12181, 'epoch': 2}
{'type': 'loss', 'content': 0.01962609589099884, 'timestamp': '2025-10-02 00:32:28.327740', 'step': 12182, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:32:28.382882', 'step': 12182, 'epoch': 2}
{'type': 'loss', 'content': 0.12558108568191528, 'timestamp': '2025-10-02 00:32:28.385865', 'step': 12183, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:28.440636', 'step': 12183, 'epoch': 2}
{'type': 'loss', 'content': 0.06884188205003738, 'timestamp': '2025-10-02 00:32:28.450736', 'step': 12184, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:32:28.512612', 'step': 12184, 'epoch': 2}
{'type': 'loss', 'content': 0.009131744503974915, 'timestamp': '2025-10-02 00:32:28.524170', 'step': 12185, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:28.579305', 'step': 12185, 'epoch': 2}
{'type': 'loss', 'content': 0.20674945414066315, 'timestamp': '2025-10-02 00:32:28.582254', 'step': 12186, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:28.637322', 'step': 12186, 'epoch': 2}
{'type': 'loss', 'content': 0.05592052638530731, 'timestamp': '2025-10-02 00:32:28.646671', 'step': 12187, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:28.701182', 'step': 12187, 'epoch': 2}
{'type': 'loss', 'content': 0.07574095577001572, 'timestamp': '2025-10-02 00:32:28.709535', 'step': 12188, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:28.764054', 'step': 12188, 'epoch': 2}
{'type': 'loss', 'content': 0.09018277376890182, 'timestamp': '2025-10-02 00:32:28.766815', 'step': 12189, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:28.822912', 'step': 12189, 'epoch': 2}
{'type': 'loss', 'content': 0.07178725302219391, 'timestamp': '2025-10-02 00:32:28.825843', 'step': 12190, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:28.881204', 'step': 12190, 'epoch': 2}
{'type': 'loss', 'content': 0.1021353006362915, 'timestamp': '2025-10-02 00:32:28.885019', 'step': 12191, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:28.940486', 'step': 12191, 'epoch': 2}
{'type': 'loss', 'content': 0.13520267605781555, 'timestamp': '2025-10-02 00:32:28.950773', 'step': 12192, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:29.009223', 'step': 12192, 'epoch': 2}
{'type': 'loss', 'content': 0.15784378349781036, 'timestamp': '2025-10-02 00:32:29.011772', 'step': 12193, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:29.066274', 'step': 12193, 'epoch': 2}
{'type': 'loss', 'content': 0.07006679475307465, 'timestamp': '2025-10-02 00:32:29.069260', 'step': 12194, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:29.124194', 'step': 12194, 'epoch': 2}
{'type': 'loss', 'content': 0.053502559661865234, 'timestamp': '2025-10-02 00:32:29.127377', 'step': 12195, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:29.182615', 'step': 12195, 'epoch': 2}
{'type': 'loss', 'content': 0.0638195127248764, 'timestamp': '2025-10-02 00:32:29.192727', 'step': 12196, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:29.246732', 'step': 12196, 'epoch': 2}
{'type': 'loss', 'content': 0.12385988235473633, 'timestamp': '2025-10-02 00:32:29.254310', 'step': 12197, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:29.310130', 'step': 12197, 'epoch': 2}
{'type': 'loss', 'content': 0.043628040701150894, 'timestamp': '2025-10-02 00:32:29.313227', 'step': 12198, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:29.367851', 'step': 12198, 'epoch': 2}
{'type': 'loss', 'content': 0.054673172533512115, 'timestamp': '2025-10-02 00:32:29.377195', 'step': 12199, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:29.432414', 'step': 12199, 'epoch': 2}
{'type': 'loss', 'content': 0.06122225150465965, 'timestamp': '2025-10-02 00:32:29.439712', 'step': 12200, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:32:29.494356', 'step': 12200, 'epoch': 2}
{'type': 'loss', 'content': 0.15518233180046082, 'timestamp': '2025-10-02 00:32:29.496868', 'step': 12201, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:29.551960', 'step': 12201, 'epoch': 2}
{'type': 'loss', 'content': 0.0813104659318924, 'timestamp': '2025-10-02 00:32:29.555039', 'step': 12202, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:29.610475', 'step': 12202, 'epoch': 2}
{'type': 'loss', 'content': 0.19233280420303345, 'timestamp': '2025-10-02 00:32:29.613234', 'step': 12203, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:29.676005', 'step': 12203, 'epoch': 2}
{'type': 'loss', 'content': 0.0354340560734272, 'timestamp': '2025-10-02 00:32:29.686923', 'step': 12204, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:29.748005', 'step': 12204, 'epoch': 2}
{'type': 'loss', 'content': 0.04760444909334183, 'timestamp': '2025-10-02 00:32:29.759018', 'step': 12205, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:29.819136', 'step': 12205, 'epoch': 2}
{'type': 'loss', 'content': 0.06906935572624207, 'timestamp': '2025-10-02 00:32:29.828354', 'step': 12206, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:29.889492', 'step': 12206, 'epoch': 2}
{'type': 'loss', 'content': 0.05119597539305687, 'timestamp': '2025-10-02 00:32:29.892434', 'step': 12207, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:29.948028', 'step': 12207, 'epoch': 2}
{'type': 'loss', 'content': 0.0910489559173584, 'timestamp': '2025-10-02 00:32:29.954782', 'step': 12208, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:30.013072', 'step': 12208, 'epoch': 2}
{'type': 'loss', 'content': 0.07371669262647629, 'timestamp': '2025-10-02 00:32:30.022774', 'step': 12209, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:30.077650', 'step': 12209, 'epoch': 2}
{'type': 'loss', 'content': 0.11642012745141983, 'timestamp': '2025-10-02 00:32:30.080383', 'step': 12210, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:30.135340', 'step': 12210, 'epoch': 2}
{'type': 'loss', 'content': 0.035226237028837204, 'timestamp': '2025-10-02 00:32:30.138367', 'step': 12211, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:30.194556', 'step': 12211, 'epoch': 2}
{'type': 'loss', 'content': 0.023195909336209297, 'timestamp': '2025-10-02 00:32:30.200847', 'step': 12212, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:32:30.257658', 'step': 12212, 'epoch': 2}
{'type': 'loss', 'content': 0.06190848350524902, 'timestamp': '2025-10-02 00:32:30.260408', 'step': 12213, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:30.315145', 'step': 12213, 'epoch': 2}
{'type': 'loss', 'content': 0.04128254950046539, 'timestamp': '2025-10-02 00:32:30.317881', 'step': 12214, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:30.376528', 'step': 12214, 'epoch': 2}
{'type': 'loss', 'content': 0.08230412006378174, 'timestamp': '2025-10-02 00:32:30.380826', 'step': 12215, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:30.437338', 'step': 12215, 'epoch': 2}
{'type': 'loss', 'content': 0.09186050295829773, 'timestamp': '2025-10-02 00:32:30.444055', 'step': 12216, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:32:30.504866', 'step': 12216, 'epoch': 2}
{'type': 'loss', 'content': 0.06083257123827934, 'timestamp': '2025-10-02 00:32:30.516389', 'step': 12217, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:30.570565', 'step': 12217, 'epoch': 2}
{'type': 'loss', 'content': 0.08136153221130371, 'timestamp': '2025-10-02 00:32:30.576616', 'step': 12218, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:30.636792', 'step': 12218, 'epoch': 2}
{'type': 'loss', 'content': 0.09076733142137527, 'timestamp': '2025-10-02 00:32:30.640140', 'step': 12219, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:30.695125', 'step': 12219, 'epoch': 2}
{'type': 'loss', 'content': 0.1332646757364273, 'timestamp': '2025-10-02 00:32:30.701347', 'step': 12220, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:30.757883', 'step': 12220, 'epoch': 2}
{'type': 'loss', 'content': 0.10555465519428253, 'timestamp': '2025-10-02 00:32:30.760611', 'step': 12221, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:30.816579', 'step': 12221, 'epoch': 2}
{'type': 'loss', 'content': 0.01732575334608555, 'timestamp': '2025-10-02 00:32:30.824176', 'step': 12222, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:30.878764', 'step': 12222, 'epoch': 2}
{'type': 'loss', 'content': 0.06705667823553085, 'timestamp': '2025-10-02 00:32:30.881382', 'step': 12223, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:30.938507', 'step': 12223, 'epoch': 2}
{'type': 'loss', 'content': 0.020676666870713234, 'timestamp': '2025-10-02 00:32:30.945055', 'step': 12224, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:31.000630', 'step': 12224, 'epoch': 2}
{'type': 'loss', 'content': 0.18333300948143005, 'timestamp': '2025-10-02 00:32:31.003751', 'step': 12225, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:32:31.066959', 'step': 12225, 'epoch': 2}
{'type': 'loss', 'content': 0.054932571947574615, 'timestamp': '2025-10-02 00:32:31.077436', 'step': 12226, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:32:31.133050', 'step': 12226, 'epoch': 2}
{'type': 'loss', 'content': 0.062190454453229904, 'timestamp': '2025-10-02 00:32:31.135864', 'step': 12227, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:32:31.197446', 'step': 12227, 'epoch': 2}
{'type': 'loss', 'content': 0.05163721740245819, 'timestamp': '2025-10-02 00:32:31.208706', 'step': 12228, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:31.262788', 'step': 12228, 'epoch': 2}
{'type': 'loss', 'content': 0.015228121541440487, 'timestamp': '2025-10-02 00:32:31.272648', 'step': 12229, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:31.328750', 'step': 12229, 'epoch': 2}
{'type': 'loss', 'content': 0.025491846725344658, 'timestamp': '2025-10-02 00:32:31.336543', 'step': 12230, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:32:31.399888', 'step': 12230, 'epoch': 2}
{'type': 'loss', 'content': 0.0404227040708065, 'timestamp': '2025-10-02 00:32:31.410390', 'step': 12231, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:32:31.466685', 'step': 12231, 'epoch': 2}
{'type': 'loss', 'content': 0.10170969367027283, 'timestamp': '2025-10-02 00:32:31.472835', 'step': 12232, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:31.526465', 'step': 12232, 'epoch': 2}
{'type': 'loss', 'content': 0.07179126888513565, 'timestamp': '2025-10-02 00:32:31.529124', 'step': 12233, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:31.583125', 'step': 12233, 'epoch': 2}
{'type': 'loss', 'content': 0.1090313121676445, 'timestamp': '2025-10-02 00:32:31.585684', 'step': 12234, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:31.641272', 'step': 12234, 'epoch': 2}
{'type': 'loss', 'content': 0.016377899795770645, 'timestamp': '2025-10-02 00:32:31.650615', 'step': 12235, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:31.705433', 'step': 12235, 'epoch': 2}
{'type': 'loss', 'content': 0.035424862056970596, 'timestamp': '2025-10-02 00:32:31.713810', 'step': 12236, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:31.767408', 'step': 12236, 'epoch': 2}
{'type': 'loss', 'content': 0.06731832027435303, 'timestamp': '2025-10-02 00:32:31.769926', 'step': 12237, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:31.824200', 'step': 12237, 'epoch': 2}
{'type': 'loss', 'content': 0.16518709063529968, 'timestamp': '2025-10-02 00:32:31.826845', 'step': 12238, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:32:31.890966', 'step': 12238, 'epoch': 2}
{'type': 'loss', 'content': 0.0034462660551071167, 'timestamp': '2025-10-02 00:32:31.901671', 'step': 12239, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:32:31.956021', 'step': 12239, 'epoch': 2}
{'type': 'loss', 'content': 0.09006533771753311, 'timestamp': '2025-10-02 00:32:31.962367', 'step': 12240, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:32:32.016637', 'step': 12240, 'epoch': 2}
{'type': 'loss', 'content': 0.07541188597679138, 'timestamp': '2025-10-02 00:32:32.019341', 'step': 12241, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:32.073873', 'step': 12241, 'epoch': 2}
{'type': 'loss', 'content': 0.07052323967218399, 'timestamp': '2025-10-02 00:32:32.083261', 'step': 12242, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:32.141235', 'step': 12242, 'epoch': 2}
{'type': 'loss', 'content': 0.16119426488876343, 'timestamp': '2025-10-02 00:32:32.144097', 'step': 12243, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:32.198495', 'step': 12243, 'epoch': 2}
{'type': 'loss', 'content': 0.05479980632662773, 'timestamp': '2025-10-02 00:32:32.204731', 'step': 12244, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:32.258258', 'step': 12244, 'epoch': 2}
{'type': 'loss', 'content': 0.17107313871383667, 'timestamp': '2025-10-02 00:32:32.261088', 'step': 12245, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:32.315638', 'step': 12245, 'epoch': 2}
{'type': 'loss', 'content': 0.046019457280635834, 'timestamp': '2025-10-02 00:32:32.318262', 'step': 12246, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:32.372225', 'step': 12246, 'epoch': 2}
{'type': 'loss', 'content': 0.14034731686115265, 'timestamp': '2025-10-02 00:32:32.375234', 'step': 12247, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:32.429841', 'step': 12247, 'epoch': 2}
{'type': 'loss', 'content': 0.05365653708577156, 'timestamp': '2025-10-02 00:32:32.436567', 'step': 12248, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:32.490037', 'step': 12248, 'epoch': 2}
{'type': 'loss', 'content': 0.09450940042734146, 'timestamp': '2025-10-02 00:32:32.492362', 'step': 12249, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:32.546576', 'step': 12249, 'epoch': 2}
{'type': 'loss', 'content': 0.12637419998645782, 'timestamp': '2025-10-02 00:32:32.549371', 'step': 12250, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:32.604284', 'step': 12250, 'epoch': 2}
{'type': 'loss', 'content': 0.08170162886381149, 'timestamp': '2025-10-02 00:32:32.607331', 'step': 12251, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:32.663018', 'step': 12251, 'epoch': 2}
{'type': 'loss', 'content': 0.07399311661720276, 'timestamp': '2025-10-02 00:32:32.669819', 'step': 12252, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:32.730800', 'step': 12252, 'epoch': 2}
{'type': 'loss', 'content': 0.09886640310287476, 'timestamp': '2025-10-02 00:32:32.737057', 'step': 12253, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:32.809257', 'step': 12253, 'epoch': 2}
{'type': 'loss', 'content': 0.12458176910877228, 'timestamp': '2025-10-02 00:32:32.814054', 'step': 12254, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:32:32.909788', 'step': 12254, 'epoch': 2}
{'type': 'loss', 'content': 0.0025985927786678076, 'timestamp': '2025-10-02 00:32:32.923612', 'step': 12255, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:32.985204', 'step': 12255, 'epoch': 2}
{'type': 'loss', 'content': 0.07460293918848038, 'timestamp': '2025-10-02 00:32:33.006394', 'step': 12256, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:33.095315', 'step': 12256, 'epoch': 2}
{'type': 'loss', 'content': 0.06006985157728195, 'timestamp': '2025-10-02 00:32:33.103117', 'step': 12257, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:33.185018', 'step': 12257, 'epoch': 2}
{'type': 'loss', 'content': 0.04688574746251106, 'timestamp': '2025-10-02 00:32:33.193106', 'step': 12258, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:32:33.277981', 'step': 12258, 'epoch': 2}
{'type': 'loss', 'content': 0.08786800503730774, 'timestamp': '2025-10-02 00:32:33.289975', 'step': 12259, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:33.350987', 'step': 12259, 'epoch': 2}
{'type': 'loss', 'content': 0.09253257513046265, 'timestamp': '2025-10-02 00:32:33.358611', 'step': 12260, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:33.425659', 'step': 12260, 'epoch': 2}
{'type': 'loss', 'content': 0.10345029830932617, 'timestamp': '2025-10-02 00:32:33.433551', 'step': 12261, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:33.511593', 'step': 12261, 'epoch': 2}
{'type': 'loss', 'content': 0.026808714494109154, 'timestamp': '2025-10-02 00:32:33.522776', 'step': 12262, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:33.592738', 'step': 12262, 'epoch': 2}
{'type': 'loss', 'content': 0.09977342933416367, 'timestamp': '2025-10-02 00:32:33.596621', 'step': 12263, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:33.673880', 'step': 12263, 'epoch': 2}
{'type': 'loss', 'content': 0.07201565057039261, 'timestamp': '2025-10-02 00:32:33.687847', 'step': 12264, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:33.751070', 'step': 12264, 'epoch': 2}
{'type': 'loss', 'content': 0.057951584458351135, 'timestamp': '2025-10-02 00:32:33.755963', 'step': 12265, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:33.826595', 'step': 12265, 'epoch': 2}
{'type': 'loss', 'content': 0.017270060256123543, 'timestamp': '2025-10-02 00:32:33.830179', 'step': 12266, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:33.893222', 'step': 12266, 'epoch': 2}
{'type': 'loss', 'content': 0.07163898646831512, 'timestamp': '2025-10-02 00:32:33.902762', 'step': 12267, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:33.978737', 'step': 12267, 'epoch': 2}
{'type': 'loss', 'content': 0.03344621881842613, 'timestamp': '2025-10-02 00:32:33.992153', 'step': 12268, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:32:34.057396', 'step': 12268, 'epoch': 2}
{'type': 'loss', 'content': 0.05811888352036476, 'timestamp': '2025-10-02 00:32:34.068095', 'step': 12269, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:34.133809', 'step': 12269, 'epoch': 2}
{'type': 'loss', 'content': 0.052741046994924545, 'timestamp': '2025-10-02 00:32:34.143203', 'step': 12270, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:34.217344', 'step': 12270, 'epoch': 2}
{'type': 'loss', 'content': 0.02226227894425392, 'timestamp': '2025-10-02 00:32:34.226951', 'step': 12271, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:34.300626', 'step': 12271, 'epoch': 2}
{'type': 'loss', 'content': 0.13003547489643097, 'timestamp': '2025-10-02 00:32:34.308861', 'step': 12272, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:32:34.393029', 'step': 12272, 'epoch': 2}
{'type': 'loss', 'content': 0.020792899653315544, 'timestamp': '2025-10-02 00:32:34.406015', 'step': 12273, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:32:34.480224', 'step': 12273, 'epoch': 2}
{'type': 'loss', 'content': 0.025931479409337044, 'timestamp': '2025-10-02 00:32:34.490773', 'step': 12274, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:34.565136', 'step': 12274, 'epoch': 2}
{'type': 'loss', 'content': 0.03624141588807106, 'timestamp': '2025-10-02 00:32:34.574702', 'step': 12275, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:32:34.656820', 'step': 12275, 'epoch': 2}
{'type': 'loss', 'content': 0.06996246427297592, 'timestamp': '2025-10-02 00:32:34.672844', 'step': 12276, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:32:34.757232', 'step': 12276, 'epoch': 2}
{'type': 'loss', 'content': 0.04259086400270462, 'timestamp': '2025-10-02 00:32:34.770785', 'step': 12277, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:34.830030', 'step': 12277, 'epoch': 2}
{'type': 'loss', 'content': 0.09386385232210159, 'timestamp': '2025-10-02 00:32:34.833013', 'step': 12278, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:34.904150', 'step': 12278, 'epoch': 2}
{'type': 'loss', 'content': 0.017626125365495682, 'timestamp': '2025-10-02 00:32:34.914338', 'step': 12279, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:34.991017', 'step': 12279, 'epoch': 2}
{'type': 'loss', 'content': 0.13161179423332214, 'timestamp': '2025-10-02 00:32:34.999018', 'step': 12280, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:32:35.067931', 'step': 12280, 'epoch': 2}
{'type': 'loss', 'content': 0.04926840215921402, 'timestamp': '2025-10-02 00:32:35.079287', 'step': 12281, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:32:35.176616', 'step': 12281, 'epoch': 2}
{'type': 'loss', 'content': 0.014515005052089691, 'timestamp': '2025-10-02 00:32:35.189093', 'step': 12282, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:35.256600', 'step': 12282, 'epoch': 2}
{'type': 'loss', 'content': 0.041381098330020905, 'timestamp': '2025-10-02 00:32:35.266815', 'step': 12283, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:32:35.342737', 'step': 12283, 'epoch': 2}
{'type': 'loss', 'content': 0.07749680429697037, 'timestamp': '2025-10-02 00:32:35.349715', 'step': 12284, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:35.414057', 'step': 12284, 'epoch': 2}
{'type': 'loss', 'content': 0.20336037874221802, 'timestamp': '2025-10-02 00:32:35.423885', 'step': 12285, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:35.495274', 'step': 12285, 'epoch': 2}
{'type': 'loss', 'content': 0.09569445252418518, 'timestamp': '2025-10-02 00:32:35.498267', 'step': 12286, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:35.560231', 'step': 12286, 'epoch': 2}
{'type': 'loss', 'content': 0.1415734589099884, 'timestamp': '2025-10-02 00:32:35.571471', 'step': 12287, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:35.631703', 'step': 12287, 'epoch': 2}
{'type': 'loss', 'content': 0.05487234517931938, 'timestamp': '2025-10-02 00:32:35.640011', 'step': 12288, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:35.701037', 'step': 12288, 'epoch': 2}
{'type': 'loss', 'content': 0.0745069831609726, 'timestamp': '2025-10-02 00:32:35.705434', 'step': 12289, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:35.778486', 'step': 12289, 'epoch': 2}
{'type': 'loss', 'content': 0.045200616121292114, 'timestamp': '2025-10-02 00:32:35.788450', 'step': 12290, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:35.863498', 'step': 12290, 'epoch': 2}
{'type': 'loss', 'content': 0.08641218394041061, 'timestamp': '2025-10-02 00:32:35.867022', 'step': 12291, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:35.924369', 'step': 12291, 'epoch': 2}
{'type': 'loss', 'content': 0.10565071552991867, 'timestamp': '2025-10-02 00:32:35.940036', 'step': 12292, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:36.010341', 'step': 12292, 'epoch': 2}
{'type': 'loss', 'content': 0.18630115687847137, 'timestamp': '2025-10-02 00:32:36.013668', 'step': 12293, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:32:36.107417', 'step': 12293, 'epoch': 2}
{'type': 'loss', 'content': 0.04661620035767555, 'timestamp': '2025-10-02 00:32:36.120896', 'step': 12294, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:36.188653', 'step': 12294, 'epoch': 2}
{'type': 'loss', 'content': 0.06842511892318726, 'timestamp': '2025-10-02 00:32:36.198610', 'step': 12295, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:36.271247', 'step': 12295, 'epoch': 2}
{'type': 'loss', 'content': 0.07192429155111313, 'timestamp': '2025-10-02 00:32:36.279901', 'step': 12296, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:36.352716', 'step': 12296, 'epoch': 2}
{'type': 'loss', 'content': 0.08510051667690277, 'timestamp': '2025-10-02 00:32:36.362879', 'step': 12297, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:32:36.442114', 'step': 12297, 'epoch': 2}
{'type': 'loss', 'content': 0.03725520148873329, 'timestamp': '2025-10-02 00:32:36.452788', 'step': 12298, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:36.509897', 'step': 12298, 'epoch': 2}
{'type': 'loss', 'content': 0.056722600013017654, 'timestamp': '2025-10-02 00:32:36.514224', 'step': 12299, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:36.571479', 'step': 12299, 'epoch': 2}
{'type': 'loss', 'content': 0.037632621824741364, 'timestamp': '2025-10-02 00:32:36.578163', 'step': 12300, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:36.648412', 'step': 12300, 'epoch': 2}
{'type': 'loss', 'content': 0.027725722640752792, 'timestamp': '2025-10-02 00:32:36.658169', 'step': 12301, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:36.731927', 'step': 12301, 'epoch': 2}
{'type': 'loss', 'content': 0.024275733157992363, 'timestamp': '2025-10-02 00:32:36.743939', 'step': 12302, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:36.808286', 'step': 12302, 'epoch': 2}
{'type': 'loss', 'content': 0.02335944026708603, 'timestamp': '2025-10-02 00:32:36.818369', 'step': 12303, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:36.891178', 'step': 12303, 'epoch': 2}
{'type': 'loss', 'content': 0.09395938366651535, 'timestamp': '2025-10-02 00:32:36.900765', 'step': 12304, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:36.978040', 'step': 12304, 'epoch': 2}
{'type': 'loss', 'content': 0.048780690878629684, 'timestamp': '2025-10-02 00:32:36.988337', 'step': 12305, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:37.065186', 'step': 12305, 'epoch': 2}
{'type': 'loss', 'content': 0.06301116198301315, 'timestamp': '2025-10-02 00:32:37.076530', 'step': 12306, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:32:37.142590', 'step': 12306, 'epoch': 2}
{'type': 'loss', 'content': 0.04505711793899536, 'timestamp': '2025-10-02 00:32:37.156235', 'step': 12307, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:32:37.213213', 'step': 12307, 'epoch': 2}
{'type': 'loss', 'content': 0.10008533298969269, 'timestamp': '2025-10-02 00:32:37.222251', 'step': 12308, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:37.285357', 'step': 12308, 'epoch': 2}
{'type': 'loss', 'content': 0.14325635135173798, 'timestamp': '2025-10-02 00:32:37.288363', 'step': 12309, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:37.349952', 'step': 12309, 'epoch': 2}
{'type': 'loss', 'content': 0.061861999332904816, 'timestamp': '2025-10-02 00:32:37.354468', 'step': 12310, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:37.412187', 'step': 12310, 'epoch': 2}
{'type': 'loss', 'content': 0.08153238147497177, 'timestamp': '2025-10-02 00:32:37.421770', 'step': 12311, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:32:37.478796', 'step': 12311, 'epoch': 2}
{'type': 'loss', 'content': 0.18767155706882477, 'timestamp': '2025-10-02 00:32:37.485698', 'step': 12312, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:37.563006', 'step': 12312, 'epoch': 2}
{'type': 'loss', 'content': 0.07978639751672745, 'timestamp': '2025-10-02 00:32:37.566557', 'step': 12313, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:37.621283', 'step': 12313, 'epoch': 2}
{'type': 'loss', 'content': 0.10311959683895111, 'timestamp': '2025-10-02 00:32:37.629233', 'step': 12314, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:32:37.703660', 'step': 12314, 'epoch': 2}
{'type': 'loss', 'content': 0.11322274804115295, 'timestamp': '2025-10-02 00:32:37.706985', 'step': 12315, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:37.770041', 'step': 12315, 'epoch': 2}
{'type': 'loss', 'content': 0.004364443942904472, 'timestamp': '2025-10-02 00:32:37.777600', 'step': 12316, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:37.839093', 'step': 12316, 'epoch': 2}
{'type': 'loss', 'content': 0.05153360217809677, 'timestamp': '2025-10-02 00:32:37.849692', 'step': 12317, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:32:37.906915', 'step': 12317, 'epoch': 2}
{'type': 'loss', 'content': 0.153848335146904, 'timestamp': '2025-10-02 00:32:37.910336', 'step': 12318, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:32:37.980752', 'step': 12318, 'epoch': 2}
{'type': 'loss', 'content': 0.025518983602523804, 'timestamp': '2025-10-02 00:32:37.993711', 'step': 12319, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:32:38.068092', 'step': 12319, 'epoch': 2}
{'type': 'loss', 'content': 0.04310827702283859, 'timestamp': '2025-10-02 00:32:38.080870', 'step': 12320, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:38.165920', 'step': 12320, 'epoch': 2}
{'type': 'loss', 'content': 0.16062158346176147, 'timestamp': '2025-10-02 00:32:38.176884', 'step': 12321, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:38.232060', 'step': 12321, 'epoch': 2}
{'type': 'loss', 'content': 0.04702216386795044, 'timestamp': '2025-10-02 00:32:38.239900', 'step': 12322, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:38.304485', 'step': 12322, 'epoch': 2}
{'type': 'loss', 'content': 0.023048199713230133, 'timestamp': '2025-10-02 00:32:38.309658', 'step': 12323, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:38.370943', 'step': 12323, 'epoch': 2}
{'type': 'loss', 'content': 0.06226801872253418, 'timestamp': '2025-10-02 00:32:38.386391', 'step': 12324, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:32:38.462903', 'step': 12324, 'epoch': 2}
{'type': 'loss', 'content': 0.012286579236388206, 'timestamp': '2025-10-02 00:32:38.476333', 'step': 12325, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:38.535898', 'step': 12325, 'epoch': 2}
{'type': 'loss', 'content': 0.04885955899953842, 'timestamp': '2025-10-02 00:32:38.540017', 'step': 12326, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:32:38.618410', 'step': 12326, 'epoch': 2}
{'type': 'loss', 'content': 0.0070940423756837845, 'timestamp': '2025-10-02 00:32:38.630887', 'step': 12327, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:38.693196', 'step': 12327, 'epoch': 2}
{'type': 'loss', 'content': 0.06283308565616608, 'timestamp': '2025-10-02 00:32:38.702443', 'step': 12328, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:38.758625', 'step': 12328, 'epoch': 2}
{'type': 'loss', 'content': 0.08670580387115479, 'timestamp': '2025-10-02 00:32:38.762515', 'step': 12329, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:38.827813', 'step': 12329, 'epoch': 2}
{'type': 'loss', 'content': 0.08576937019824982, 'timestamp': '2025-10-02 00:32:38.831201', 'step': 12330, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:38.897369', 'step': 12330, 'epoch': 2}
{'type': 'loss', 'content': 0.06735709309577942, 'timestamp': '2025-10-02 00:32:38.907574', 'step': 12331, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:32:38.977291', 'step': 12331, 'epoch': 2}
{'type': 'loss', 'content': 0.012064550071954727, 'timestamp': '2025-10-02 00:32:38.988562', 'step': 12332, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:39.054601', 'step': 12332, 'epoch': 2}
{'type': 'loss', 'content': 0.06248470023274422, 'timestamp': '2025-10-02 00:32:39.059144', 'step': 12333, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:39.116234', 'step': 12333, 'epoch': 2}
{'type': 'loss', 'content': 0.09259290248155594, 'timestamp': '2025-10-02 00:32:39.126323', 'step': 12334, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:39.197963', 'step': 12334, 'epoch': 2}
{'type': 'loss', 'content': 0.020641431212425232, 'timestamp': '2025-10-02 00:32:39.207813', 'step': 12335, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:32:39.282637', 'step': 12335, 'epoch': 2}
{'type': 'loss', 'content': 0.02996213547885418, 'timestamp': '2025-10-02 00:32:39.293928', 'step': 12336, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:39.358431', 'step': 12336, 'epoch': 2}
{'type': 'loss', 'content': 0.03654563054442406, 'timestamp': '2025-10-02 00:32:39.368211', 'step': 12337, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:39.439839', 'step': 12337, 'epoch': 2}
{'type': 'loss', 'content': 0.2730574309825897, 'timestamp': '2025-10-02 00:32:39.446057', 'step': 12338, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:39.508311', 'step': 12338, 'epoch': 2}
{'type': 'loss', 'content': 0.011820787563920021, 'timestamp': '2025-10-02 00:32:39.517711', 'step': 12339, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:39.579648', 'step': 12339, 'epoch': 2}
{'type': 'loss', 'content': 0.09265345335006714, 'timestamp': '2025-10-02 00:32:39.595599', 'step': 12340, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:39.654590', 'step': 12340, 'epoch': 2}
{'type': 'loss', 'content': 0.09113059192895889, 'timestamp': '2025-10-02 00:32:39.660928', 'step': 12341, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:39.736102', 'step': 12341, 'epoch': 2}
{'type': 'loss', 'content': 0.08073233813047409, 'timestamp': '2025-10-02 00:32:39.740656', 'step': 12342, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:39.805255', 'step': 12342, 'epoch': 2}
{'type': 'loss', 'content': 0.04622334614396095, 'timestamp': '2025-10-02 00:32:39.813091', 'step': 12343, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:39.876353', 'step': 12343, 'epoch': 2}
{'type': 'loss', 'content': 0.0825352892279625, 'timestamp': '2025-10-02 00:32:39.886155', 'step': 12344, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:39.955073', 'step': 12344, 'epoch': 2}
{'type': 'loss', 'content': 0.13684873282909393, 'timestamp': '2025-10-02 00:32:39.960261', 'step': 12345, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:40.032170', 'step': 12345, 'epoch': 2}
{'type': 'loss', 'content': 0.041070010513067245, 'timestamp': '2025-10-02 00:32:40.037370', 'step': 12346, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:40.100671', 'step': 12346, 'epoch': 2}
{'type': 'loss', 'content': 0.04217855632305145, 'timestamp': '2025-10-02 00:32:40.108394', 'step': 12347, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:40.165722', 'step': 12347, 'epoch': 2}
{'type': 'loss', 'content': 0.04902748391032219, 'timestamp': '2025-10-02 00:32:40.177025', 'step': 12348, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:32:40.241745', 'step': 12348, 'epoch': 2}
{'type': 'loss', 'content': 0.10430526733398438, 'timestamp': '2025-10-02 00:32:40.253111', 'step': 12349, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:40.309239', 'step': 12349, 'epoch': 2}
{'type': 'loss', 'content': 0.02107621729373932, 'timestamp': '2025-10-02 00:32:40.315461', 'step': 12350, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:40.372198', 'step': 12350, 'epoch': 2}
{'type': 'loss', 'content': 0.16616900265216827, 'timestamp': '2025-10-02 00:32:40.374948', 'step': 12351, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:40.430689', 'step': 12351, 'epoch': 2}
{'type': 'loss', 'content': 0.053411077708005905, 'timestamp': '2025-10-02 00:32:40.437687', 'step': 12352, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:40.491397', 'step': 12352, 'epoch': 2}
{'type': 'loss', 'content': 0.12860605120658875, 'timestamp': '2025-10-02 00:32:40.494143', 'step': 12353, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:40.548824', 'step': 12353, 'epoch': 2}
{'type': 'loss', 'content': 0.05271109938621521, 'timestamp': '2025-10-02 00:32:40.552029', 'step': 12354, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:40.608495', 'step': 12354, 'epoch': 2}
{'type': 'loss', 'content': 0.05691775307059288, 'timestamp': '2025-10-02 00:32:40.611951', 'step': 12355, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:40.666298', 'step': 12355, 'epoch': 2}
{'type': 'loss', 'content': 0.11781474202871323, 'timestamp': '2025-10-02 00:32:40.672262', 'step': 12356, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:40.728629', 'step': 12356, 'epoch': 2}
{'type': 'loss', 'content': 0.052071377635002136, 'timestamp': '2025-10-02 00:32:40.732288', 'step': 12357, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:40.788821', 'step': 12357, 'epoch': 2}
{'type': 'loss', 'content': 0.06360869854688644, 'timestamp': '2025-10-02 00:32:40.794137', 'step': 12358, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:32:40.858292', 'step': 12358, 'epoch': 2}
{'type': 'loss', 'content': 0.027723683044314384, 'timestamp': '2025-10-02 00:32:40.868742', 'step': 12359, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:40.923998', 'step': 12359, 'epoch': 2}
{'type': 'loss', 'content': 0.09498799592256546, 'timestamp': '2025-10-02 00:32:40.930062', 'step': 12360, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:40.983414', 'step': 12360, 'epoch': 2}
{'type': 'loss', 'content': 0.1742401272058487, 'timestamp': '2025-10-02 00:32:40.986788', 'step': 12361, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:41.042303', 'step': 12361, 'epoch': 2}
{'type': 'loss', 'content': 0.0512579083442688, 'timestamp': '2025-10-02 00:32:41.048503', 'step': 12362, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:41.104604', 'step': 12362, 'epoch': 2}
{'type': 'loss', 'content': 0.1737118363380432, 'timestamp': '2025-10-02 00:32:41.107915', 'step': 12363, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:41.166543', 'step': 12363, 'epoch': 2}
{'type': 'loss', 'content': 0.0752592384815216, 'timestamp': '2025-10-02 00:32:41.181026', 'step': 12364, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:41.252937', 'step': 12364, 'epoch': 2}
{'type': 'loss', 'content': 0.27205559611320496, 'timestamp': '2025-10-02 00:32:41.256288', 'step': 12365, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:41.313535', 'step': 12365, 'epoch': 2}
{'type': 'loss', 'content': 0.08570053428411484, 'timestamp': '2025-10-02 00:32:41.317421', 'step': 12366, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:41.393136', 'step': 12366, 'epoch': 2}
{'type': 'loss', 'content': 0.05915508419275284, 'timestamp': '2025-10-02 00:32:41.396853', 'step': 12367, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:41.467667', 'step': 12367, 'epoch': 2}
{'type': 'loss', 'content': 0.03006414882838726, 'timestamp': '2025-10-02 00:32:41.478009', 'step': 12368, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:41.539978', 'step': 12368, 'epoch': 2}
{'type': 'loss', 'content': 0.09018492698669434, 'timestamp': '2025-10-02 00:32:41.547865', 'step': 12369, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:41.604327', 'step': 12369, 'epoch': 2}
{'type': 'loss', 'content': 0.05894918367266655, 'timestamp': '2025-10-02 00:32:41.615594', 'step': 12370, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:41.672881', 'step': 12370, 'epoch': 2}
{'type': 'loss', 'content': 0.23691371083259583, 'timestamp': '2025-10-02 00:32:41.675902', 'step': 12371, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:41.738349', 'step': 12371, 'epoch': 2}
{'type': 'loss', 'content': 0.11174637824296951, 'timestamp': '2025-10-02 00:32:41.744642', 'step': 12372, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:41.806914', 'step': 12372, 'epoch': 2}
{'type': 'loss', 'content': 0.04487181827425957, 'timestamp': '2025-10-02 00:32:41.813138', 'step': 12373, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:41.870319', 'step': 12373, 'epoch': 2}
{'type': 'loss', 'content': 0.19925563037395477, 'timestamp': '2025-10-02 00:32:41.872934', 'step': 12374, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:41.934524', 'step': 12374, 'epoch': 2}
{'type': 'loss', 'content': 0.17092400789260864, 'timestamp': '2025-10-02 00:32:41.937814', 'step': 12375, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:42.001863', 'step': 12375, 'epoch': 2}
{'type': 'loss', 'content': 0.16326823830604553, 'timestamp': '2025-10-02 00:32:42.016390', 'step': 12376, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:42.080858', 'step': 12376, 'epoch': 2}
{'type': 'loss', 'content': 0.048871468752622604, 'timestamp': '2025-10-02 00:32:42.091884', 'step': 12377, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:42.174678', 'step': 12377, 'epoch': 2}
{'type': 'loss', 'content': 0.027501020580530167, 'timestamp': '2025-10-02 00:32:42.178121', 'step': 12378, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:42.248663', 'step': 12378, 'epoch': 2}
{'type': 'loss', 'content': 0.11008568108081818, 'timestamp': '2025-10-02 00:32:42.251365', 'step': 12379, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:42.307195', 'step': 12379, 'epoch': 2}
{'type': 'loss', 'content': 0.10092157870531082, 'timestamp': '2025-10-02 00:32:42.317302', 'step': 12380, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:42.377328', 'step': 12380, 'epoch': 2}
{'type': 'loss', 'content': 0.03962841257452965, 'timestamp': '2025-10-02 00:32:42.383610', 'step': 12381, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:42.448676', 'step': 12381, 'epoch': 2}
{'type': 'loss', 'content': 0.029617665335536003, 'timestamp': '2025-10-02 00:32:42.456415', 'step': 12382, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:42.521194', 'step': 12382, 'epoch': 2}
{'type': 'loss', 'content': 0.09345678985118866, 'timestamp': '2025-10-02 00:32:42.530574', 'step': 12383, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:42.596493', 'step': 12383, 'epoch': 2}
{'type': 'loss', 'content': 0.07675793021917343, 'timestamp': '2025-10-02 00:32:42.605946', 'step': 12384, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:42.666266', 'step': 12384, 'epoch': 2}
{'type': 'loss', 'content': 0.11155880242586136, 'timestamp': '2025-10-02 00:32:42.669511', 'step': 12385, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:32:42.727077', 'step': 12385, 'epoch': 2}
{'type': 'loss', 'content': 0.22745397686958313, 'timestamp': '2025-10-02 00:32:42.730330', 'step': 12386, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:42.799103', 'step': 12386, 'epoch': 2}
{'type': 'loss', 'content': 0.028977151960134506, 'timestamp': '2025-10-02 00:32:42.804981', 'step': 12387, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:42.862778', 'step': 12387, 'epoch': 2}
{'type': 'loss', 'content': 0.09350249916315079, 'timestamp': '2025-10-02 00:32:42.869605', 'step': 12388, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:42.945749', 'step': 12388, 'epoch': 2}
{'type': 'loss', 'content': 0.04655396565794945, 'timestamp': '2025-10-02 00:32:42.949071', 'step': 12389, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:32:43.010345', 'step': 12389, 'epoch': 2}
{'type': 'loss', 'content': 0.07126757502555847, 'timestamp': '2025-10-02 00:32:43.018476', 'step': 12390, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:43.089876', 'step': 12390, 'epoch': 2}
{'type': 'loss', 'content': 0.01118030771613121, 'timestamp': '2025-10-02 00:32:43.099437', 'step': 12391, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:43.174368', 'step': 12391, 'epoch': 2}
{'type': 'loss', 'content': 0.08462008833885193, 'timestamp': '2025-10-02 00:32:43.188588', 'step': 12392, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:43.271646', 'step': 12392, 'epoch': 2}
{'type': 'loss', 'content': 0.06454233080148697, 'timestamp': '2025-10-02 00:32:43.281535', 'step': 12393, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:43.357630', 'step': 12393, 'epoch': 2}
{'type': 'loss', 'content': 0.04590392857789993, 'timestamp': '2025-10-02 00:32:43.367851', 'step': 12394, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:32:43.426786', 'step': 12394, 'epoch': 2}
{'type': 'loss', 'content': 0.12960247695446014, 'timestamp': '2025-10-02 00:32:43.432557', 'step': 12395, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:43.495501', 'step': 12395, 'epoch': 2}
{'type': 'loss', 'content': 0.011695530265569687, 'timestamp': '2025-10-02 00:32:43.507261', 'step': 12396, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:43.571304', 'step': 12396, 'epoch': 2}
{'type': 'loss', 'content': 0.1919916570186615, 'timestamp': '2025-10-02 00:32:43.584201', 'step': 12397, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:43.652211', 'step': 12397, 'epoch': 2}
{'type': 'loss', 'content': 0.09213805198669434, 'timestamp': '2025-10-02 00:32:43.660452', 'step': 12398, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:43.717810', 'step': 12398, 'epoch': 2}
{'type': 'loss', 'content': 0.12293229252099991, 'timestamp': '2025-10-02 00:32:43.725842', 'step': 12399, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:43.804771', 'step': 12399, 'epoch': 2}
{'type': 'loss', 'content': 0.045554373413324356, 'timestamp': '2025-10-02 00:32:43.819433', 'step': 12400, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:43.893799', 'step': 12400, 'epoch': 2}
{'type': 'loss', 'content': 0.0276121087372303, 'timestamp': '2025-10-02 00:32:43.903831', 'step': 12401, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:43.976775', 'step': 12401, 'epoch': 2}
{'type': 'loss', 'content': 0.11160048097372055, 'timestamp': '2025-10-02 00:32:43.983785', 'step': 12402, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:44.061541', 'step': 12402, 'epoch': 2}
{'type': 'loss', 'content': 0.042271167039871216, 'timestamp': '2025-10-02 00:32:44.064182', 'step': 12403, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:44.134759', 'step': 12403, 'epoch': 2}
{'type': 'loss', 'content': 0.006031266879290342, 'timestamp': '2025-10-02 00:32:44.147097', 'step': 12404, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:44.220642', 'step': 12404, 'epoch': 2}
{'type': 'loss', 'content': 0.06461484730243683, 'timestamp': '2025-10-02 00:32:44.230917', 'step': 12405, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:44.294594', 'step': 12405, 'epoch': 2}
{'type': 'loss', 'content': 0.07337664812803268, 'timestamp': '2025-10-02 00:32:44.303025', 'step': 12406, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:44.372738', 'step': 12406, 'epoch': 2}
{'type': 'loss', 'content': 0.2184673398733139, 'timestamp': '2025-10-02 00:32:44.382482', 'step': 12407, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:44.449849', 'step': 12407, 'epoch': 2}
{'type': 'loss', 'content': 0.2140810638666153, 'timestamp': '2025-10-02 00:32:44.461342', 'step': 12408, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:44.522476', 'step': 12408, 'epoch': 2}
{'type': 'loss', 'content': 0.06418845057487488, 'timestamp': '2025-10-02 00:32:44.531887', 'step': 12409, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:44.594623', 'step': 12409, 'epoch': 2}
{'type': 'loss', 'content': 0.035428132861852646, 'timestamp': '2025-10-02 00:32:44.600958', 'step': 12410, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:44.655600', 'step': 12410, 'epoch': 2}
{'type': 'loss', 'content': 0.0699663907289505, 'timestamp': '2025-10-02 00:32:44.658337', 'step': 12411, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:44.712944', 'step': 12411, 'epoch': 2}
{'type': 'loss', 'content': 0.10181666165590286, 'timestamp': '2025-10-02 00:32:44.719039', 'step': 12412, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:44.773827', 'step': 12412, 'epoch': 2}
{'type': 'loss', 'content': 0.05575084686279297, 'timestamp': '2025-10-02 00:32:44.776053', 'step': 12413, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:44.831812', 'step': 12413, 'epoch': 2}
{'type': 'loss', 'content': 0.06865595281124115, 'timestamp': '2025-10-02 00:32:44.841305', 'step': 12414, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:44.896664', 'step': 12414, 'epoch': 2}
{'type': 'loss', 'content': 0.04799283295869827, 'timestamp': '2025-10-02 00:32:44.899096', 'step': 12415, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:44.954897', 'step': 12415, 'epoch': 2}
{'type': 'loss', 'content': 0.03437665477395058, 'timestamp': '2025-10-02 00:32:44.965257', 'step': 12416, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:45.019622', 'step': 12416, 'epoch': 2}
{'type': 'loss', 'content': 0.08160951733589172, 'timestamp': '2025-10-02 00:32:45.027636', 'step': 12417, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:45.084780', 'step': 12417, 'epoch': 2}
{'type': 'loss', 'content': 0.015348195098340511, 'timestamp': '2025-10-02 00:32:45.094317', 'step': 12418, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:45.149620', 'step': 12418, 'epoch': 2}
{'type': 'loss', 'content': 0.0719812661409378, 'timestamp': '2025-10-02 00:32:45.153121', 'step': 12419, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:45.209109', 'step': 12419, 'epoch': 2}
{'type': 'loss', 'content': 0.04662510007619858, 'timestamp': '2025-10-02 00:32:45.219243', 'step': 12420, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:32:45.272907', 'step': 12420, 'epoch': 2}
{'type': 'loss', 'content': 0.08679701387882233, 'timestamp': '2025-10-02 00:32:45.275647', 'step': 12421, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:45.330904', 'step': 12421, 'epoch': 2}
{'type': 'loss', 'content': 0.012276135385036469, 'timestamp': '2025-10-02 00:32:45.340269', 'step': 12422, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:45.395716', 'step': 12422, 'epoch': 2}
{'type': 'loss', 'content': 0.09242264926433563, 'timestamp': '2025-10-02 00:32:45.398159', 'step': 12423, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:32:45.466093', 'step': 12423, 'epoch': 2}
{'type': 'loss', 'content': 0.034026309847831726, 'timestamp': '2025-10-02 00:32:45.478875', 'step': 12424, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:45.533111', 'step': 12424, 'epoch': 2}
{'type': 'loss', 'content': 0.041977886110544205, 'timestamp': '2025-10-02 00:32:45.535622', 'step': 12425, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:45.589196', 'step': 12425, 'epoch': 2}
{'type': 'loss', 'content': 0.2054544985294342, 'timestamp': '2025-10-02 00:32:45.591812', 'step': 12426, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:45.645689', 'step': 12426, 'epoch': 2}
{'type': 'loss', 'content': 0.06410370022058487, 'timestamp': '2025-10-02 00:32:45.649795', 'step': 12427, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:45.704619', 'step': 12427, 'epoch': 2}
{'type': 'loss', 'content': 0.033067066222429276, 'timestamp': '2025-10-02 00:32:45.710552', 'step': 12428, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:45.764097', 'step': 12428, 'epoch': 2}
{'type': 'loss', 'content': 0.06738448143005371, 'timestamp': '2025-10-02 00:32:45.766891', 'step': 12429, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:45.825268', 'step': 12429, 'epoch': 2}
{'type': 'loss', 'content': 0.0022863566409796476, 'timestamp': '2025-10-02 00:32:45.834873', 'step': 12430, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:45.890496', 'step': 12430, 'epoch': 2}
{'type': 'loss', 'content': 0.03365929052233696, 'timestamp': '2025-10-02 00:32:45.898375', 'step': 12431, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:45.956689', 'step': 12431, 'epoch': 2}
{'type': 'loss', 'content': 0.08431044965982437, 'timestamp': '2025-10-02 00:32:45.967029', 'step': 12432, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:46.022297', 'step': 12432, 'epoch': 2}
{'type': 'loss', 'content': 0.14005626738071442, 'timestamp': '2025-10-02 00:32:46.025742', 'step': 12433, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:46.082900', 'step': 12433, 'epoch': 2}
{'type': 'loss', 'content': 0.09895045310258865, 'timestamp': '2025-10-02 00:32:46.087000', 'step': 12434, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:46.144602', 'step': 12434, 'epoch': 2}
{'type': 'loss', 'content': 0.08072685450315475, 'timestamp': '2025-10-02 00:32:46.154152', 'step': 12435, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:32:46.209397', 'step': 12435, 'epoch': 2}
{'type': 'loss', 'content': 0.08777157962322235, 'timestamp': '2025-10-02 00:32:46.215628', 'step': 12436, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:46.272076', 'step': 12436, 'epoch': 2}
{'type': 'loss', 'content': 0.11188710480928421, 'timestamp': '2025-10-02 00:32:46.274446', 'step': 12437, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:46.329788', 'step': 12437, 'epoch': 2}
{'type': 'loss', 'content': 0.05242157727479935, 'timestamp': '2025-10-02 00:32:46.333359', 'step': 12438, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:46.387113', 'step': 12438, 'epoch': 2}
{'type': 'loss', 'content': 0.05886387079954147, 'timestamp': '2025-10-02 00:32:46.389821', 'step': 12439, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:46.445333', 'step': 12439, 'epoch': 2}
{'type': 'loss', 'content': 0.03683609142899513, 'timestamp': '2025-10-02 00:32:46.451912', 'step': 12440, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:46.505791', 'step': 12440, 'epoch': 2}
{'type': 'loss', 'content': 0.1114002913236618, 'timestamp': '2025-10-02 00:32:46.508478', 'step': 12441, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:46.572907', 'step': 12441, 'epoch': 2}
{'type': 'loss', 'content': 0.0363074392080307, 'timestamp': '2025-10-02 00:32:46.583116', 'step': 12442, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:46.640065', 'step': 12442, 'epoch': 2}
{'type': 'loss', 'content': 0.02829602360725403, 'timestamp': '2025-10-02 00:32:46.643103', 'step': 12443, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:46.700415', 'step': 12443, 'epoch': 2}
{'type': 'loss', 'content': 0.13027510046958923, 'timestamp': '2025-10-02 00:32:46.706709', 'step': 12444, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:32:46.762575', 'step': 12444, 'epoch': 2}
{'type': 'loss', 'content': 0.12502236664295197, 'timestamp': '2025-10-02 00:32:46.766219', 'step': 12445, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:46.821839', 'step': 12445, 'epoch': 2}
{'type': 'loss', 'content': 0.08634345233440399, 'timestamp': '2025-10-02 00:32:46.825205', 'step': 12446, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:46.883309', 'step': 12446, 'epoch': 2}
{'type': 'loss', 'content': 0.06769777834415436, 'timestamp': '2025-10-02 00:32:46.887271', 'step': 12447, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:46.945444', 'step': 12447, 'epoch': 2}
{'type': 'loss', 'content': 0.04373766854405403, 'timestamp': '2025-10-02 00:32:46.951610', 'step': 12448, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:47.006965', 'step': 12448, 'epoch': 2}
{'type': 'loss', 'content': 0.0902969166636467, 'timestamp': '2025-10-02 00:32:47.010028', 'step': 12449, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:47.068170', 'step': 12449, 'epoch': 2}
{'type': 'loss', 'content': 0.009498164989054203, 'timestamp': '2025-10-02 00:32:47.071810', 'step': 12450, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:47.127745', 'step': 12450, 'epoch': 2}
{'type': 'loss', 'content': 0.104278065264225, 'timestamp': '2025-10-02 00:32:47.131072', 'step': 12451, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:47.186542', 'step': 12451, 'epoch': 2}
{'type': 'loss', 'content': 0.0867033377289772, 'timestamp': '2025-10-02 00:32:47.192607', 'step': 12452, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:32:47.253127', 'step': 12452, 'epoch': 2}
{'type': 'loss', 'content': 0.09911685436964035, 'timestamp': '2025-10-02 00:32:47.256391', 'step': 12453, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:47.313301', 'step': 12453, 'epoch': 2}
{'type': 'loss', 'content': 0.09685972332954407, 'timestamp': '2025-10-02 00:32:47.317378', 'step': 12454, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:32:47.373455', 'step': 12454, 'epoch': 2}
{'type': 'loss', 'content': 0.1523246467113495, 'timestamp': '2025-10-02 00:32:47.376002', 'step': 12455, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:47.433854', 'step': 12455, 'epoch': 2}
{'type': 'loss', 'content': 0.012452995404601097, 'timestamp': '2025-10-02 00:32:47.440723', 'step': 12456, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:47.501055', 'step': 12456, 'epoch': 2}
{'type': 'loss', 'content': 0.012794447131454945, 'timestamp': '2025-10-02 00:32:47.512004', 'step': 12457, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:47.568107', 'step': 12457, 'epoch': 2}
{'type': 'loss', 'content': 0.06070484220981598, 'timestamp': '2025-10-02 00:32:47.571502', 'step': 12458, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:47.627776', 'step': 12458, 'epoch': 2}
{'type': 'loss', 'content': 0.061698492616415024, 'timestamp': '2025-10-02 00:32:47.630781', 'step': 12459, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:47.691394', 'step': 12459, 'epoch': 2}
{'type': 'loss', 'content': 0.10040124505758286, 'timestamp': '2025-10-02 00:32:47.699040', 'step': 12460, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:47.754121', 'step': 12460, 'epoch': 2}
{'type': 'loss', 'content': 0.0905057042837143, 'timestamp': '2025-10-02 00:32:47.756913', 'step': 12461, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:47.813603', 'step': 12461, 'epoch': 2}
{'type': 'loss', 'content': 0.014719435945153236, 'timestamp': '2025-10-02 00:32:47.823169', 'step': 12462, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:47.879094', 'step': 12462, 'epoch': 2}
{'type': 'loss', 'content': 0.037497378885746, 'timestamp': '2025-10-02 00:32:47.882289', 'step': 12463, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:47.938661', 'step': 12463, 'epoch': 2}
{'type': 'loss', 'content': 0.17724910378456116, 'timestamp': '2025-10-02 00:32:47.945586', 'step': 12464, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:48.002478', 'step': 12464, 'epoch': 2}
{'type': 'loss', 'content': 0.02395043708384037, 'timestamp': '2025-10-02 00:32:48.012752', 'step': 12465, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:48.071093', 'step': 12465, 'epoch': 2}
{'type': 'loss', 'content': 0.024343108758330345, 'timestamp': '2025-10-02 00:32:48.080411', 'step': 12466, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:48.135650', 'step': 12466, 'epoch': 2}
{'type': 'loss', 'content': 0.074790358543396, 'timestamp': '2025-10-02 00:32:48.138391', 'step': 12467, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:32:48.209147', 'step': 12467, 'epoch': 2}
{'type': 'loss', 'content': 0.05775173753499985, 'timestamp': '2025-10-02 00:32:48.222297', 'step': 12468, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:48.277569', 'step': 12468, 'epoch': 2}
{'type': 'loss', 'content': 0.08003785461187363, 'timestamp': '2025-10-02 00:32:48.280245', 'step': 12469, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:48.336156', 'step': 12469, 'epoch': 2}
{'type': 'loss', 'content': 0.13424406945705414, 'timestamp': '2025-10-02 00:32:48.338730', 'step': 12470, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:48.394495', 'step': 12470, 'epoch': 2}
{'type': 'loss', 'content': 0.20753376185894012, 'timestamp': '2025-10-02 00:32:48.397018', 'step': 12471, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:48.451863', 'step': 12471, 'epoch': 2}
{'type': 'loss', 'content': 0.10252590477466583, 'timestamp': '2025-10-02 00:32:48.462022', 'step': 12472, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:48.515696', 'step': 12472, 'epoch': 2}
{'type': 'loss', 'content': 0.10930364578962326, 'timestamp': '2025-10-02 00:32:48.518507', 'step': 12473, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:48.572862', 'step': 12473, 'epoch': 2}
{'type': 'loss', 'content': 0.0755089521408081, 'timestamp': '2025-10-02 00:32:48.574957', 'step': 12474, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:48.629747', 'step': 12474, 'epoch': 2}
{'type': 'loss', 'content': 0.04306936636567116, 'timestamp': '2025-10-02 00:32:48.637527', 'step': 12475, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:48.693068', 'step': 12475, 'epoch': 2}
{'type': 'loss', 'content': 0.036698322743177414, 'timestamp': '2025-10-02 00:32:48.699221', 'step': 12476, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:48.753223', 'step': 12476, 'epoch': 2}
{'type': 'loss', 'content': 0.06246073544025421, 'timestamp': '2025-10-02 00:32:48.755945', 'step': 12477, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:48.810723', 'step': 12477, 'epoch': 2}
{'type': 'loss', 'content': 0.12409472465515137, 'timestamp': '2025-10-02 00:32:48.813213', 'step': 12478, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:48.867377', 'step': 12478, 'epoch': 2}
{'type': 'loss', 'content': 0.07608094811439514, 'timestamp': '2025-10-02 00:32:48.870050', 'step': 12479, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:48.924155', 'step': 12479, 'epoch': 2}
{'type': 'loss', 'content': 0.07432912290096283, 'timestamp': '2025-10-02 00:32:48.929884', 'step': 12480, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:48.984879', 'step': 12480, 'epoch': 2}
{'type': 'loss', 'content': 0.10911445319652557, 'timestamp': '2025-10-02 00:32:48.987380', 'step': 12481, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:49.042340', 'step': 12481, 'epoch': 2}
{'type': 'loss', 'content': 0.07317163795232773, 'timestamp': '2025-10-02 00:32:49.050230', 'step': 12482, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:49.104834', 'step': 12482, 'epoch': 2}
{'type': 'loss', 'content': 0.042366985231637955, 'timestamp': '2025-10-02 00:32:49.107157', 'step': 12483, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:49.161129', 'step': 12483, 'epoch': 2}
{'type': 'loss', 'content': 0.1062895730137825, 'timestamp': '2025-10-02 00:32:49.167172', 'step': 12484, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:49.221271', 'step': 12484, 'epoch': 2}
{'type': 'loss', 'content': 0.01939702220261097, 'timestamp': '2025-10-02 00:32:49.231253', 'step': 12485, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:32:49.293849', 'step': 12485, 'epoch': 2}
{'type': 'loss', 'content': 0.007734148297458887, 'timestamp': '2025-10-02 00:32:49.304354', 'step': 12486, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-10-02 00:32:49.382124', 'step': 12486, 'epoch': 2}
{'type': 'loss', 'content': 0.01207538042217493, 'timestamp': '2025-10-02 00:32:49.395832', 'step': 12487, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:49.450712', 'step': 12487, 'epoch': 2}
{'type': 'loss', 'content': 0.16758860647678375, 'timestamp': '2025-10-02 00:32:49.456644', 'step': 12488, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:49.511070', 'step': 12488, 'epoch': 2}
{'type': 'loss', 'content': 0.11362966150045395, 'timestamp': '2025-10-02 00:32:49.513529', 'step': 12489, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:49.569392', 'step': 12489, 'epoch': 2}
{'type': 'loss', 'content': 0.03804139047861099, 'timestamp': '2025-10-02 00:32:49.574925', 'step': 12490, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:49.631522', 'step': 12490, 'epoch': 2}
{'type': 'loss', 'content': 0.10591349750757217, 'timestamp': '2025-10-02 00:32:49.634498', 'step': 12491, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:49.689821', 'step': 12491, 'epoch': 2}
{'type': 'loss', 'content': 0.08694200962781906, 'timestamp': '2025-10-02 00:32:49.695767', 'step': 12492, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:49.751085', 'step': 12492, 'epoch': 2}
{'type': 'loss', 'content': 0.059275418519973755, 'timestamp': '2025-10-02 00:32:49.754247', 'step': 12493, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:32:49.808467', 'step': 12493, 'epoch': 2}
{'type': 'loss', 'content': 0.09579486399888992, 'timestamp': '2025-10-02 00:32:49.810947', 'step': 12494, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:32:49.866344', 'step': 12494, 'epoch': 2}
{'type': 'loss', 'content': 0.1011931449174881, 'timestamp': '2025-10-02 00:32:49.869499', 'step': 12495, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:49.923347', 'step': 12495, 'epoch': 2}
{'type': 'loss', 'content': 0.05559638515114784, 'timestamp': '2025-10-02 00:32:49.929172', 'step': 12496, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:49.983225', 'step': 12496, 'epoch': 2}
{'type': 'loss', 'content': 0.16148188710212708, 'timestamp': '2025-10-02 00:32:49.985974', 'step': 12497, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:32:50.040847', 'step': 12497, 'epoch': 2}
{'type': 'loss', 'content': 0.06540906429290771, 'timestamp': '2025-10-02 00:32:50.043372', 'step': 12498, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:50.097944', 'step': 12498, 'epoch': 2}
{'type': 'loss', 'content': 0.0550549142062664, 'timestamp': '2025-10-02 00:32:50.104107', 'step': 12499, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:32:50.158512', 'step': 12499, 'epoch': 2}
{'type': 'loss', 'content': 0.12578772008419037, 'timestamp': '2025-10-02 00:32:50.164487', 'step': 12500, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 12500', 'timestamp': '2025-10-02 00:32:50.586123', 'step': 12500, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:32:50.644201', 'step': 12500, 'epoch': 2}
{'type': 'loss', 'content': 0.040576983243227005, 'timestamp': '2025-10-02 00:32:50.646928', 'step': 12501, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:32:50.703234', 'step': 12501, 'epoch': 2}
{'type': 'loss', 'content': 0.028841637074947357, 'timestamp': '2025-10-02 00:32:50.705601', 'step': 12502, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:50.765640', 'step': 12502, 'epoch': 2}
{'type': 'loss', 'content': 0.05439494922757149, 'timestamp': '2025-10-02 00:32:50.768227', 'step': 12503, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:50.824319', 'step': 12503, 'epoch': 2}
{'type': 'loss', 'content': 0.07570645958185196, 'timestamp': '2025-10-02 00:32:50.830643', 'step': 12504, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:50.887111', 'step': 12504, 'epoch': 2}
{'type': 'loss', 'content': 0.14922676980495453, 'timestamp': '2025-10-02 00:32:50.890670', 'step': 12505, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:50.952028', 'step': 12505, 'epoch': 2}
{'type': 'loss', 'content': 0.055349014699459076, 'timestamp': '2025-10-02 00:32:50.954879', 'step': 12506, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:32:51.009261', 'step': 12506, 'epoch': 2}
{'type': 'loss', 'content': 0.0958295464515686, 'timestamp': '2025-10-02 00:32:51.011653', 'step': 12507, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:51.071389', 'step': 12507, 'epoch': 2}
{'type': 'loss', 'content': 0.02288001775741577, 'timestamp': '2025-10-02 00:32:51.077213', 'step': 12508, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:51.131303', 'step': 12508, 'epoch': 2}
{'type': 'loss', 'content': 0.05183488130569458, 'timestamp': '2025-10-02 00:32:51.139147', 'step': 12509, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:32:51.193253', 'step': 12509, 'epoch': 2}
{'type': 'loss', 'content': 0.19023144245147705, 'timestamp': '2025-10-02 00:32:51.195394', 'step': 12510, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:32:51.256554', 'step': 12510, 'epoch': 2}
{'type': 'loss', 'content': 0.0070322537794709206, 'timestamp': '2025-10-02 00:32:51.266705', 'step': 12511, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:51.320526', 'step': 12511, 'epoch': 2}
{'type': 'loss', 'content': 0.07804436981678009, 'timestamp': '2025-10-02 00:32:51.327000', 'step': 12512, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:51.381057', 'step': 12512, 'epoch': 2}
{'type': 'loss', 'content': 0.09005355089902878, 'timestamp': '2025-10-02 00:32:51.388991', 'step': 12513, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:32:51.444037', 'step': 12513, 'epoch': 2}
{'type': 'loss', 'content': 0.22363856434822083, 'timestamp': '2025-10-02 00:32:51.446594', 'step': 12514, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:32:51.501803', 'step': 12514, 'epoch': 2}
{'type': 'loss', 'content': 0.0354132279753685, 'timestamp': '2025-10-02 00:32:51.509390', 'step': 12515, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:32:51.564823', 'step': 12515, 'epoch': 2}
{'type': 'loss', 'content': 0.09355569630861282, 'timestamp': '2025-10-02 00:32:51.571017', 'step': 12516, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:32:51.624718', 'step': 12516, 'epoch': 2}
{'type': 'loss', 'content': 0.09675031155347824, 'timestamp': '2025-10-02 00:32:51.627739', 'step': 12517, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:51.683430', 'step': 12517, 'epoch': 2}
{'type': 'loss', 'content': 0.0255175419151783, 'timestamp': '2025-10-02 00:32:51.689148', 'step': 12518, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:32:51.759507', 'step': 12518, 'epoch': 2}
{'type': 'loss', 'content': 0.056444138288497925, 'timestamp': '2025-10-02 00:32:51.771978', 'step': 12519, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:51.827218', 'step': 12519, 'epoch': 2}
{'type': 'loss', 'content': 0.11308037489652634, 'timestamp': '2025-10-02 00:32:51.833364', 'step': 12520, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:32:51.903683', 'step': 12520, 'epoch': 2}
{'type': 'loss', 'content': 0.016106557101011276, 'timestamp': '2025-10-02 00:32:51.917239', 'step': 12521, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:32:51.972421', 'step': 12521, 'epoch': 2}
{'type': 'loss', 'content': 0.012086738832294941, 'timestamp': '2025-10-02 00:32:51.974783', 'step': 12522, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:32:52.030308', 'step': 12522, 'epoch': 2}
{'type': 'loss', 'content': 0.06028474494814873, 'timestamp': '2025-10-02 00:32:52.032921', 'step': 12523, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:32:52.089306', 'step': 12523, 'epoch': 2}
{'type': 'loss', 'content': 0.10534153133630753, 'timestamp': '2025-10-02 00:32:52.099660', 'step': 12524, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:32:52.155643', 'step': 12524, 'epoch': 2}
{'type': 'loss', 'content': 0.04142308980226517, 'timestamp': '2025-10-02 00:32:52.165595', 'step': 12525, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:32:52.221303', 'step': 12525, 'epoch': 2}
{'type': 'loss', 'content': 0.05410252511501312, 'timestamp': '2025-10-02 00:32:52.223572', 'step': 12526, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:32:52.277306', 'step': 12526, 'epoch': 2}
{'type': 'loss', 'content': 0.12994247674942017, 'timestamp': '2025-10-02 00:32:52.283505', 'step': 12527, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:32:52.351854', 'step': 12527, 'epoch': 2}
{'type': 'loss', 'content': 0.10190755128860474, 'timestamp': '2025-10-02 00:32:52.358675', 'step': 12528, 'epoch': 2}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:33:22.181600', 'step': 12528, 'epoch': 2}
{'type': 'pplx', 'content': 110.41522230447363, 'timestamp': '2025-10-02 00:33:22.185471', 'step': 12528, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:22.244732', 'step': 12528, 'epoch': 2}
{'type': 'loss', 'content': 0.04739779233932495, 'timestamp': '2025-10-02 00:33:22.255749', 'step': 12529, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:22.317997', 'step': 12529, 'epoch': 2}
{'type': 'loss', 'content': 0.0802415981888771, 'timestamp': '2025-10-02 00:33:22.328160', 'step': 12530, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:22.385844', 'step': 12530, 'epoch': 2}
{'type': 'loss', 'content': 0.028153911232948303, 'timestamp': '2025-10-02 00:33:22.395197', 'step': 12531, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:22.453900', 'step': 12531, 'epoch': 2}
{'type': 'loss', 'content': 0.10564178228378296, 'timestamp': '2025-10-02 00:33:22.461214', 'step': 12532, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:22.521071', 'step': 12532, 'epoch': 2}
{'type': 'loss', 'content': 0.058573756366968155, 'timestamp': '2025-10-02 00:33:22.523270', 'step': 12533, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:22.583005', 'step': 12533, 'epoch': 2}
{'type': 'loss', 'content': 0.024170367047190666, 'timestamp': '2025-10-02 00:33:22.592403', 'step': 12534, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:22.655837', 'step': 12534, 'epoch': 2}
{'type': 'loss', 'content': 0.05322080850601196, 'timestamp': '2025-10-02 00:33:22.658592', 'step': 12535, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:22.724207', 'step': 12535, 'epoch': 2}
{'type': 'loss', 'content': 0.1279073804616928, 'timestamp': '2025-10-02 00:33:22.730612', 'step': 12536, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:22.788819', 'step': 12536, 'epoch': 2}
{'type': 'loss', 'content': 0.048457685858011246, 'timestamp': '2025-10-02 00:33:22.793452', 'step': 12537, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:22.853592', 'step': 12537, 'epoch': 2}
{'type': 'loss', 'content': 0.033187687397003174, 'timestamp': '2025-10-02 00:33:22.863084', 'step': 12538, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:22.920303', 'step': 12538, 'epoch': 2}
{'type': 'loss', 'content': 0.17127904295921326, 'timestamp': '2025-10-02 00:33:22.923641', 'step': 12539, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:22.981233', 'step': 12539, 'epoch': 2}
{'type': 'loss', 'content': 0.06779244542121887, 'timestamp': '2025-10-02 00:33:22.988086', 'step': 12540, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:23.044279', 'step': 12540, 'epoch': 2}
{'type': 'loss', 'content': 0.06485985964536667, 'timestamp': '2025-10-02 00:33:23.054566', 'step': 12541, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:23.119147', 'step': 12541, 'epoch': 2}
{'type': 'loss', 'content': 0.25027206540107727, 'timestamp': '2025-10-02 00:33:23.125515', 'step': 12542, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:33:23.195628', 'step': 12542, 'epoch': 2}
{'type': 'loss', 'content': 0.1222725659608841, 'timestamp': '2025-10-02 00:33:23.200017', 'step': 12543, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:23.260985', 'step': 12543, 'epoch': 2}
{'type': 'loss', 'content': 0.04182710498571396, 'timestamp': '2025-10-02 00:33:23.270466', 'step': 12544, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:23.330273', 'step': 12544, 'epoch': 2}
{'type': 'loss', 'content': 0.0371985100209713, 'timestamp': '2025-10-02 00:33:23.341233', 'step': 12545, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:23.395615', 'step': 12545, 'epoch': 2}
{'type': 'loss', 'content': 0.07742815464735031, 'timestamp': '2025-10-02 00:33:23.403382', 'step': 12546, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:23.459207', 'step': 12546, 'epoch': 2}
{'type': 'loss', 'content': 0.051330626010894775, 'timestamp': '2025-10-02 00:33:23.468577', 'step': 12547, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:23.522988', 'step': 12547, 'epoch': 2}
{'type': 'loss', 'content': 0.1820230633020401, 'timestamp': '2025-10-02 00:33:23.528632', 'step': 12548, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:23.582605', 'step': 12548, 'epoch': 2}
{'type': 'loss', 'content': 0.049836695194244385, 'timestamp': '2025-10-02 00:33:23.585039', 'step': 12549, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:23.639737', 'step': 12549, 'epoch': 2}
{'type': 'loss', 'content': 0.004255411680787802, 'timestamp': '2025-10-02 00:33:23.646069', 'step': 12550, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:23.700700', 'step': 12550, 'epoch': 2}
{'type': 'loss', 'content': 0.08397062867879868, 'timestamp': '2025-10-02 00:33:23.703563', 'step': 12551, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:23.764348', 'step': 12551, 'epoch': 2}
{'type': 'loss', 'content': 0.055934593081474304, 'timestamp': '2025-10-02 00:33:23.775385', 'step': 12552, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:33:23.837628', 'step': 12552, 'epoch': 2}
{'type': 'loss', 'content': 0.12194433063268661, 'timestamp': '2025-10-02 00:33:23.842231', 'step': 12553, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:23.902104', 'step': 12553, 'epoch': 2}
{'type': 'loss', 'content': 0.1243927851319313, 'timestamp': '2025-10-02 00:33:23.907406', 'step': 12554, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:23.971005', 'step': 12554, 'epoch': 2}
{'type': 'loss', 'content': 0.045367468148469925, 'timestamp': '2025-10-02 00:33:23.974517', 'step': 12555, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:24.032271', 'step': 12555, 'epoch': 2}
{'type': 'loss', 'content': 0.02851908840239048, 'timestamp': '2025-10-02 00:33:24.038583', 'step': 12556, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:24.098102', 'step': 12556, 'epoch': 2}
{'type': 'loss', 'content': 0.04272119328379631, 'timestamp': '2025-10-02 00:33:24.109073', 'step': 12557, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:24.166650', 'step': 12557, 'epoch': 2}
{'type': 'loss', 'content': 0.0819796621799469, 'timestamp': '2025-10-02 00:33:24.174165', 'step': 12558, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:24.234656', 'step': 12558, 'epoch': 2}
{'type': 'loss', 'content': 0.03001364693045616, 'timestamp': '2025-10-02 00:33:24.242456', 'step': 12559, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:24.300097', 'step': 12559, 'epoch': 2}
{'type': 'loss', 'content': 0.06643339991569519, 'timestamp': '2025-10-02 00:33:24.307957', 'step': 12560, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:24.366807', 'step': 12560, 'epoch': 2}
{'type': 'loss', 'content': 0.0915301963686943, 'timestamp': '2025-10-02 00:33:24.370266', 'step': 12561, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:24.439963', 'step': 12561, 'epoch': 2}
{'type': 'loss', 'content': 0.07798915356397629, 'timestamp': '2025-10-02 00:33:24.450105', 'step': 12562, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:24.512320', 'step': 12562, 'epoch': 2}
{'type': 'loss', 'content': 0.09171665459871292, 'timestamp': '2025-10-02 00:33:24.521866', 'step': 12563, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:24.576090', 'step': 12563, 'epoch': 2}
{'type': 'loss', 'content': 0.06692134588956833, 'timestamp': '2025-10-02 00:33:24.583214', 'step': 12564, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:24.638465', 'step': 12564, 'epoch': 2}
{'type': 'loss', 'content': 0.013653076253831387, 'timestamp': '2025-10-02 00:33:24.641368', 'step': 12565, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:24.695849', 'step': 12565, 'epoch': 2}
{'type': 'loss', 'content': 0.16086018085479736, 'timestamp': '2025-10-02 00:33:24.698094', 'step': 12566, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:24.755164', 'step': 12566, 'epoch': 2}
{'type': 'loss', 'content': 0.013280175626277924, 'timestamp': '2025-10-02 00:33:24.764535', 'step': 12567, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:24.823912', 'step': 12567, 'epoch': 2}
{'type': 'loss', 'content': 0.12872369587421417, 'timestamp': '2025-10-02 00:33:24.834038', 'step': 12568, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:24.898304', 'step': 12568, 'epoch': 2}
{'type': 'loss', 'content': 0.01817546971142292, 'timestamp': '2025-10-02 00:33:24.909308', 'step': 12569, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:24.964223', 'step': 12569, 'epoch': 2}
{'type': 'loss', 'content': 0.05901084467768669, 'timestamp': '2025-10-02 00:33:24.970237', 'step': 12570, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:25.034021', 'step': 12570, 'epoch': 2}
{'type': 'loss', 'content': 0.03752943500876427, 'timestamp': '2025-10-02 00:33:25.040367', 'step': 12571, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:25.098775', 'step': 12571, 'epoch': 2}
{'type': 'loss', 'content': 0.13292783498764038, 'timestamp': '2025-10-02 00:33:25.110331', 'step': 12572, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:25.167559', 'step': 12572, 'epoch': 2}
{'type': 'loss', 'content': 0.1295807808637619, 'timestamp': '2025-10-02 00:33:25.171035', 'step': 12573, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:25.237962', 'step': 12573, 'epoch': 2}
{'type': 'loss', 'content': 0.02895195595920086, 'timestamp': '2025-10-02 00:33:25.247679', 'step': 12574, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:25.316742', 'step': 12574, 'epoch': 2}
{'type': 'loss', 'content': 0.05077286437153816, 'timestamp': '2025-10-02 00:33:25.319792', 'step': 12575, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:25.379630', 'step': 12575, 'epoch': 2}
{'type': 'loss', 'content': 0.10238122195005417, 'timestamp': '2025-10-02 00:33:25.388287', 'step': 12576, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:25.446239', 'step': 12576, 'epoch': 2}
{'type': 'loss', 'content': 0.051275018602609634, 'timestamp': '2025-10-02 00:33:25.455901', 'step': 12577, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:25.511847', 'step': 12577, 'epoch': 2}
{'type': 'loss', 'content': 0.14741599559783936, 'timestamp': '2025-10-02 00:33:25.514234', 'step': 12578, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:25.571889', 'step': 12578, 'epoch': 2}
{'type': 'loss', 'content': 0.08570881932973862, 'timestamp': '2025-10-02 00:33:25.574266', 'step': 12579, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:33:25.640396', 'step': 12579, 'epoch': 2}
{'type': 'loss', 'content': 0.041370805352926254, 'timestamp': '2025-10-02 00:33:25.651814', 'step': 12580, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:25.706400', 'step': 12580, 'epoch': 2}
{'type': 'loss', 'content': 0.016698887571692467, 'timestamp': '2025-10-02 00:33:25.709043', 'step': 12581, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:33:25.764829', 'step': 12581, 'epoch': 2}
{'type': 'loss', 'content': 0.0775698870420456, 'timestamp': '2025-10-02 00:33:25.770111', 'step': 12582, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:25.839035', 'step': 12582, 'epoch': 2}
{'type': 'loss', 'content': 0.044296883046627045, 'timestamp': '2025-10-02 00:33:25.849269', 'step': 12583, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:25.910729', 'step': 12583, 'epoch': 2}
{'type': 'loss', 'content': 0.02567915804684162, 'timestamp': '2025-10-02 00:33:25.917030', 'step': 12584, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:25.970888', 'step': 12584, 'epoch': 2}
{'type': 'loss', 'content': 0.020481858402490616, 'timestamp': '2025-10-02 00:33:25.975227', 'step': 12585, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:26.031025', 'step': 12585, 'epoch': 2}
{'type': 'loss', 'content': 0.07722242176532745, 'timestamp': '2025-10-02 00:33:26.033720', 'step': 12586, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:26.089643', 'step': 12586, 'epoch': 2}
{'type': 'loss', 'content': 0.05325689539313316, 'timestamp': '2025-10-02 00:33:26.095802', 'step': 12587, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:26.150685', 'step': 12587, 'epoch': 2}
{'type': 'loss', 'content': 0.07110801339149475, 'timestamp': '2025-10-02 00:33:26.157151', 'step': 12588, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:26.211505', 'step': 12588, 'epoch': 2}
{'type': 'loss', 'content': 0.09248869121074677, 'timestamp': '2025-10-02 00:33:26.214480', 'step': 12589, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:26.269538', 'step': 12589, 'epoch': 2}
{'type': 'loss', 'content': 0.025514256209135056, 'timestamp': '2025-10-02 00:33:26.272283', 'step': 12590, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:33:26.334015', 'step': 12590, 'epoch': 2}
{'type': 'loss', 'content': 0.06415775418281555, 'timestamp': '2025-10-02 00:33:26.344503', 'step': 12591, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:33:26.420773', 'step': 12591, 'epoch': 2}
{'type': 'loss', 'content': 0.016806019470095634, 'timestamp': '2025-10-02 00:33:26.434977', 'step': 12592, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:33:26.490934', 'step': 12592, 'epoch': 2}
{'type': 'loss', 'content': 0.1463392674922943, 'timestamp': '2025-10-02 00:33:26.493275', 'step': 12593, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:26.548873', 'step': 12593, 'epoch': 2}
{'type': 'loss', 'content': 0.04033059999346733, 'timestamp': '2025-10-02 00:33:26.551227', 'step': 12594, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:26.609008', 'step': 12594, 'epoch': 2}
{'type': 'loss', 'content': 0.026172049343585968, 'timestamp': '2025-10-02 00:33:26.618530', 'step': 12595, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:33:26.680408', 'step': 12595, 'epoch': 2}
{'type': 'loss', 'content': 0.029435116797685623, 'timestamp': '2025-10-02 00:33:26.691666', 'step': 12596, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:33:26.754072', 'step': 12596, 'epoch': 2}
{'type': 'loss', 'content': 0.040705736726522446, 'timestamp': '2025-10-02 00:33:26.765876', 'step': 12597, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:33:26.822174', 'step': 12597, 'epoch': 2}
{'type': 'loss', 'content': 0.0733645111322403, 'timestamp': '2025-10-02 00:33:26.824910', 'step': 12598, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:33:26.888785', 'step': 12598, 'epoch': 2}
{'type': 'loss', 'content': 0.004555368795990944, 'timestamp': '2025-10-02 00:33:26.899464', 'step': 12599, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:26.957578', 'step': 12599, 'epoch': 2}
{'type': 'loss', 'content': 0.0756496787071228, 'timestamp': '2025-10-02 00:33:26.966214', 'step': 12600, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:27.021464', 'step': 12600, 'epoch': 2}
{'type': 'loss', 'content': 0.07628826051950455, 'timestamp': '2025-10-02 00:33:27.025022', 'step': 12601, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:27.080302', 'step': 12601, 'epoch': 2}
{'type': 'loss', 'content': 0.07263538986444473, 'timestamp': '2025-10-02 00:33:27.089646', 'step': 12602, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:27.145300', 'step': 12602, 'epoch': 2}
{'type': 'loss', 'content': 0.0954219326376915, 'timestamp': '2025-10-02 00:33:27.152712', 'step': 12603, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:27.209252', 'step': 12603, 'epoch': 2}
{'type': 'loss', 'content': 0.057898372411727905, 'timestamp': '2025-10-02 00:33:27.215466', 'step': 12604, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:33:27.276126', 'step': 12604, 'epoch': 2}
{'type': 'loss', 'content': 0.03985244408249855, 'timestamp': '2025-10-02 00:33:27.287657', 'step': 12605, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:27.342204', 'step': 12605, 'epoch': 2}
{'type': 'loss', 'content': 0.050510577857494354, 'timestamp': '2025-10-02 00:33:27.344859', 'step': 12606, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:27.407535', 'step': 12606, 'epoch': 2}
{'type': 'loss', 'content': 0.04023260623216629, 'timestamp': '2025-10-02 00:33:27.410192', 'step': 12607, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:33:27.474170', 'step': 12607, 'epoch': 2}
{'type': 'loss', 'content': 0.014345604926347733, 'timestamp': '2025-10-02 00:33:27.485438', 'step': 12608, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:33:27.550641', 'step': 12608, 'epoch': 2}
{'type': 'loss', 'content': 0.017537035048007965, 'timestamp': '2025-10-02 00:33:27.561977', 'step': 12609, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:27.622893', 'step': 12609, 'epoch': 2}
{'type': 'loss', 'content': 0.05281783267855644, 'timestamp': '2025-10-02 00:33:27.633123', 'step': 12610, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:27.689655', 'step': 12610, 'epoch': 2}
{'type': 'loss', 'content': 0.09494251757860184, 'timestamp': '2025-10-02 00:33:27.695782', 'step': 12611, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:27.751146', 'step': 12611, 'epoch': 2}
{'type': 'loss', 'content': 0.0669519305229187, 'timestamp': '2025-10-02 00:33:27.761293', 'step': 12612, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:27.815247', 'step': 12612, 'epoch': 2}
{'type': 'loss', 'content': 0.03074323944747448, 'timestamp': '2025-10-02 00:33:27.817559', 'step': 12613, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:27.872933', 'step': 12613, 'epoch': 2}
{'type': 'loss', 'content': 0.04318588227033615, 'timestamp': '2025-10-02 00:33:27.875348', 'step': 12614, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:33:27.930187', 'step': 12614, 'epoch': 2}
{'type': 'loss', 'content': 0.16292180120944977, 'timestamp': '2025-10-02 00:33:27.933079', 'step': 12615, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:27.988467', 'step': 12615, 'epoch': 2}
{'type': 'loss', 'content': 0.01578740030527115, 'timestamp': '2025-10-02 00:33:27.994273', 'step': 12616, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:28.048365', 'step': 12616, 'epoch': 2}
{'type': 'loss', 'content': 0.05311061814427376, 'timestamp': '2025-10-02 00:33:28.050912', 'step': 12617, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:28.105363', 'step': 12617, 'epoch': 2}
{'type': 'loss', 'content': 0.02309095300734043, 'timestamp': '2025-10-02 00:33:28.113156', 'step': 12618, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:28.168486', 'step': 12618, 'epoch': 2}
{'type': 'loss', 'content': 0.035895004868507385, 'timestamp': '2025-10-02 00:33:28.177842', 'step': 12619, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:28.232381', 'step': 12619, 'epoch': 2}
{'type': 'loss', 'content': 0.07161837816238403, 'timestamp': '2025-10-02 00:33:28.238274', 'step': 12620, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:33:28.292697', 'step': 12620, 'epoch': 2}
{'type': 'loss', 'content': 0.17210733890533447, 'timestamp': '2025-10-02 00:33:28.295115', 'step': 12621, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:28.354167', 'step': 12621, 'epoch': 2}
{'type': 'loss', 'content': 0.0275060273706913, 'timestamp': '2025-10-02 00:33:28.364346', 'step': 12622, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:28.423814', 'step': 12622, 'epoch': 2}
{'type': 'loss', 'content': 0.04349345713853836, 'timestamp': '2025-10-02 00:33:28.433126', 'step': 12623, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:28.488833', 'step': 12623, 'epoch': 2}
{'type': 'loss', 'content': 0.06230336055159569, 'timestamp': '2025-10-02 00:33:28.494897', 'step': 12624, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:28.549617', 'step': 12624, 'epoch': 2}
{'type': 'loss', 'content': 0.1373622864484787, 'timestamp': '2025-10-02 00:33:28.552036', 'step': 12625, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:28.605416', 'step': 12625, 'epoch': 2}
{'type': 'loss', 'content': 0.037757113575935364, 'timestamp': '2025-10-02 00:33:28.608130', 'step': 12626, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:28.663003', 'step': 12626, 'epoch': 2}
{'type': 'loss', 'content': 0.07668271660804749, 'timestamp': '2025-10-02 00:33:28.665404', 'step': 12627, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:28.720196', 'step': 12627, 'epoch': 2}
{'type': 'loss', 'content': 0.04409635066986084, 'timestamp': '2025-10-02 00:33:28.730345', 'step': 12628, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:28.783218', 'step': 12628, 'epoch': 2}
{'type': 'loss', 'content': 0.09955126792192459, 'timestamp': '2025-10-02 00:33:28.786675', 'step': 12629, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:28.841965', 'step': 12629, 'epoch': 2}
{'type': 'loss', 'content': 0.017096806317567825, 'timestamp': '2025-10-02 00:33:28.844225', 'step': 12630, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:28.899294', 'step': 12630, 'epoch': 2}
{'type': 'loss', 'content': 0.03574133291840553, 'timestamp': '2025-10-02 00:33:28.905523', 'step': 12631, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:28.960589', 'step': 12631, 'epoch': 2}
{'type': 'loss', 'content': 0.01064217183738947, 'timestamp': '2025-10-02 00:33:28.967014', 'step': 12632, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:33:29.019985', 'step': 12632, 'epoch': 2}
{'type': 'loss', 'content': 0.05429425463080406, 'timestamp': '2025-10-02 00:33:29.022050', 'step': 12633, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:29.075744', 'step': 12633, 'epoch': 2}
{'type': 'loss', 'content': 0.114298976957798, 'timestamp': '2025-10-02 00:33:29.078288', 'step': 12634, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:29.133598', 'step': 12634, 'epoch': 2}
{'type': 'loss', 'content': 0.003239052603021264, 'timestamp': '2025-10-02 00:33:29.136501', 'step': 12635, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:33:29.190492', 'step': 12635, 'epoch': 2}
{'type': 'loss', 'content': 0.06052091717720032, 'timestamp': '2025-10-02 00:33:29.196659', 'step': 12636, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:29.250291', 'step': 12636, 'epoch': 2}
{'type': 'loss', 'content': 0.16414466500282288, 'timestamp': '2025-10-02 00:33:29.252889', 'step': 12637, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:29.307212', 'step': 12637, 'epoch': 2}
{'type': 'loss', 'content': 0.22169896960258484, 'timestamp': '2025-10-02 00:33:29.310274', 'step': 12638, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:33:29.363602', 'step': 12638, 'epoch': 2}
{'type': 'loss', 'content': 0.040036678314208984, 'timestamp': '2025-10-02 00:33:29.366931', 'step': 12639, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:29.423403', 'step': 12639, 'epoch': 2}
{'type': 'loss', 'content': 0.09151732176542282, 'timestamp': '2025-10-02 00:33:29.431997', 'step': 12640, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:29.490767', 'step': 12640, 'epoch': 2}
{'type': 'loss', 'content': 0.09513968229293823, 'timestamp': '2025-10-02 00:33:29.494222', 'step': 12641, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:29.550473', 'step': 12641, 'epoch': 2}
{'type': 'loss', 'content': 0.029734911397099495, 'timestamp': '2025-10-02 00:33:29.552971', 'step': 12642, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:29.606755', 'step': 12642, 'epoch': 2}
{'type': 'loss', 'content': 0.13606669008731842, 'timestamp': '2025-10-02 00:33:29.608880', 'step': 12643, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:29.663400', 'step': 12643, 'epoch': 2}
{'type': 'loss', 'content': 0.11524811387062073, 'timestamp': '2025-10-02 00:33:29.669149', 'step': 12644, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:29.722994', 'step': 12644, 'epoch': 2}
{'type': 'loss', 'content': 0.054300811141729355, 'timestamp': '2025-10-02 00:33:29.732703', 'step': 12645, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:33:29.794393', 'step': 12645, 'epoch': 2}
{'type': 'loss', 'content': 0.033427946269512177, 'timestamp': '2025-10-02 00:33:29.805084', 'step': 12646, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:29.859766', 'step': 12646, 'epoch': 2}
{'type': 'loss', 'content': 0.03403288125991821, 'timestamp': '2025-10-02 00:33:29.862781', 'step': 12647, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:29.918990', 'step': 12647, 'epoch': 2}
{'type': 'loss', 'content': 0.11180483549833298, 'timestamp': '2025-10-02 00:33:29.925447', 'step': 12648, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:29.981631', 'step': 12648, 'epoch': 2}
{'type': 'loss', 'content': 0.03907278925180435, 'timestamp': '2025-10-02 00:33:29.991878', 'step': 12649, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:30.049030', 'step': 12649, 'epoch': 2}
{'type': 'loss', 'content': 0.042947426438331604, 'timestamp': '2025-10-02 00:33:30.051848', 'step': 12650, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:30.106394', 'step': 12650, 'epoch': 2}
{'type': 'loss', 'content': 0.09835079312324524, 'timestamp': '2025-10-02 00:33:30.112561', 'step': 12651, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:30.167613', 'step': 12651, 'epoch': 2}
{'type': 'loss', 'content': 0.07955209910869598, 'timestamp': '2025-10-02 00:33:30.173767', 'step': 12652, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:30.229854', 'step': 12652, 'epoch': 2}
{'type': 'loss', 'content': 0.07933753728866577, 'timestamp': '2025-10-02 00:33:30.236050', 'step': 12653, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:30.290703', 'step': 12653, 'epoch': 2}
{'type': 'loss', 'content': 0.16099628806114197, 'timestamp': '2025-10-02 00:33:30.293248', 'step': 12654, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:30.352735', 'step': 12654, 'epoch': 2}
{'type': 'loss', 'content': 0.019191328436136246, 'timestamp': '2025-10-02 00:33:30.362949', 'step': 12655, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:30.419775', 'step': 12655, 'epoch': 2}
{'type': 'loss', 'content': 0.052512768656015396, 'timestamp': '2025-10-02 00:33:30.426118', 'step': 12656, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:30.480606', 'step': 12656, 'epoch': 2}
{'type': 'loss', 'content': 0.07648562639951706, 'timestamp': '2025-10-02 00:33:30.485523', 'step': 12657, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:33:30.546847', 'step': 12657, 'epoch': 2}
{'type': 'loss', 'content': 0.14188826084136963, 'timestamp': '2025-10-02 00:33:30.549825', 'step': 12658, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:30.604855', 'step': 12658, 'epoch': 2}
{'type': 'loss', 'content': 0.14710791409015656, 'timestamp': '2025-10-02 00:33:30.608232', 'step': 12659, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:30.663246', 'step': 12659, 'epoch': 2}
{'type': 'loss', 'content': 0.10146065801382065, 'timestamp': '2025-10-02 00:33:30.669935', 'step': 12660, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:30.724573', 'step': 12660, 'epoch': 2}
{'type': 'loss', 'content': 0.09081755578517914, 'timestamp': '2025-10-02 00:33:30.727339', 'step': 12661, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:30.782196', 'step': 12661, 'epoch': 2}
{'type': 'loss', 'content': 0.07545249909162521, 'timestamp': '2025-10-02 00:33:30.784527', 'step': 12662, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:30.839641', 'step': 12662, 'epoch': 2}
{'type': 'loss', 'content': 0.11922672390937805, 'timestamp': '2025-10-02 00:33:30.848974', 'step': 12663, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:33:30.917036', 'step': 12663, 'epoch': 2}
{'type': 'loss', 'content': 0.016560155898332596, 'timestamp': '2025-10-02 00:33:30.929815', 'step': 12664, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:30.988207', 'step': 12664, 'epoch': 2}
{'type': 'loss', 'content': 0.016470886766910553, 'timestamp': '2025-10-02 00:33:30.999220', 'step': 12665, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:31.054268', 'step': 12665, 'epoch': 2}
{'type': 'loss', 'content': 0.06637098640203476, 'timestamp': '2025-10-02 00:33:31.056431', 'step': 12666, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:33:31.121996', 'step': 12666, 'epoch': 2}
{'type': 'loss', 'content': 0.03612348437309265, 'timestamp': '2025-10-02 00:33:31.132487', 'step': 12667, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:33:31.188148', 'step': 12667, 'epoch': 2}
{'type': 'loss', 'content': 0.21408416330814362, 'timestamp': '2025-10-02 00:33:31.194435', 'step': 12668, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:31.250244', 'step': 12668, 'epoch': 2}
{'type': 'loss', 'content': 0.09924392402172089, 'timestamp': '2025-10-02 00:33:31.253766', 'step': 12669, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:31.311317', 'step': 12669, 'epoch': 2}
{'type': 'loss', 'content': 0.0390731617808342, 'timestamp': '2025-10-02 00:33:31.314669', 'step': 12670, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:31.374128', 'step': 12670, 'epoch': 2}
{'type': 'loss', 'content': 0.11026477813720703, 'timestamp': '2025-10-02 00:33:31.377108', 'step': 12671, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:31.436299', 'step': 12671, 'epoch': 2}
{'type': 'loss', 'content': 0.026785947382450104, 'timestamp': '2025-10-02 00:33:31.444464', 'step': 12672, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:31.505419', 'step': 12672, 'epoch': 2}
{'type': 'loss', 'content': 0.11357516795396805, 'timestamp': '2025-10-02 00:33:31.508100', 'step': 12673, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:31.567758', 'step': 12673, 'epoch': 2}
{'type': 'loss', 'content': 0.08567439764738083, 'timestamp': '2025-10-02 00:33:31.570842', 'step': 12674, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:31.627365', 'step': 12674, 'epoch': 2}
{'type': 'loss', 'content': 0.10056282579898834, 'timestamp': '2025-10-02 00:33:31.630641', 'step': 12675, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:31.686123', 'step': 12675, 'epoch': 2}
{'type': 'loss', 'content': 0.07675015181303024, 'timestamp': '2025-10-02 00:33:31.692391', 'step': 12676, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:31.746710', 'step': 12676, 'epoch': 2}
{'type': 'loss', 'content': 0.16921740770339966, 'timestamp': '2025-10-02 00:33:31.750387', 'step': 12677, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:31.808204', 'step': 12677, 'epoch': 2}
{'type': 'loss', 'content': 0.1678990125656128, 'timestamp': '2025-10-02 00:33:31.810356', 'step': 12678, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:31.864384', 'step': 12678, 'epoch': 2}
{'type': 'loss', 'content': 0.08028808236122131, 'timestamp': '2025-10-02 00:33:31.866775', 'step': 12679, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:31.923177', 'step': 12679, 'epoch': 2}
{'type': 'loss', 'content': 0.05393189564347267, 'timestamp': '2025-10-02 00:33:31.931758', 'step': 12680, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:31.989700', 'step': 12680, 'epoch': 2}
{'type': 'loss', 'content': 0.07524227350950241, 'timestamp': '2025-10-02 00:33:31.992558', 'step': 12681, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:32.055173', 'step': 12681, 'epoch': 2}
{'type': 'loss', 'content': 0.02805185317993164, 'timestamp': '2025-10-02 00:33:32.064550', 'step': 12682, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:32.121801', 'step': 12682, 'epoch': 2}
{'type': 'loss', 'content': 0.06529617309570312, 'timestamp': '2025-10-02 00:33:32.129476', 'step': 12683, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:32.184285', 'step': 12683, 'epoch': 2}
{'type': 'loss', 'content': 0.040048420429229736, 'timestamp': '2025-10-02 00:33:32.191091', 'step': 12684, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:32.247699', 'step': 12684, 'epoch': 2}
{'type': 'loss', 'content': 0.1033305898308754, 'timestamp': '2025-10-02 00:33:32.251146', 'step': 12685, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:32.306198', 'step': 12685, 'epoch': 2}
{'type': 'loss', 'content': 0.06384501606225967, 'timestamp': '2025-10-02 00:33:32.308985', 'step': 12686, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:32.365538', 'step': 12686, 'epoch': 2}
{'type': 'loss', 'content': 0.16670870780944824, 'timestamp': '2025-10-02 00:33:32.367916', 'step': 12687, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:32.421375', 'step': 12687, 'epoch': 2}
{'type': 'loss', 'content': 0.057563666254282, 'timestamp': '2025-10-02 00:33:32.428940', 'step': 12688, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:32.483058', 'step': 12688, 'epoch': 2}
{'type': 'loss', 'content': 0.06901907920837402, 'timestamp': '2025-10-02 00:33:32.486126', 'step': 12689, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:32.540472', 'step': 12689, 'epoch': 2}
{'type': 'loss', 'content': 0.07449530810117722, 'timestamp': '2025-10-02 00:33:32.542828', 'step': 12690, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:33:32.604841', 'step': 12690, 'epoch': 2}
{'type': 'loss', 'content': 0.028761137276887894, 'timestamp': '2025-10-02 00:33:32.615367', 'step': 12691, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:32.671859', 'step': 12691, 'epoch': 2}
{'type': 'loss', 'content': 0.03647608309984207, 'timestamp': '2025-10-02 00:33:32.682188', 'step': 12692, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:32.736251', 'step': 12692, 'epoch': 2}
{'type': 'loss', 'content': 0.03857938572764397, 'timestamp': '2025-10-02 00:33:32.742415', 'step': 12693, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:32.797037', 'step': 12693, 'epoch': 2}
{'type': 'loss', 'content': 0.027592068538069725, 'timestamp': '2025-10-02 00:33:32.801051', 'step': 12694, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:32.855266', 'step': 12694, 'epoch': 2}
{'type': 'loss', 'content': 0.1789880245923996, 'timestamp': '2025-10-02 00:33:32.857855', 'step': 12695, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:32.915527', 'step': 12695, 'epoch': 2}
{'type': 'loss', 'content': 0.10805310308933258, 'timestamp': '2025-10-02 00:33:32.921836', 'step': 12696, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:33:32.987877', 'step': 12696, 'epoch': 2}
{'type': 'loss', 'content': 0.01919110305607319, 'timestamp': '2025-10-02 00:33:33.000899', 'step': 12697, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:33:33.064651', 'step': 12697, 'epoch': 2}
{'type': 'loss', 'content': 0.02131938561797142, 'timestamp': '2025-10-02 00:33:33.075301', 'step': 12698, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:33.130448', 'step': 12698, 'epoch': 2}
{'type': 'loss', 'content': 0.03391774743795395, 'timestamp': '2025-10-02 00:33:33.132980', 'step': 12699, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:33:33.196254', 'step': 12699, 'epoch': 2}
{'type': 'loss', 'content': 0.022644605487585068, 'timestamp': '2025-10-02 00:33:33.207652', 'step': 12700, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:33.262398', 'step': 12700, 'epoch': 2}
{'type': 'loss', 'content': 0.14223772287368774, 'timestamp': '2025-10-02 00:33:33.264547', 'step': 12701, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:33:33.317845', 'step': 12701, 'epoch': 2}
{'type': 'loss', 'content': 0.051959119737148285, 'timestamp': '2025-10-02 00:33:33.320662', 'step': 12702, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:33.375223', 'step': 12702, 'epoch': 2}
{'type': 'loss', 'content': 0.13392409682273865, 'timestamp': '2025-10-02 00:33:33.377998', 'step': 12703, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:33.432041', 'step': 12703, 'epoch': 2}
{'type': 'loss', 'content': 0.028604647144675255, 'timestamp': '2025-10-02 00:33:33.438114', 'step': 12704, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:33:33.491794', 'step': 12704, 'epoch': 2}
{'type': 'loss', 'content': 0.06215565279126167, 'timestamp': '2025-10-02 00:33:33.494333', 'step': 12705, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:33.549546', 'step': 12705, 'epoch': 2}
{'type': 'loss', 'content': 0.015500931069254875, 'timestamp': '2025-10-02 00:33:33.552294', 'step': 12706, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:33.606329', 'step': 12706, 'epoch': 2}
{'type': 'loss', 'content': 0.033128462731838226, 'timestamp': '2025-10-02 00:33:33.609021', 'step': 12707, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:33.663856', 'step': 12707, 'epoch': 2}
{'type': 'loss', 'content': 0.1363183856010437, 'timestamp': '2025-10-02 00:33:33.672283', 'step': 12708, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:33:33.725984', 'step': 12708, 'epoch': 2}
{'type': 'loss', 'content': 0.06693675369024277, 'timestamp': '2025-10-02 00:33:33.729139', 'step': 12709, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:33.785539', 'step': 12709, 'epoch': 2}
{'type': 'loss', 'content': 0.035061679780483246, 'timestamp': '2025-10-02 00:33:33.795072', 'step': 12710, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:33:33.852280', 'step': 12710, 'epoch': 2}
{'type': 'loss', 'content': 0.0748368576169014, 'timestamp': '2025-10-02 00:33:33.854468', 'step': 12711, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:33.910485', 'step': 12711, 'epoch': 2}
{'type': 'loss', 'content': 0.02468588575720787, 'timestamp': '2025-10-02 00:33:33.916929', 'step': 12712, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:33.970946', 'step': 12712, 'epoch': 2}
{'type': 'loss', 'content': 0.042188599705696106, 'timestamp': '2025-10-02 00:33:33.981012', 'step': 12713, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:34.037523', 'step': 12713, 'epoch': 2}
{'type': 'loss', 'content': 0.01892816461622715, 'timestamp': '2025-10-02 00:33:34.047104', 'step': 12714, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:34.107007', 'step': 12714, 'epoch': 2}
{'type': 'loss', 'content': 0.06928689032793045, 'timestamp': '2025-10-02 00:33:34.117197', 'step': 12715, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:34.172048', 'step': 12715, 'epoch': 2}
{'type': 'loss', 'content': 0.07212543487548828, 'timestamp': '2025-10-02 00:33:34.178307', 'step': 12716, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:34.232156', 'step': 12716, 'epoch': 2}
{'type': 'loss', 'content': 0.025959080085158348, 'timestamp': '2025-10-02 00:33:34.234613', 'step': 12717, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:34.288862', 'step': 12717, 'epoch': 2}
{'type': 'loss', 'content': 0.12910379469394684, 'timestamp': '2025-10-02 00:33:34.291797', 'step': 12718, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:33:34.353335', 'step': 12718, 'epoch': 2}
{'type': 'loss', 'content': 0.043269697576761246, 'timestamp': '2025-10-02 00:33:34.363809', 'step': 12719, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:34.417847', 'step': 12719, 'epoch': 2}
{'type': 'loss', 'content': 0.032963693141937256, 'timestamp': '2025-10-02 00:33:34.424858', 'step': 12720, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:34.478072', 'step': 12720, 'epoch': 2}
{'type': 'loss', 'content': 0.16969357430934906, 'timestamp': '2025-10-02 00:33:34.480389', 'step': 12721, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:34.534354', 'step': 12721, 'epoch': 2}
{'type': 'loss', 'content': 0.10307732224464417, 'timestamp': '2025-10-02 00:33:34.540583', 'step': 12722, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:34.596161', 'step': 12722, 'epoch': 2}
{'type': 'loss', 'content': 0.13402201235294342, 'timestamp': '2025-10-02 00:33:34.598992', 'step': 12723, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:34.652929', 'step': 12723, 'epoch': 2}
{'type': 'loss', 'content': 0.2021448016166687, 'timestamp': '2025-10-02 00:33:34.660363', 'step': 12724, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:34.715037', 'step': 12724, 'epoch': 2}
{'type': 'loss', 'content': 0.03204883635044098, 'timestamp': '2025-10-02 00:33:34.725295', 'step': 12725, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:34.779023', 'step': 12725, 'epoch': 2}
{'type': 'loss', 'content': 0.08007770776748657, 'timestamp': '2025-10-02 00:33:34.781388', 'step': 12726, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:34.835020', 'step': 12726, 'epoch': 2}
{'type': 'loss', 'content': 0.1410692036151886, 'timestamp': '2025-10-02 00:33:34.837458', 'step': 12727, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:34.891909', 'step': 12727, 'epoch': 2}
{'type': 'loss', 'content': 0.04696470499038696, 'timestamp': '2025-10-02 00:33:34.902002', 'step': 12728, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:34.956529', 'step': 12728, 'epoch': 2}
{'type': 'loss', 'content': 0.13046273589134216, 'timestamp': '2025-10-02 00:33:34.959234', 'step': 12729, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:35.012409', 'step': 12729, 'epoch': 2}
{'type': 'loss', 'content': 0.10054474323987961, 'timestamp': '2025-10-02 00:33:35.015047', 'step': 12730, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:35.070337', 'step': 12730, 'epoch': 2}
{'type': 'loss', 'content': 0.06461931765079498, 'timestamp': '2025-10-02 00:33:35.079702', 'step': 12731, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:35.138318', 'step': 12731, 'epoch': 2}
{'type': 'loss', 'content': 0.032730650156736374, 'timestamp': '2025-10-02 00:33:35.148452', 'step': 12732, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:35.202756', 'step': 12732, 'epoch': 2}
{'type': 'loss', 'content': 0.01863749511539936, 'timestamp': '2025-10-02 00:33:35.208974', 'step': 12733, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:35.263408', 'step': 12733, 'epoch': 2}
{'type': 'loss', 'content': 0.1447656750679016, 'timestamp': '2025-10-02 00:33:35.265793', 'step': 12734, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:33:35.320174', 'step': 12734, 'epoch': 2}
{'type': 'loss', 'content': 0.045167870819568634, 'timestamp': '2025-10-02 00:33:35.322364', 'step': 12735, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:35.375494', 'step': 12735, 'epoch': 2}
{'type': 'loss', 'content': 0.0397312231361866, 'timestamp': '2025-10-02 00:33:35.381848', 'step': 12736, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:35.435214', 'step': 12736, 'epoch': 2}
{'type': 'loss', 'content': 0.1143316701054573, 'timestamp': '2025-10-02 00:33:35.437768', 'step': 12737, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:35.491878', 'step': 12737, 'epoch': 2}
{'type': 'loss', 'content': 0.04209965467453003, 'timestamp': '2025-10-02 00:33:35.494444', 'step': 12738, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:35.548316', 'step': 12738, 'epoch': 2}
{'type': 'loss', 'content': 0.0351363830268383, 'timestamp': '2025-10-02 00:33:35.550841', 'step': 12739, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:35.605136', 'step': 12739, 'epoch': 2}
{'type': 'loss', 'content': 0.10278958827257156, 'timestamp': '2025-10-02 00:33:35.611914', 'step': 12740, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:35.665139', 'step': 12740, 'epoch': 2}
{'type': 'loss', 'content': 0.025410009548068047, 'timestamp': '2025-10-02 00:33:35.667720', 'step': 12741, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:35.722917', 'step': 12741, 'epoch': 2}
{'type': 'loss', 'content': 0.020058881491422653, 'timestamp': '2025-10-02 00:33:35.725346', 'step': 12742, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:35.780257', 'step': 12742, 'epoch': 2}
{'type': 'loss', 'content': 0.17276041209697723, 'timestamp': '2025-10-02 00:33:35.782634', 'step': 12743, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:35.836417', 'step': 12743, 'epoch': 2}
{'type': 'loss', 'content': 0.11878205090761185, 'timestamp': '2025-10-02 00:33:35.843528', 'step': 12744, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:35.897720', 'step': 12744, 'epoch': 2}
{'type': 'loss', 'content': 0.08259528875350952, 'timestamp': '2025-10-02 00:33:35.900415', 'step': 12745, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:35.955412', 'step': 12745, 'epoch': 2}
{'type': 'loss', 'content': 0.053156498819589615, 'timestamp': '2025-10-02 00:33:35.961731', 'step': 12746, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:36.017425', 'step': 12746, 'epoch': 2}
{'type': 'loss', 'content': 0.07746598869562149, 'timestamp': '2025-10-02 00:33:36.020508', 'step': 12747, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:33:36.073539', 'step': 12747, 'epoch': 2}
{'type': 'loss', 'content': 0.09406249225139618, 'timestamp': '2025-10-02 00:33:36.079830', 'step': 12748, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:33:36.134296', 'step': 12748, 'epoch': 2}
{'type': 'loss', 'content': 0.14521420001983643, 'timestamp': '2025-10-02 00:33:36.136703', 'step': 12749, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:36.190644', 'step': 12749, 'epoch': 2}
{'type': 'loss', 'content': 0.06964054703712463, 'timestamp': '2025-10-02 00:33:36.193400', 'step': 12750, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:36.246881', 'step': 12750, 'epoch': 2}
{'type': 'loss', 'content': 0.08925393223762512, 'timestamp': '2025-10-02 00:33:36.249665', 'step': 12751, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:36.304750', 'step': 12751, 'epoch': 2}
{'type': 'loss', 'content': 0.08221016824245453, 'timestamp': '2025-10-02 00:33:36.313221', 'step': 12752, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:36.368083', 'step': 12752, 'epoch': 2}
{'type': 'loss', 'content': 0.013848187401890755, 'timestamp': '2025-10-02 00:33:36.370330', 'step': 12753, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:36.425277', 'step': 12753, 'epoch': 2}
{'type': 'loss', 'content': 0.009525361470878124, 'timestamp': '2025-10-02 00:33:36.431563', 'step': 12754, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:36.485783', 'step': 12754, 'epoch': 2}
{'type': 'loss', 'content': 0.17920753359794617, 'timestamp': '2025-10-02 00:33:36.488520', 'step': 12755, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:36.542322', 'step': 12755, 'epoch': 2}
{'type': 'loss', 'content': 0.054685138165950775, 'timestamp': '2025-10-02 00:33:36.549083', 'step': 12756, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:36.602587', 'step': 12756, 'epoch': 2}
{'type': 'loss', 'content': 0.04874573275446892, 'timestamp': '2025-10-02 00:33:36.605028', 'step': 12757, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:36.659221', 'step': 12757, 'epoch': 2}
{'type': 'loss', 'content': 0.05972384661436081, 'timestamp': '2025-10-02 00:33:36.661813', 'step': 12758, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:36.716980', 'step': 12758, 'epoch': 2}
{'type': 'loss', 'content': 0.023268699645996094, 'timestamp': '2025-10-02 00:33:36.726538', 'step': 12759, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:36.782761', 'step': 12759, 'epoch': 2}
{'type': 'loss', 'content': 0.19701410830020905, 'timestamp': '2025-10-02 00:33:36.788685', 'step': 12760, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:36.842840', 'step': 12760, 'epoch': 2}
{'type': 'loss', 'content': 0.023381780833005905, 'timestamp': '2025-10-02 00:33:36.845519', 'step': 12761, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:36.900353', 'step': 12761, 'epoch': 2}
{'type': 'loss', 'content': 0.03128746896982193, 'timestamp': '2025-10-02 00:33:36.908199', 'step': 12762, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:36.963088', 'step': 12762, 'epoch': 2}
{'type': 'loss', 'content': 0.0826418399810791, 'timestamp': '2025-10-02 00:33:36.965463', 'step': 12763, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:37.018904', 'step': 12763, 'epoch': 2}
{'type': 'loss', 'content': 0.08700735867023468, 'timestamp': '2025-10-02 00:33:37.024820', 'step': 12764, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:33:37.078230', 'step': 12764, 'epoch': 2}
{'type': 'loss', 'content': 0.27670493721961975, 'timestamp': '2025-10-02 00:33:37.080558', 'step': 12765, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:37.133785', 'step': 12765, 'epoch': 2}
{'type': 'loss', 'content': 0.13684451580047607, 'timestamp': '2025-10-02 00:33:37.136076', 'step': 12766, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:33:37.197632', 'step': 12766, 'epoch': 2}
{'type': 'loss', 'content': 0.06580732762813568, 'timestamp': '2025-10-02 00:33:37.208317', 'step': 12767, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:37.262985', 'step': 12767, 'epoch': 2}
{'type': 'loss', 'content': 0.010020457208156586, 'timestamp': '2025-10-02 00:33:37.273065', 'step': 12768, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:37.326920', 'step': 12768, 'epoch': 2}
{'type': 'loss', 'content': 0.13212850689888, 'timestamp': '2025-10-02 00:33:37.329283', 'step': 12769, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:37.383133', 'step': 12769, 'epoch': 2}
{'type': 'loss', 'content': 0.08154483139514923, 'timestamp': '2025-10-02 00:33:37.389515', 'step': 12770, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:37.447220', 'step': 12770, 'epoch': 2}
{'type': 'loss', 'content': 0.11220230907201767, 'timestamp': '2025-10-02 00:33:37.450304', 'step': 12771, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:37.504601', 'step': 12771, 'epoch': 2}
{'type': 'loss', 'content': 0.0804191529750824, 'timestamp': '2025-10-02 00:33:37.510539', 'step': 12772, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:37.563417', 'step': 12772, 'epoch': 2}
{'type': 'loss', 'content': 0.03454392030835152, 'timestamp': '2025-10-02 00:33:37.566070', 'step': 12773, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:37.621815', 'step': 12773, 'epoch': 2}
{'type': 'loss', 'content': 0.037578683346509933, 'timestamp': '2025-10-02 00:33:37.631422', 'step': 12774, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:37.685830', 'step': 12774, 'epoch': 2}
{'type': 'loss', 'content': 0.06183726340532303, 'timestamp': '2025-10-02 00:33:37.693661', 'step': 12775, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:37.747489', 'step': 12775, 'epoch': 2}
{'type': 'loss', 'content': 0.06903843581676483, 'timestamp': '2025-10-02 00:33:37.753350', 'step': 12776, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:37.806836', 'step': 12776, 'epoch': 2}
{'type': 'loss', 'content': 0.05185868963599205, 'timestamp': '2025-10-02 00:33:37.809166', 'step': 12777, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:37.862529', 'step': 12777, 'epoch': 2}
{'type': 'loss', 'content': 0.07892025262117386, 'timestamp': '2025-10-02 00:33:37.865350', 'step': 12778, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:37.919942', 'step': 12778, 'epoch': 2}
{'type': 'loss', 'content': 0.1249583512544632, 'timestamp': '2025-10-02 00:33:37.922230', 'step': 12779, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:37.975906', 'step': 12779, 'epoch': 2}
{'type': 'loss', 'content': 0.10146357119083405, 'timestamp': '2025-10-02 00:33:37.981600', 'step': 12780, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:33:38.036755', 'step': 12780, 'epoch': 2}
{'type': 'loss', 'content': 0.062308769673109055, 'timestamp': '2025-10-02 00:33:38.039919', 'step': 12781, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:38.093731', 'step': 12781, 'epoch': 2}
{'type': 'loss', 'content': 0.03756154328584671, 'timestamp': '2025-10-02 00:33:38.096112', 'step': 12782, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:38.150327', 'step': 12782, 'epoch': 2}
{'type': 'loss', 'content': 0.062419697642326355, 'timestamp': '2025-10-02 00:33:38.156536', 'step': 12783, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:33:38.236050', 'step': 12783, 'epoch': 2}
{'type': 'loss', 'content': 0.03977179527282715, 'timestamp': '2025-10-02 00:33:38.249489', 'step': 12784, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:38.303461', 'step': 12784, 'epoch': 2}
{'type': 'loss', 'content': 0.13097071647644043, 'timestamp': '2025-10-02 00:33:38.305829', 'step': 12785, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:38.362663', 'step': 12785, 'epoch': 2}
{'type': 'loss', 'content': 0.12337072938680649, 'timestamp': '2025-10-02 00:33:38.364804', 'step': 12786, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:38.420181', 'step': 12786, 'epoch': 2}
{'type': 'loss', 'content': 0.03984655812382698, 'timestamp': '2025-10-02 00:33:38.422753', 'step': 12787, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:38.477698', 'step': 12787, 'epoch': 2}
{'type': 'loss', 'content': 0.04768221825361252, 'timestamp': '2025-10-02 00:33:38.483984', 'step': 12788, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:33:38.544069', 'step': 12788, 'epoch': 2}
{'type': 'loss', 'content': 0.06784194707870483, 'timestamp': '2025-10-02 00:33:38.555413', 'step': 12789, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:38.610109', 'step': 12789, 'epoch': 2}
{'type': 'loss', 'content': 0.018758144229650497, 'timestamp': '2025-10-02 00:33:38.613092', 'step': 12790, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:33:38.674868', 'step': 12790, 'epoch': 2}
{'type': 'loss', 'content': 0.09310303628444672, 'timestamp': '2025-10-02 00:33:38.685520', 'step': 12791, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:38.747076', 'step': 12791, 'epoch': 2}
{'type': 'loss', 'content': 0.03518233820796013, 'timestamp': '2025-10-02 00:33:38.754110', 'step': 12792, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:38.807494', 'step': 12792, 'epoch': 2}
{'type': 'loss', 'content': 0.0735778883099556, 'timestamp': '2025-10-02 00:33:38.817380', 'step': 12793, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:38.873029', 'step': 12793, 'epoch': 2}
{'type': 'loss', 'content': 0.015947097912430763, 'timestamp': '2025-10-02 00:33:38.879327', 'step': 12794, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:38.934193', 'step': 12794, 'epoch': 2}
{'type': 'loss', 'content': 0.14164361357688904, 'timestamp': '2025-10-02 00:33:38.936550', 'step': 12795, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:38.989721', 'step': 12795, 'epoch': 2}
{'type': 'loss', 'content': 0.13791519403457642, 'timestamp': '2025-10-02 00:33:38.995645', 'step': 12796, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:39.048995', 'step': 12796, 'epoch': 2}
{'type': 'loss', 'content': 0.04459948465228081, 'timestamp': '2025-10-02 00:33:39.051914', 'step': 12797, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:39.106443', 'step': 12797, 'epoch': 2}
{'type': 'loss', 'content': 0.15761788189411163, 'timestamp': '2025-10-02 00:33:39.108847', 'step': 12798, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:39.162618', 'step': 12798, 'epoch': 2}
{'type': 'loss', 'content': 0.090227410197258, 'timestamp': '2025-10-02 00:33:39.165315', 'step': 12799, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:39.219262', 'step': 12799, 'epoch': 2}
{'type': 'loss', 'content': 0.10019410401582718, 'timestamp': '2025-10-02 00:33:39.224946', 'step': 12800, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:39.278469', 'step': 12800, 'epoch': 2}
{'type': 'loss', 'content': 0.10757163166999817, 'timestamp': '2025-10-02 00:33:39.281048', 'step': 12801, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:39.335612', 'step': 12801, 'epoch': 2}
{'type': 'loss', 'content': 0.013341077603399754, 'timestamp': '2025-10-02 00:33:39.343459', 'step': 12802, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:39.401529', 'step': 12802, 'epoch': 2}
{'type': 'loss', 'content': 0.1341286599636078, 'timestamp': '2025-10-02 00:33:39.404589', 'step': 12803, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:39.464160', 'step': 12803, 'epoch': 2}
{'type': 'loss', 'content': 0.024589180946350098, 'timestamp': '2025-10-02 00:33:39.475933', 'step': 12804, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:39.533329', 'step': 12804, 'epoch': 2}
{'type': 'loss', 'content': 0.027642786502838135, 'timestamp': '2025-10-02 00:33:39.544305', 'step': 12805, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:39.598851', 'step': 12805, 'epoch': 2}
{'type': 'loss', 'content': 0.0994391143321991, 'timestamp': '2025-10-02 00:33:39.602051', 'step': 12806, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:39.658405', 'step': 12806, 'epoch': 2}
{'type': 'loss', 'content': 0.08163812756538391, 'timestamp': '2025-10-02 00:33:39.661355', 'step': 12807, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:39.716633', 'step': 12807, 'epoch': 2}
{'type': 'loss', 'content': 0.02360629476606846, 'timestamp': '2025-10-02 00:33:39.723650', 'step': 12808, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:33:39.787713', 'step': 12808, 'epoch': 2}
{'type': 'loss', 'content': 0.05196496844291687, 'timestamp': '2025-10-02 00:33:39.799226', 'step': 12809, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:33:39.855891', 'step': 12809, 'epoch': 2}
{'type': 'loss', 'content': 0.10598402470350266, 'timestamp': '2025-10-02 00:33:39.858788', 'step': 12810, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:39.915933', 'step': 12810, 'epoch': 2}
{'type': 'loss', 'content': 0.1963154673576355, 'timestamp': '2025-10-02 00:33:39.925297', 'step': 12811, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:39.992164', 'step': 12811, 'epoch': 2}
{'type': 'loss', 'content': 0.13282571732997894, 'timestamp': '2025-10-02 00:33:40.001193', 'step': 12812, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:40.062075', 'step': 12812, 'epoch': 2}
{'type': 'loss', 'content': 0.04228173941373825, 'timestamp': '2025-10-02 00:33:40.072174', 'step': 12813, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:33:40.129792', 'step': 12813, 'epoch': 2}
{'type': 'loss', 'content': 0.06604910641908646, 'timestamp': '2025-10-02 00:33:40.134506', 'step': 12814, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:40.194334', 'step': 12814, 'epoch': 2}
{'type': 'loss', 'content': 0.0826718881726265, 'timestamp': '2025-10-02 00:33:40.197333', 'step': 12815, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:40.254042', 'step': 12815, 'epoch': 2}
{'type': 'loss', 'content': 0.11113397032022476, 'timestamp': '2025-10-02 00:33:40.261428', 'step': 12816, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:40.318946', 'step': 12816, 'epoch': 2}
{'type': 'loss', 'content': 0.09566579759120941, 'timestamp': '2025-10-02 00:33:40.322496', 'step': 12817, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:40.376746', 'step': 12817, 'epoch': 2}
{'type': 'loss', 'content': 0.04761000722646713, 'timestamp': '2025-10-02 00:33:40.380878', 'step': 12818, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:40.445884', 'step': 12818, 'epoch': 2}
{'type': 'loss', 'content': 0.05277062952518463, 'timestamp': '2025-10-02 00:33:40.456104', 'step': 12819, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:40.515610', 'step': 12819, 'epoch': 2}
{'type': 'loss', 'content': 0.018750011920928955, 'timestamp': '2025-10-02 00:33:40.524320', 'step': 12820, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:40.579038', 'step': 12820, 'epoch': 2}
{'type': 'loss', 'content': 0.07919827848672867, 'timestamp': '2025-10-02 00:33:40.581229', 'step': 12821, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:40.635378', 'step': 12821, 'epoch': 2}
{'type': 'loss', 'content': 0.16982638835906982, 'timestamp': '2025-10-02 00:33:40.638753', 'step': 12822, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:40.695995', 'step': 12822, 'epoch': 2}
{'type': 'loss', 'content': 0.09266821295022964, 'timestamp': '2025-10-02 00:33:40.705526', 'step': 12823, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:40.761169', 'step': 12823, 'epoch': 2}
{'type': 'loss', 'content': 0.04514443874359131, 'timestamp': '2025-10-02 00:33:40.767280', 'step': 12824, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:40.822413', 'step': 12824, 'epoch': 2}
{'type': 'loss', 'content': 0.16289642453193665, 'timestamp': '2025-10-02 00:33:40.826122', 'step': 12825, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:33:40.892977', 'step': 12825, 'epoch': 2}
{'type': 'loss', 'content': 0.03217202052474022, 'timestamp': '2025-10-02 00:33:40.903826', 'step': 12826, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:33:40.960763', 'step': 12826, 'epoch': 2}
{'type': 'loss', 'content': 0.1310364305973053, 'timestamp': '2025-10-02 00:33:40.963848', 'step': 12827, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:33:41.026491', 'step': 12827, 'epoch': 2}
{'type': 'loss', 'content': 0.040236979722976685, 'timestamp': '2025-10-02 00:33:41.037737', 'step': 12828, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:41.092890', 'step': 12828, 'epoch': 2}
{'type': 'loss', 'content': 0.029072413221001625, 'timestamp': '2025-10-02 00:33:41.099285', 'step': 12829, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:41.154815', 'step': 12829, 'epoch': 2}
{'type': 'loss', 'content': 0.10134859383106232, 'timestamp': '2025-10-02 00:33:41.157620', 'step': 12830, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:41.213732', 'step': 12830, 'epoch': 2}
{'type': 'loss', 'content': 0.05026404559612274, 'timestamp': '2025-10-02 00:33:41.217125', 'step': 12831, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:41.272495', 'step': 12831, 'epoch': 2}
{'type': 'loss', 'content': 0.11375536024570465, 'timestamp': '2025-10-02 00:33:41.279182', 'step': 12832, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:41.336517', 'step': 12832, 'epoch': 2}
{'type': 'loss', 'content': 0.15485630929470062, 'timestamp': '2025-10-02 00:33:41.339665', 'step': 12833, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:41.397759', 'step': 12833, 'epoch': 2}
{'type': 'loss', 'content': 0.06374942511320114, 'timestamp': '2025-10-02 00:33:41.407327', 'step': 12834, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:41.465643', 'step': 12834, 'epoch': 2}
{'type': 'loss', 'content': 0.06267531216144562, 'timestamp': '2025-10-02 00:33:41.468109', 'step': 12835, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:41.523670', 'step': 12835, 'epoch': 2}
{'type': 'loss', 'content': 0.022386273369193077, 'timestamp': '2025-10-02 00:33:41.529251', 'step': 12836, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:33:41.583213', 'step': 12836, 'epoch': 2}
{'type': 'loss', 'content': 0.03775475546717644, 'timestamp': '2025-10-02 00:33:41.585986', 'step': 12837, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:41.640694', 'step': 12837, 'epoch': 2}
{'type': 'loss', 'content': 0.07314833253622055, 'timestamp': '2025-10-02 00:33:41.643201', 'step': 12838, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:41.698875', 'step': 12838, 'epoch': 2}
{'type': 'loss', 'content': 0.06108834967017174, 'timestamp': '2025-10-02 00:33:41.708225', 'step': 12839, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:33:41.769876', 'step': 12839, 'epoch': 2}
{'type': 'loss', 'content': 0.03745967522263527, 'timestamp': '2025-10-02 00:33:41.781130', 'step': 12840, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:33:41.835629', 'step': 12840, 'epoch': 2}
{'type': 'loss', 'content': 0.0712662786245346, 'timestamp': '2025-10-02 00:33:41.838660', 'step': 12841, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:41.894586', 'step': 12841, 'epoch': 2}
{'type': 'loss', 'content': 0.011308526620268822, 'timestamp': '2025-10-02 00:33:41.903955', 'step': 12842, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:41.957781', 'step': 12842, 'epoch': 2}
{'type': 'loss', 'content': 0.04823749139904976, 'timestamp': '2025-10-02 00:33:41.960361', 'step': 12843, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:42.015225', 'step': 12843, 'epoch': 2}
{'type': 'loss', 'content': 0.036318935453891754, 'timestamp': '2025-10-02 00:33:42.022254', 'step': 12844, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:42.077240', 'step': 12844, 'epoch': 2}
{'type': 'loss', 'content': 0.059408195316791534, 'timestamp': '2025-10-02 00:33:42.079489', 'step': 12845, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:42.134035', 'step': 12845, 'epoch': 2}
{'type': 'loss', 'content': 0.1269708275794983, 'timestamp': '2025-10-02 00:33:42.136572', 'step': 12846, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:42.191183', 'step': 12846, 'epoch': 2}
{'type': 'loss', 'content': 0.048383649438619614, 'timestamp': '2025-10-02 00:33:42.193777', 'step': 12847, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:42.248236', 'step': 12847, 'epoch': 2}
{'type': 'loss', 'content': 0.07529785484075546, 'timestamp': '2025-10-02 00:33:42.255225', 'step': 12848, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:42.313410', 'step': 12848, 'epoch': 2}
{'type': 'loss', 'content': 0.013974970206618309, 'timestamp': '2025-10-02 00:33:42.324341', 'step': 12849, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:42.379657', 'step': 12849, 'epoch': 2}
{'type': 'loss', 'content': 0.04601205885410309, 'timestamp': '2025-10-02 00:33:42.387491', 'step': 12850, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:42.442374', 'step': 12850, 'epoch': 2}
{'type': 'loss', 'content': 0.12038788199424744, 'timestamp': '2025-10-02 00:33:42.444705', 'step': 12851, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:42.500393', 'step': 12851, 'epoch': 2}
{'type': 'loss', 'content': 0.028155626729130745, 'timestamp': '2025-10-02 00:33:42.510756', 'step': 12852, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:42.565578', 'step': 12852, 'epoch': 2}
{'type': 'loss', 'content': 0.02705003321170807, 'timestamp': '2025-10-02 00:33:42.567944', 'step': 12853, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:42.621908', 'step': 12853, 'epoch': 2}
{'type': 'loss', 'content': 0.06744302809238434, 'timestamp': '2025-10-02 00:33:42.624539', 'step': 12854, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:42.678883', 'step': 12854, 'epoch': 2}
{'type': 'loss', 'content': 0.042064253240823746, 'timestamp': '2025-10-02 00:33:42.681444', 'step': 12855, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:42.737446', 'step': 12855, 'epoch': 2}
{'type': 'loss', 'content': 0.07053624838590622, 'timestamp': '2025-10-02 00:33:42.747796', 'step': 12856, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:42.802049', 'step': 12856, 'epoch': 2}
{'type': 'loss', 'content': 0.08883858472108841, 'timestamp': '2025-10-02 00:33:42.804568', 'step': 12857, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:42.858957', 'step': 12857, 'epoch': 2}
{'type': 'loss', 'content': 0.08108727633953094, 'timestamp': '2025-10-02 00:33:42.862160', 'step': 12858, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:42.916856', 'step': 12858, 'epoch': 2}
{'type': 'loss', 'content': 0.07548361271619797, 'timestamp': '2025-10-02 00:33:42.926223', 'step': 12859, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:42.980780', 'step': 12859, 'epoch': 2}
{'type': 'loss', 'content': 0.11217908561229706, 'timestamp': '2025-10-02 00:33:42.986763', 'step': 12860, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:43.040759', 'step': 12860, 'epoch': 2}
{'type': 'loss', 'content': 0.14012742042541504, 'timestamp': '2025-10-02 00:33:43.043274', 'step': 12861, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:43.096814', 'step': 12861, 'epoch': 2}
{'type': 'loss', 'content': 0.027505220845341682, 'timestamp': '2025-10-02 00:33:43.104588', 'step': 12862, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:43.160652', 'step': 12862, 'epoch': 2}
{'type': 'loss', 'content': 0.04558534547686577, 'timestamp': '2025-10-02 00:33:43.163272', 'step': 12863, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:43.218091', 'step': 12863, 'epoch': 2}
{'type': 'loss', 'content': 0.1221592053771019, 'timestamp': '2025-10-02 00:33:43.224143', 'step': 12864, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:43.277997', 'step': 12864, 'epoch': 2}
{'type': 'loss', 'content': 0.05544690042734146, 'timestamp': '2025-10-02 00:33:43.284127', 'step': 12865, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:43.338859', 'step': 12865, 'epoch': 2}
{'type': 'loss', 'content': 0.04736227169632912, 'timestamp': '2025-10-02 00:33:43.344997', 'step': 12866, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:33:43.583944', 'step': 12866, 'epoch': 2}
{'type': 'loss', 'content': 0.06054440885782242, 'timestamp': '2025-10-02 00:33:43.592804', 'step': 12867, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:43.650784', 'step': 12867, 'epoch': 2}
{'type': 'loss', 'content': 0.02516058273613453, 'timestamp': '2025-10-02 00:33:43.659274', 'step': 12868, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:33:43.714225', 'step': 12868, 'epoch': 2}
{'type': 'loss', 'content': 0.10332043468952179, 'timestamp': '2025-10-02 00:33:43.717066', 'step': 12869, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:33:43.787550', 'step': 12869, 'epoch': 2}
{'type': 'loss', 'content': 0.060300011187791824, 'timestamp': '2025-10-02 00:33:43.799397', 'step': 12870, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:43.854332', 'step': 12870, 'epoch': 2}
{'type': 'loss', 'content': 0.010562241077423096, 'timestamp': '2025-10-02 00:33:43.856682', 'step': 12871, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:43.911229', 'step': 12871, 'epoch': 2}
{'type': 'loss', 'content': 0.08982158452272415, 'timestamp': '2025-10-02 00:33:43.917213', 'step': 12872, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:43.971906', 'step': 12872, 'epoch': 2}
{'type': 'loss', 'content': 0.06736408919095993, 'timestamp': '2025-10-02 00:33:43.977549', 'step': 12873, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:44.032056', 'step': 12873, 'epoch': 2}
{'type': 'loss', 'content': 0.020715709775686264, 'timestamp': '2025-10-02 00:33:44.034372', 'step': 12874, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:44.091050', 'step': 12874, 'epoch': 2}
{'type': 'loss', 'content': 0.016773099079728127, 'timestamp': '2025-10-02 00:33:44.100539', 'step': 12875, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:44.156889', 'step': 12875, 'epoch': 2}
{'type': 'loss', 'content': 0.08494622260332108, 'timestamp': '2025-10-02 00:33:44.166819', 'step': 12876, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:44.221106', 'step': 12876, 'epoch': 2}
{'type': 'loss', 'content': 0.03195307031273842, 'timestamp': '2025-10-02 00:33:44.223859', 'step': 12877, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:44.287554', 'step': 12877, 'epoch': 2}
{'type': 'loss', 'content': 0.14230766892433167, 'timestamp': '2025-10-02 00:33:44.290019', 'step': 12878, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:44.345293', 'step': 12878, 'epoch': 2}
{'type': 'loss', 'content': 0.008782259188592434, 'timestamp': '2025-10-02 00:33:44.352621', 'step': 12879, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:44.406505', 'step': 12879, 'epoch': 2}
{'type': 'loss', 'content': 0.07884319871664047, 'timestamp': '2025-10-02 00:33:44.412365', 'step': 12880, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:44.467636', 'step': 12880, 'epoch': 2}
{'type': 'loss', 'content': 0.04641173034906387, 'timestamp': '2025-10-02 00:33:44.470084', 'step': 12881, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:44.524877', 'step': 12881, 'epoch': 2}
{'type': 'loss', 'content': 0.06199099123477936, 'timestamp': '2025-10-02 00:33:44.527550', 'step': 12882, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:44.582871', 'step': 12882, 'epoch': 2}
{'type': 'loss', 'content': 0.014282616786658764, 'timestamp': '2025-10-02 00:33:44.585854', 'step': 12883, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:33:44.640951', 'step': 12883, 'epoch': 2}
{'type': 'loss', 'content': 0.10191348195075989, 'timestamp': '2025-10-02 00:33:44.647392', 'step': 12884, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:44.707066', 'step': 12884, 'epoch': 2}
{'type': 'loss', 'content': 0.10349204391241074, 'timestamp': '2025-10-02 00:33:44.709673', 'step': 12885, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:44.763513', 'step': 12885, 'epoch': 2}
{'type': 'loss', 'content': 0.05619661882519722, 'timestamp': '2025-10-02 00:33:44.765687', 'step': 12886, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:33:44.820767', 'step': 12886, 'epoch': 2}
{'type': 'loss', 'content': 0.05107645317912102, 'timestamp': '2025-10-02 00:33:44.823779', 'step': 12887, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:44.878653', 'step': 12887, 'epoch': 2}
{'type': 'loss', 'content': 0.06794338673353195, 'timestamp': '2025-10-02 00:33:44.884404', 'step': 12888, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:33:44.938629', 'step': 12888, 'epoch': 2}
{'type': 'loss', 'content': 0.06249368190765381, 'timestamp': '2025-10-02 00:33:44.941111', 'step': 12889, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:44.995871', 'step': 12889, 'epoch': 2}
{'type': 'loss', 'content': 0.07031504809856415, 'timestamp': '2025-10-02 00:33:44.998846', 'step': 12890, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:45.053861', 'step': 12890, 'epoch': 2}
{'type': 'loss', 'content': 0.027089498937129974, 'timestamp': '2025-10-02 00:33:45.056525', 'step': 12891, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:45.110997', 'step': 12891, 'epoch': 2}
{'type': 'loss', 'content': 0.04005535691976547, 'timestamp': '2025-10-02 00:33:45.116792', 'step': 12892, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:45.170661', 'step': 12892, 'epoch': 2}
{'type': 'loss', 'content': 0.005636936519294977, 'timestamp': '2025-10-02 00:33:45.173197', 'step': 12893, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:45.227291', 'step': 12893, 'epoch': 2}
{'type': 'loss', 'content': 0.1508910059928894, 'timestamp': '2025-10-02 00:33:45.229697', 'step': 12894, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:45.283692', 'step': 12894, 'epoch': 2}
{'type': 'loss', 'content': 0.19222475588321686, 'timestamp': '2025-10-02 00:33:45.286023', 'step': 12895, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:45.339805', 'step': 12895, 'epoch': 2}
{'type': 'loss', 'content': 0.08689652383327484, 'timestamp': '2025-10-02 00:33:45.345690', 'step': 12896, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:45.399726', 'step': 12896, 'epoch': 2}
{'type': 'loss', 'content': 0.08322451263666153, 'timestamp': '2025-10-02 00:33:45.402743', 'step': 12897, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:45.457204', 'step': 12897, 'epoch': 2}
{'type': 'loss', 'content': 0.03410627692937851, 'timestamp': '2025-10-02 00:33:45.462936', 'step': 12898, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:45.516895', 'step': 12898, 'epoch': 2}
{'type': 'loss', 'content': 0.04314834997057915, 'timestamp': '2025-10-02 00:33:45.519263', 'step': 12899, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:45.577661', 'step': 12899, 'epoch': 2}
{'type': 'loss', 'content': 0.08065789937973022, 'timestamp': '2025-10-02 00:33:45.588530', 'step': 12900, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:45.642330', 'step': 12900, 'epoch': 2}
{'type': 'loss', 'content': 0.017441846430301666, 'timestamp': '2025-10-02 00:33:45.648027', 'step': 12901, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:45.702076', 'step': 12901, 'epoch': 2}
{'type': 'loss', 'content': 0.05682109668850899, 'timestamp': '2025-10-02 00:33:45.704422', 'step': 12902, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:45.758297', 'step': 12902, 'epoch': 2}
{'type': 'loss', 'content': 0.11988171190023422, 'timestamp': '2025-10-02 00:33:45.760672', 'step': 12903, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:45.815359', 'step': 12903, 'epoch': 2}
{'type': 'loss', 'content': 0.04248422756791115, 'timestamp': '2025-10-02 00:33:45.821400', 'step': 12904, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:45.874861', 'step': 12904, 'epoch': 2}
{'type': 'loss', 'content': 0.12924407422542572, 'timestamp': '2025-10-02 00:33:45.880684', 'step': 12905, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:33:45.935205', 'step': 12905, 'epoch': 2}
{'type': 'loss', 'content': 0.0772751197218895, 'timestamp': '2025-10-02 00:33:45.937289', 'step': 12906, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:45.992207', 'step': 12906, 'epoch': 2}
{'type': 'loss', 'content': 0.07387041300535202, 'timestamp': '2025-10-02 00:33:45.997733', 'step': 12907, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:46.056648', 'step': 12907, 'epoch': 2}
{'type': 'loss', 'content': 0.10967250913381577, 'timestamp': '2025-10-02 00:33:46.067535', 'step': 12908, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:46.121132', 'step': 12908, 'epoch': 2}
{'type': 'loss', 'content': 0.10117531567811966, 'timestamp': '2025-10-02 00:33:46.128462', 'step': 12909, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:46.181849', 'step': 12909, 'epoch': 2}
{'type': 'loss', 'content': 0.15905684232711792, 'timestamp': '2025-10-02 00:33:46.184842', 'step': 12910, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:46.238462', 'step': 12910, 'epoch': 2}
{'type': 'loss', 'content': 0.1802462637424469, 'timestamp': '2025-10-02 00:33:46.240687', 'step': 12911, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:46.295323', 'step': 12911, 'epoch': 2}
{'type': 'loss', 'content': 0.028683189302682877, 'timestamp': '2025-10-02 00:33:46.301691', 'step': 12912, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:46.355343', 'step': 12912, 'epoch': 2}
{'type': 'loss', 'content': 0.1040804460644722, 'timestamp': '2025-10-02 00:33:46.357388', 'step': 12913, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:46.412605', 'step': 12913, 'epoch': 2}
{'type': 'loss', 'content': 0.07447963207960129, 'timestamp': '2025-10-02 00:33:46.422133', 'step': 12914, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:33:46.485152', 'step': 12914, 'epoch': 2}
{'type': 'loss', 'content': 0.028901007026433945, 'timestamp': '2025-10-02 00:33:46.495988', 'step': 12915, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:46.555722', 'step': 12915, 'epoch': 2}
{'type': 'loss', 'content': 0.07149605453014374, 'timestamp': '2025-10-02 00:33:46.566706', 'step': 12916, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:46.620295', 'step': 12916, 'epoch': 2}
{'type': 'loss', 'content': 0.07110145688056946, 'timestamp': '2025-10-02 00:33:46.622943', 'step': 12917, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:46.677017', 'step': 12917, 'epoch': 2}
{'type': 'loss', 'content': 0.12058984488248825, 'timestamp': '2025-10-02 00:33:46.679630', 'step': 12918, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:46.735753', 'step': 12918, 'epoch': 2}
{'type': 'loss', 'content': 0.032709941267967224, 'timestamp': '2025-10-02 00:33:46.743506', 'step': 12919, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:46.803653', 'step': 12919, 'epoch': 2}
{'type': 'loss', 'content': 0.049380529671907425, 'timestamp': '2025-10-02 00:33:46.814562', 'step': 12920, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:46.866980', 'step': 12920, 'epoch': 2}
{'type': 'loss', 'content': 0.14835675060749054, 'timestamp': '2025-10-02 00:33:46.869311', 'step': 12921, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:46.923644', 'step': 12921, 'epoch': 2}
{'type': 'loss', 'content': 0.04705001413822174, 'timestamp': '2025-10-02 00:33:46.931379', 'step': 12922, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:46.986500', 'step': 12922, 'epoch': 2}
{'type': 'loss', 'content': 0.07199005037546158, 'timestamp': '2025-10-02 00:33:46.989041', 'step': 12923, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:33:47.065635', 'step': 12923, 'epoch': 2}
{'type': 'loss', 'content': 0.03799768164753914, 'timestamp': '2025-10-02 00:33:47.080270', 'step': 12924, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:47.134876', 'step': 12924, 'epoch': 2}
{'type': 'loss', 'content': 0.10258094221353531, 'timestamp': '2025-10-02 00:33:47.137167', 'step': 12925, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:47.192697', 'step': 12925, 'epoch': 2}
{'type': 'loss', 'content': 0.049267325550317764, 'timestamp': '2025-10-02 00:33:47.198783', 'step': 12926, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:47.253538', 'step': 12926, 'epoch': 2}
{'type': 'loss', 'content': 0.011532708071172237, 'timestamp': '2025-10-02 00:33:47.256962', 'step': 12927, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:47.312620', 'step': 12927, 'epoch': 2}
{'type': 'loss', 'content': 0.06685718894004822, 'timestamp': '2025-10-02 00:33:47.318721', 'step': 12928, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:47.372250', 'step': 12928, 'epoch': 2}
{'type': 'loss', 'content': 0.15855355560779572, 'timestamp': '2025-10-02 00:33:47.374681', 'step': 12929, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:47.430505', 'step': 12929, 'epoch': 2}
{'type': 'loss', 'content': 0.05995820835232735, 'timestamp': '2025-10-02 00:33:47.433131', 'step': 12930, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:47.487585', 'step': 12930, 'epoch': 2}
{'type': 'loss', 'content': 0.0907532349228859, 'timestamp': '2025-10-02 00:33:47.490783', 'step': 12931, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:47.547039', 'step': 12931, 'epoch': 2}
{'type': 'loss', 'content': 0.049572963267564774, 'timestamp': '2025-10-02 00:33:47.557125', 'step': 12932, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:47.611084', 'step': 12932, 'epoch': 2}
{'type': 'loss', 'content': 0.08779240399599075, 'timestamp': '2025-10-02 00:33:47.613866', 'step': 12933, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:47.668552', 'step': 12933, 'epoch': 2}
{'type': 'loss', 'content': 0.10507574677467346, 'timestamp': '2025-10-02 00:33:47.677843', 'step': 12934, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:47.733625', 'step': 12934, 'epoch': 2}
{'type': 'loss', 'content': 0.041914135217666626, 'timestamp': '2025-10-02 00:33:47.739470', 'step': 12935, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:47.794338', 'step': 12935, 'epoch': 2}
{'type': 'loss', 'content': 0.10282726585865021, 'timestamp': '2025-10-02 00:33:47.800244', 'step': 12936, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:47.854666', 'step': 12936, 'epoch': 2}
{'type': 'loss', 'content': 0.04836554080247879, 'timestamp': '2025-10-02 00:33:47.864890', 'step': 12937, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:47.920775', 'step': 12937, 'epoch': 2}
{'type': 'loss', 'content': 0.030169278383255005, 'timestamp': '2025-10-02 00:33:47.930158', 'step': 12938, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:47.985118', 'step': 12938, 'epoch': 2}
{'type': 'loss', 'content': 0.05533202737569809, 'timestamp': '2025-10-02 00:33:47.987558', 'step': 12939, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:33:48.043608', 'step': 12939, 'epoch': 2}
{'type': 'loss', 'content': 0.030199456959962845, 'timestamp': '2025-10-02 00:33:48.049651', 'step': 12940, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:48.108163', 'step': 12940, 'epoch': 2}
{'type': 'loss', 'content': 0.05698510631918907, 'timestamp': '2025-10-02 00:33:48.114312', 'step': 12941, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:48.169208', 'step': 12941, 'epoch': 2}
{'type': 'loss', 'content': 0.0057833814062178135, 'timestamp': '2025-10-02 00:33:48.171697', 'step': 12942, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:48.226994', 'step': 12942, 'epoch': 2}
{'type': 'loss', 'content': 0.09318022429943085, 'timestamp': '2025-10-02 00:33:48.229617', 'step': 12943, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:48.284183', 'step': 12943, 'epoch': 2}
{'type': 'loss', 'content': 0.11706694960594177, 'timestamp': '2025-10-02 00:33:48.290288', 'step': 12944, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:48.343125', 'step': 12944, 'epoch': 2}
{'type': 'loss', 'content': 0.06061287596821785, 'timestamp': '2025-10-02 00:33:48.345415', 'step': 12945, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:48.399649', 'step': 12945, 'epoch': 2}
{'type': 'loss', 'content': 0.06457877904176712, 'timestamp': '2025-10-02 00:33:48.402165', 'step': 12946, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:48.455845', 'step': 12946, 'epoch': 2}
{'type': 'loss', 'content': 0.12585531175136566, 'timestamp': '2025-10-02 00:33:48.458267', 'step': 12947, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:48.512166', 'step': 12947, 'epoch': 2}
{'type': 'loss', 'content': 0.016514185816049576, 'timestamp': '2025-10-02 00:33:48.518252', 'step': 12948, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 544], 'flops': 10880066115712.0}, 'timestamp': '2025-10-02 00:33:48.599047', 'step': 12948, 'epoch': 2}
{'type': 'loss', 'content': 0.02484886907041073, 'timestamp': '2025-10-02 00:33:48.615413', 'step': 12949, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:33:48.678981', 'step': 12949, 'epoch': 2}
{'type': 'loss', 'content': 0.021977713331580162, 'timestamp': '2025-10-02 00:33:48.689491', 'step': 12950, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:48.749419', 'step': 12950, 'epoch': 2}
{'type': 'loss', 'content': 0.13438186049461365, 'timestamp': '2025-10-02 00:33:48.758965', 'step': 12951, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:33:48.814495', 'step': 12951, 'epoch': 2}
{'type': 'loss', 'content': 0.04696616530418396, 'timestamp': '2025-10-02 00:33:48.821148', 'step': 12952, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:48.876324', 'step': 12952, 'epoch': 2}
{'type': 'loss', 'content': 0.0272382739931345, 'timestamp': '2025-10-02 00:33:48.884078', 'step': 12953, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:48.941996', 'step': 12953, 'epoch': 2}
{'type': 'loss', 'content': 0.11453738808631897, 'timestamp': '2025-10-02 00:33:48.945384', 'step': 12954, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:49.001223', 'step': 12954, 'epoch': 2}
{'type': 'loss', 'content': 0.10310396552085876, 'timestamp': '2025-10-02 00:33:49.003875', 'step': 12955, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:49.060487', 'step': 12955, 'epoch': 2}
{'type': 'loss', 'content': 0.1378251165151596, 'timestamp': '2025-10-02 00:33:49.066383', 'step': 12956, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:49.121066', 'step': 12956, 'epoch': 2}
{'type': 'loss', 'content': 0.05921103432774544, 'timestamp': '2025-10-02 00:33:49.128924', 'step': 12957, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:49.187100', 'step': 12957, 'epoch': 2}
{'type': 'loss', 'content': 0.25695618987083435, 'timestamp': '2025-10-02 00:33:49.190315', 'step': 12958, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:49.249369', 'step': 12958, 'epoch': 2}
{'type': 'loss', 'content': 0.1306934505701065, 'timestamp': '2025-10-02 00:33:49.252671', 'step': 12959, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:49.307496', 'step': 12959, 'epoch': 2}
{'type': 'loss', 'content': 0.04272285848855972, 'timestamp': '2025-10-02 00:33:49.317566', 'step': 12960, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:49.373282', 'step': 12960, 'epoch': 2}
{'type': 'loss', 'content': 0.02075718715786934, 'timestamp': '2025-10-02 00:33:49.381092', 'step': 12961, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:49.437132', 'step': 12961, 'epoch': 2}
{'type': 'loss', 'content': 0.09702016413211823, 'timestamp': '2025-10-02 00:33:49.446506', 'step': 12962, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:49.504380', 'step': 12962, 'epoch': 2}
{'type': 'loss', 'content': 0.07584168761968613, 'timestamp': '2025-10-02 00:33:49.506812', 'step': 12963, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:49.561268', 'step': 12963, 'epoch': 2}
{'type': 'loss', 'content': 0.128379687666893, 'timestamp': '2025-10-02 00:33:49.567288', 'step': 12964, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:33:49.629321', 'step': 12964, 'epoch': 2}
{'type': 'loss', 'content': 0.0074264672584831715, 'timestamp': '2025-10-02 00:33:49.640642', 'step': 12965, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:49.712177', 'step': 12965, 'epoch': 2}
{'type': 'loss', 'content': 0.03560740873217583, 'timestamp': '2025-10-02 00:33:49.718118', 'step': 12966, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:49.775746', 'step': 12966, 'epoch': 2}
{'type': 'loss', 'content': 0.03688810393214226, 'timestamp': '2025-10-02 00:33:49.779621', 'step': 12967, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:33:49.838291', 'step': 12967, 'epoch': 2}
{'type': 'loss', 'content': 0.04981992766261101, 'timestamp': '2025-10-02 00:33:49.844755', 'step': 12968, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:49.901091', 'step': 12968, 'epoch': 2}
{'type': 'loss', 'content': 0.06709947437047958, 'timestamp': '2025-10-02 00:33:49.907187', 'step': 12969, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:33:49.979758', 'step': 12969, 'epoch': 2}
{'type': 'loss', 'content': 0.017251545563340187, 'timestamp': '2025-10-02 00:33:49.992096', 'step': 12970, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:50.048615', 'step': 12970, 'epoch': 2}
{'type': 'loss', 'content': 0.1611458659172058, 'timestamp': '2025-10-02 00:33:50.052391', 'step': 12971, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:33:50.127282', 'step': 12971, 'epoch': 2}
{'type': 'loss', 'content': 0.02082245796918869, 'timestamp': '2025-10-02 00:33:50.141502', 'step': 12972, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:50.199920', 'step': 12972, 'epoch': 2}
{'type': 'loss', 'content': 0.2137928456068039, 'timestamp': '2025-10-02 00:33:50.202346', 'step': 12973, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:33:50.266900', 'step': 12973, 'epoch': 2}
{'type': 'loss', 'content': 0.055731236934661865, 'timestamp': '2025-10-02 00:33:50.277767', 'step': 12974, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:50.333114', 'step': 12974, 'epoch': 2}
{'type': 'loss', 'content': 0.08768296986818314, 'timestamp': '2025-10-02 00:33:50.335338', 'step': 12975, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:50.394965', 'step': 12975, 'epoch': 2}
{'type': 'loss', 'content': 0.07919374853372574, 'timestamp': '2025-10-02 00:33:50.402010', 'step': 12976, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:50.459201', 'step': 12976, 'epoch': 2}
{'type': 'loss', 'content': 0.09339240938425064, 'timestamp': '2025-10-02 00:33:50.461476', 'step': 12977, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:50.515593', 'step': 12977, 'epoch': 2}
{'type': 'loss', 'content': 0.09319724887609482, 'timestamp': '2025-10-02 00:33:50.518388', 'step': 12978, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:50.575337', 'step': 12978, 'epoch': 2}
{'type': 'loss', 'content': 0.030517451465129852, 'timestamp': '2025-10-02 00:33:50.577796', 'step': 12979, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:50.640390', 'step': 12979, 'epoch': 2}
{'type': 'loss', 'content': 0.10619083791971207, 'timestamp': '2025-10-02 00:33:50.647377', 'step': 12980, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:33:50.716281', 'step': 12980, 'epoch': 2}
{'type': 'loss', 'content': 0.05069936811923981, 'timestamp': '2025-10-02 00:33:50.727610', 'step': 12981, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:50.785909', 'step': 12981, 'epoch': 2}
{'type': 'loss', 'content': 0.0939045399427414, 'timestamp': '2025-10-02 00:33:50.788989', 'step': 12982, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:50.845166', 'step': 12982, 'epoch': 2}
{'type': 'loss', 'content': 0.18019942939281464, 'timestamp': '2025-10-02 00:33:50.848853', 'step': 12983, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:50.905985', 'step': 12983, 'epoch': 2}
{'type': 'loss', 'content': 0.07702488452196121, 'timestamp': '2025-10-02 00:33:50.911913', 'step': 12984, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:50.966104', 'step': 12984, 'epoch': 2}
{'type': 'loss', 'content': 0.08635984361171722, 'timestamp': '2025-10-02 00:33:50.976338', 'step': 12985, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:51.033150', 'step': 12985, 'epoch': 2}
{'type': 'loss', 'content': 0.08256920427083969, 'timestamp': '2025-10-02 00:33:51.035527', 'step': 12986, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:33:51.089196', 'step': 12986, 'epoch': 2}
{'type': 'loss', 'content': 0.06782642751932144, 'timestamp': '2025-10-02 00:33:51.091699', 'step': 12987, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:51.146068', 'step': 12987, 'epoch': 2}
{'type': 'loss', 'content': 0.07679466903209686, 'timestamp': '2025-10-02 00:33:51.152029', 'step': 12988, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:51.206763', 'step': 12988, 'epoch': 2}
{'type': 'loss', 'content': 0.15062417089939117, 'timestamp': '2025-10-02 00:33:51.209417', 'step': 12989, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:51.264242', 'step': 12989, 'epoch': 2}
{'type': 'loss', 'content': 0.06742917746305466, 'timestamp': '2025-10-02 00:33:51.266691', 'step': 12990, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:33:51.337193', 'step': 12990, 'epoch': 2}
{'type': 'loss', 'content': 0.013080443255603313, 'timestamp': '2025-10-02 00:33:51.349535', 'step': 12991, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:51.404686', 'step': 12991, 'epoch': 2}
{'type': 'loss', 'content': 0.11359857022762299, 'timestamp': '2025-10-02 00:33:51.410771', 'step': 12992, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:51.466292', 'step': 12992, 'epoch': 2}
{'type': 'loss', 'content': 0.03479985147714615, 'timestamp': '2025-10-02 00:33:51.474047', 'step': 12993, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:33:51.537323', 'step': 12993, 'epoch': 2}
{'type': 'loss', 'content': 0.010729179717600346, 'timestamp': '2025-10-02 00:33:51.547747', 'step': 12994, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:51.603126', 'step': 12994, 'epoch': 2}
{'type': 'loss', 'content': 0.03765293210744858, 'timestamp': '2025-10-02 00:33:51.605796', 'step': 12995, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:51.663342', 'step': 12995, 'epoch': 2}
{'type': 'loss', 'content': 0.03505126014351845, 'timestamp': '2025-10-02 00:33:51.672948', 'step': 12996, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:51.727354', 'step': 12996, 'epoch': 2}
{'type': 'loss', 'content': 0.10822337120771408, 'timestamp': '2025-10-02 00:33:51.733360', 'step': 12997, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:51.787931', 'step': 12997, 'epoch': 2}
{'type': 'loss', 'content': 0.08202395588159561, 'timestamp': '2025-10-02 00:33:51.793840', 'step': 12998, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:51.848545', 'step': 12998, 'epoch': 2}
{'type': 'loss', 'content': 0.1829649657011032, 'timestamp': '2025-10-02 00:33:51.850942', 'step': 12999, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:51.904594', 'step': 12999, 'epoch': 2}
{'type': 'loss', 'content': 0.03151985630393028, 'timestamp': '2025-10-02 00:33:51.910746', 'step': 13000, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 13000', 'timestamp': '2025-10-02 00:33:52.318711', 'step': 13000, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:52.378494', 'step': 13000, 'epoch': 2}
{'type': 'loss', 'content': 0.031129423528909683, 'timestamp': '2025-10-02 00:33:52.385119', 'step': 13001, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:52.443536', 'step': 13001, 'epoch': 2}
{'type': 'loss', 'content': 0.051500726491212845, 'timestamp': '2025-10-02 00:33:52.446389', 'step': 13002, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:52.500147', 'step': 13002, 'epoch': 2}
{'type': 'loss', 'content': 0.2538149356842041, 'timestamp': '2025-10-02 00:33:52.502895', 'step': 13003, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:52.558242', 'step': 13003, 'epoch': 2}
{'type': 'loss', 'content': 0.14977781474590302, 'timestamp': '2025-10-02 00:33:52.564765', 'step': 13004, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:33:52.618584', 'step': 13004, 'epoch': 2}
{'type': 'loss', 'content': 0.08126082271337509, 'timestamp': '2025-10-02 00:33:52.621188', 'step': 13005, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:52.678262', 'step': 13005, 'epoch': 2}
{'type': 'loss', 'content': 0.010057073086500168, 'timestamp': '2025-10-02 00:33:52.687775', 'step': 13006, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:33:52.750764', 'step': 13006, 'epoch': 2}
{'type': 'loss', 'content': 0.07333233207464218, 'timestamp': '2025-10-02 00:33:52.761354', 'step': 13007, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:33:52.831823', 'step': 13007, 'epoch': 2}
{'type': 'loss', 'content': 0.11726626008749008, 'timestamp': '2025-10-02 00:33:52.845025', 'step': 13008, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:33:52.905947', 'step': 13008, 'epoch': 2}
{'type': 'loss', 'content': 0.08278399705886841, 'timestamp': '2025-10-02 00:33:52.917498', 'step': 13009, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:52.979297', 'step': 13009, 'epoch': 2}
{'type': 'loss', 'content': 0.03406677767634392, 'timestamp': '2025-10-02 00:33:52.984343', 'step': 13010, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:53.038959', 'step': 13010, 'epoch': 2}
{'type': 'loss', 'content': 0.031236151233315468, 'timestamp': '2025-10-02 00:33:53.044989', 'step': 13011, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:53.104244', 'step': 13011, 'epoch': 2}
{'type': 'loss', 'content': 0.057567134499549866, 'timestamp': '2025-10-02 00:33:53.110473', 'step': 13012, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:53.163775', 'step': 13012, 'epoch': 2}
{'type': 'loss', 'content': 0.09410407394170761, 'timestamp': '2025-10-02 00:33:53.166220', 'step': 13013, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:53.221460', 'step': 13013, 'epoch': 2}
{'type': 'loss', 'content': 0.03760700672864914, 'timestamp': '2025-10-02 00:33:53.223861', 'step': 13014, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:53.279847', 'step': 13014, 'epoch': 2}
{'type': 'loss', 'content': 0.08810561895370483, 'timestamp': '2025-10-02 00:33:53.282336', 'step': 13015, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:53.339361', 'step': 13015, 'epoch': 2}
{'type': 'loss', 'content': 0.034849926829338074, 'timestamp': '2025-10-02 00:33:53.349496', 'step': 13016, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:53.404639', 'step': 13016, 'epoch': 2}
{'type': 'loss', 'content': 0.11248810589313507, 'timestamp': '2025-10-02 00:33:53.414863', 'step': 13017, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:53.469657', 'step': 13017, 'epoch': 2}
{'type': 'loss', 'content': 0.06117399409413338, 'timestamp': '2025-10-02 00:33:53.472335', 'step': 13018, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:33:53.527207', 'step': 13018, 'epoch': 2}
{'type': 'loss', 'content': 0.12607474625110626, 'timestamp': '2025-10-02 00:33:53.529481', 'step': 13019, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:53.585807', 'step': 13019, 'epoch': 2}
{'type': 'loss', 'content': 0.08002401143312454, 'timestamp': '2025-10-02 00:33:53.595955', 'step': 13020, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:53.650328', 'step': 13020, 'epoch': 2}
{'type': 'loss', 'content': 0.047310445457696915, 'timestamp': '2025-10-02 00:33:53.659669', 'step': 13021, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:53.714587', 'step': 13021, 'epoch': 2}
{'type': 'loss', 'content': 0.13519874215126038, 'timestamp': '2025-10-02 00:33:53.716874', 'step': 13022, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:53.776363', 'step': 13022, 'epoch': 2}
{'type': 'loss', 'content': 0.05053316429257393, 'timestamp': '2025-10-02 00:33:53.786526', 'step': 13023, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:33:53.856392', 'step': 13023, 'epoch': 2}
{'type': 'loss', 'content': 0.03264174237847328, 'timestamp': '2025-10-02 00:33:53.869617', 'step': 13024, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:33:53.925721', 'step': 13024, 'epoch': 2}
{'type': 'loss', 'content': 0.12084761261940002, 'timestamp': '2025-10-02 00:33:53.928378', 'step': 13025, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:53.983344', 'step': 13025, 'epoch': 2}
{'type': 'loss', 'content': 0.07264772802591324, 'timestamp': '2025-10-02 00:33:53.986109', 'step': 13026, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:54.045063', 'step': 13026, 'epoch': 2}
{'type': 'loss', 'content': 0.03748811036348343, 'timestamp': '2025-10-02 00:33:54.055234', 'step': 13027, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:54.109634', 'step': 13027, 'epoch': 2}
{'type': 'loss', 'content': 0.03716457262635231, 'timestamp': '2025-10-02 00:33:54.119705', 'step': 13028, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:54.177894', 'step': 13028, 'epoch': 2}
{'type': 'loss', 'content': 0.012101959437131882, 'timestamp': '2025-10-02 00:33:54.188888', 'step': 13029, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:54.246263', 'step': 13029, 'epoch': 2}
{'type': 'loss', 'content': 0.0463956780731678, 'timestamp': '2025-10-02 00:33:54.255630', 'step': 13030, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:54.310715', 'step': 13030, 'epoch': 2}
{'type': 'loss', 'content': 0.1526477336883545, 'timestamp': '2025-10-02 00:33:54.313107', 'step': 13031, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:54.368082', 'step': 13031, 'epoch': 2}
{'type': 'loss', 'content': 0.0397387370467186, 'timestamp': '2025-10-02 00:33:54.374930', 'step': 13032, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:54.429284', 'step': 13032, 'epoch': 2}
{'type': 'loss', 'content': 0.05438656732439995, 'timestamp': '2025-10-02 00:33:54.431867', 'step': 13033, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:54.487058', 'step': 13033, 'epoch': 2}
{'type': 'loss', 'content': 0.12470874935388565, 'timestamp': '2025-10-02 00:33:54.489420', 'step': 13034, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:54.544368', 'step': 13034, 'epoch': 2}
{'type': 'loss', 'content': 0.055124323815107346, 'timestamp': '2025-10-02 00:33:54.547300', 'step': 13035, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:54.602245', 'step': 13035, 'epoch': 2}
{'type': 'loss', 'content': 0.0330391488969326, 'timestamp': '2025-10-02 00:33:54.608944', 'step': 13036, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:54.663901', 'step': 13036, 'epoch': 2}
{'type': 'loss', 'content': 0.04625523090362549, 'timestamp': '2025-10-02 00:33:54.674168', 'step': 13037, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:54.730560', 'step': 13037, 'epoch': 2}
{'type': 'loss', 'content': 0.042142078280448914, 'timestamp': '2025-10-02 00:33:54.732977', 'step': 13038, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:54.787778', 'step': 13038, 'epoch': 2}
{'type': 'loss', 'content': 0.0815323069691658, 'timestamp': '2025-10-02 00:33:54.790273', 'step': 13039, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:33:54.844866', 'step': 13039, 'epoch': 2}
{'type': 'loss', 'content': 0.09135974943637848, 'timestamp': '2025-10-02 00:33:54.850827', 'step': 13040, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:54.904931', 'step': 13040, 'epoch': 2}
{'type': 'loss', 'content': 0.026735736057162285, 'timestamp': '2025-10-02 00:33:54.907456', 'step': 13041, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:54.961647', 'step': 13041, 'epoch': 2}
{'type': 'loss', 'content': 0.05486796051263809, 'timestamp': '2025-10-02 00:33:54.970990', 'step': 13042, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:55.026246', 'step': 13042, 'epoch': 2}
{'type': 'loss', 'content': 0.0275861918926239, 'timestamp': '2025-10-02 00:33:55.033926', 'step': 13043, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:55.088160', 'step': 13043, 'epoch': 2}
{'type': 'loss', 'content': 0.03301657736301422, 'timestamp': '2025-10-02 00:33:55.094205', 'step': 13044, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:55.148334', 'step': 13044, 'epoch': 2}
{'type': 'loss', 'content': 0.06972108781337738, 'timestamp': '2025-10-02 00:33:55.150625', 'step': 13045, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:55.205640', 'step': 13045, 'epoch': 2}
{'type': 'loss', 'content': 0.05757669731974602, 'timestamp': '2025-10-02 00:33:55.214973', 'step': 13046, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:55.269245', 'step': 13046, 'epoch': 2}
{'type': 'loss', 'content': 0.10736651718616486, 'timestamp': '2025-10-02 00:33:55.272076', 'step': 13047, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:55.327752', 'step': 13047, 'epoch': 2}
{'type': 'loss', 'content': 0.20285002887248993, 'timestamp': '2025-10-02 00:33:55.334124', 'step': 13048, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:55.390370', 'step': 13048, 'epoch': 2}
{'type': 'loss', 'content': 0.03830936551094055, 'timestamp': '2025-10-02 00:33:55.392938', 'step': 13049, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:55.446885', 'step': 13049, 'epoch': 2}
{'type': 'loss', 'content': 0.11589516699314117, 'timestamp': '2025-10-02 00:33:55.449236', 'step': 13050, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:55.503404', 'step': 13050, 'epoch': 2}
{'type': 'loss', 'content': 0.07406388223171234, 'timestamp': '2025-10-02 00:33:55.505411', 'step': 13051, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:55.559267', 'step': 13051, 'epoch': 2}
{'type': 'loss', 'content': 0.09227455407381058, 'timestamp': '2025-10-02 00:33:55.564964', 'step': 13052, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:55.620939', 'step': 13052, 'epoch': 2}
{'type': 'loss', 'content': 0.09366446733474731, 'timestamp': '2025-10-02 00:33:55.623143', 'step': 13053, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:55.677217', 'step': 13053, 'epoch': 2}
{'type': 'loss', 'content': 0.02820397913455963, 'timestamp': '2025-10-02 00:33:55.679629', 'step': 13054, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:55.736249', 'step': 13054, 'epoch': 2}
{'type': 'loss', 'content': 0.04534604772925377, 'timestamp': '2025-10-02 00:33:55.745795', 'step': 13055, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:55.799915', 'step': 13055, 'epoch': 2}
{'type': 'loss', 'content': 0.175801619887352, 'timestamp': '2025-10-02 00:33:55.806137', 'step': 13056, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:55.859768', 'step': 13056, 'epoch': 2}
{'type': 'loss', 'content': 0.14610224962234497, 'timestamp': '2025-10-02 00:33:55.862195', 'step': 13057, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:55.916580', 'step': 13057, 'epoch': 2}
{'type': 'loss', 'content': 0.12886109948158264, 'timestamp': '2025-10-02 00:33:55.919377', 'step': 13058, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:55.974070', 'step': 13058, 'epoch': 2}
{'type': 'loss', 'content': 0.08069700747728348, 'timestamp': '2025-10-02 00:33:55.976861', 'step': 13059, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:56.031339', 'step': 13059, 'epoch': 2}
{'type': 'loss', 'content': 0.04648503288626671, 'timestamp': '2025-10-02 00:33:56.037189', 'step': 13060, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:56.091991', 'step': 13060, 'epoch': 2}
{'type': 'loss', 'content': 0.07200952619314194, 'timestamp': '2025-10-02 00:33:56.099742', 'step': 13061, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:56.154948', 'step': 13061, 'epoch': 2}
{'type': 'loss', 'content': 0.18232226371765137, 'timestamp': '2025-10-02 00:33:56.157083', 'step': 13062, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:56.217572', 'step': 13062, 'epoch': 2}
{'type': 'loss', 'content': 0.038184016942977905, 'timestamp': '2025-10-02 00:33:56.227760', 'step': 13063, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:56.282607', 'step': 13063, 'epoch': 2}
{'type': 'loss', 'content': 0.041462358087301254, 'timestamp': '2025-10-02 00:33:56.288590', 'step': 13064, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:56.343653', 'step': 13064, 'epoch': 2}
{'type': 'loss', 'content': 0.026359518989920616, 'timestamp': '2025-10-02 00:33:56.353932', 'step': 13065, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:56.407980', 'step': 13065, 'epoch': 2}
{'type': 'loss', 'content': 0.04059332609176636, 'timestamp': '2025-10-02 00:33:56.417319', 'step': 13066, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:56.472242', 'step': 13066, 'epoch': 2}
{'type': 'loss', 'content': 0.10997206717729568, 'timestamp': '2025-10-02 00:33:56.474722', 'step': 13067, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:56.528603', 'step': 13067, 'epoch': 2}
{'type': 'loss', 'content': 0.18652796745300293, 'timestamp': '2025-10-02 00:33:56.534516', 'step': 13068, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:56.588891', 'step': 13068, 'epoch': 2}
{'type': 'loss', 'content': 0.043179742991924286, 'timestamp': '2025-10-02 00:33:56.595027', 'step': 13069, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:56.651900', 'step': 13069, 'epoch': 2}
{'type': 'loss', 'content': 0.08225616067647934, 'timestamp': '2025-10-02 00:33:56.661483', 'step': 13070, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:56.715499', 'step': 13070, 'epoch': 2}
{'type': 'loss', 'content': 0.13221155107021332, 'timestamp': '2025-10-02 00:33:56.717799', 'step': 13071, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:56.771995', 'step': 13071, 'epoch': 2}
{'type': 'loss', 'content': 0.09488311409950256, 'timestamp': '2025-10-02 00:33:56.777748', 'step': 13072, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:33:56.831991', 'step': 13072, 'epoch': 2}
{'type': 'loss', 'content': 0.08225884288549423, 'timestamp': '2025-10-02 00:33:56.834227', 'step': 13073, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:56.888495', 'step': 13073, 'epoch': 2}
{'type': 'loss', 'content': 0.07205366343259811, 'timestamp': '2025-10-02 00:33:56.894530', 'step': 13074, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:56.951872', 'step': 13074, 'epoch': 2}
{'type': 'loss', 'content': 0.0719185397028923, 'timestamp': '2025-10-02 00:33:56.959479', 'step': 13075, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:57.014113', 'step': 13075, 'epoch': 2}
{'type': 'loss', 'content': 0.017070157453417778, 'timestamp': '2025-10-02 00:33:57.024272', 'step': 13076, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:57.079173', 'step': 13076, 'epoch': 2}
{'type': 'loss', 'content': 0.0965811088681221, 'timestamp': '2025-10-02 00:33:57.081725', 'step': 13077, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:57.137373', 'step': 13077, 'epoch': 2}
{'type': 'loss', 'content': 0.04950140416622162, 'timestamp': '2025-10-02 00:33:57.139764', 'step': 13078, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:57.194940', 'step': 13078, 'epoch': 2}
{'type': 'loss', 'content': 0.03657984361052513, 'timestamp': '2025-10-02 00:33:57.204291', 'step': 13079, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:33:57.259751', 'step': 13079, 'epoch': 2}
{'type': 'loss', 'content': 0.1254427134990692, 'timestamp': '2025-10-02 00:33:57.265818', 'step': 13080, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:57.319610', 'step': 13080, 'epoch': 2}
{'type': 'loss', 'content': 0.04061619192361832, 'timestamp': '2025-10-02 00:33:57.325677', 'step': 13081, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:57.380247', 'step': 13081, 'epoch': 2}
{'type': 'loss', 'content': 0.16866753995418549, 'timestamp': '2025-10-02 00:33:57.382625', 'step': 13082, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:33:57.437338', 'step': 13082, 'epoch': 2}
{'type': 'loss', 'content': 0.07844150066375732, 'timestamp': '2025-10-02 00:33:57.440108', 'step': 13083, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:57.495417', 'step': 13083, 'epoch': 2}
{'type': 'loss', 'content': 0.12193959206342697, 'timestamp': '2025-10-02 00:33:57.501246', 'step': 13084, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:57.554286', 'step': 13084, 'epoch': 2}
{'type': 'loss', 'content': 0.13171790540218353, 'timestamp': '2025-10-02 00:33:57.556286', 'step': 13085, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:57.610288', 'step': 13085, 'epoch': 2}
{'type': 'loss', 'content': 0.07053054869174957, 'timestamp': '2025-10-02 00:33:57.612843', 'step': 13086, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:57.668412', 'step': 13086, 'epoch': 2}
{'type': 'loss', 'content': 0.08966376632452011, 'timestamp': '2025-10-02 00:33:57.674408', 'step': 13087, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:33:57.731182', 'step': 13087, 'epoch': 2}
{'type': 'loss', 'content': 0.03141995891928673, 'timestamp': '2025-10-02 00:33:57.741450', 'step': 13088, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:57.800219', 'step': 13088, 'epoch': 2}
{'type': 'loss', 'content': 0.08465256541967392, 'timestamp': '2025-10-02 00:33:57.806428', 'step': 13089, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:33:57.861741', 'step': 13089, 'epoch': 2}
{'type': 'loss', 'content': 0.06063114106655121, 'timestamp': '2025-10-02 00:33:57.864816', 'step': 13090, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:57.921179', 'step': 13090, 'epoch': 2}
{'type': 'loss', 'content': 0.13065673410892487, 'timestamp': '2025-10-02 00:33:57.923293', 'step': 13091, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:57.979384', 'step': 13091, 'epoch': 2}
{'type': 'loss', 'content': 0.04163065552711487, 'timestamp': '2025-10-02 00:33:57.989528', 'step': 13092, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:33:58.045449', 'step': 13092, 'epoch': 2}
{'type': 'loss', 'content': 0.04308563098311424, 'timestamp': '2025-10-02 00:33:58.048931', 'step': 13093, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:33:58.104808', 'step': 13093, 'epoch': 2}
{'type': 'loss', 'content': 0.1449970155954361, 'timestamp': '2025-10-02 00:33:58.107910', 'step': 13094, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:33:58.163493', 'step': 13094, 'epoch': 2}
{'type': 'loss', 'content': 0.06653482466936111, 'timestamp': '2025-10-02 00:33:58.165646', 'step': 13095, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:33:58.222516', 'step': 13095, 'epoch': 2}
{'type': 'loss', 'content': 0.016847314313054085, 'timestamp': '2025-10-02 00:33:58.229076', 'step': 13096, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:58.284926', 'step': 13096, 'epoch': 2}
{'type': 'loss', 'content': 0.17464648187160492, 'timestamp': '2025-10-02 00:33:58.287922', 'step': 13097, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:58.344505', 'step': 13097, 'epoch': 2}
{'type': 'loss', 'content': 0.1280548870563507, 'timestamp': '2025-10-02 00:33:58.347513', 'step': 13098, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:58.403256', 'step': 13098, 'epoch': 2}
{'type': 'loss', 'content': 0.14180853962898254, 'timestamp': '2025-10-02 00:33:58.405722', 'step': 13099, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:58.462334', 'step': 13099, 'epoch': 2}
{'type': 'loss', 'content': 0.055790916085243225, 'timestamp': '2025-10-02 00:33:58.469572', 'step': 13100, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:58.527774', 'step': 13100, 'epoch': 2}
{'type': 'loss', 'content': 0.03994886204600334, 'timestamp': '2025-10-02 00:33:58.533851', 'step': 13101, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:58.590285', 'step': 13101, 'epoch': 2}
{'type': 'loss', 'content': 0.025089291855692863, 'timestamp': '2025-10-02 00:33:58.599658', 'step': 13102, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:58.654603', 'step': 13102, 'epoch': 2}
{'type': 'loss', 'content': 0.07279641181230545, 'timestamp': '2025-10-02 00:33:58.657816', 'step': 13103, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:58.713584', 'step': 13103, 'epoch': 2}
{'type': 'loss', 'content': 0.09241268783807755, 'timestamp': '2025-10-02 00:33:58.720401', 'step': 13104, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:33:58.778917', 'step': 13104, 'epoch': 2}
{'type': 'loss', 'content': 0.0848943218588829, 'timestamp': '2025-10-02 00:33:58.789941', 'step': 13105, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:33:58.858758', 'step': 13105, 'epoch': 2}
{'type': 'loss', 'content': 0.092884860932827, 'timestamp': '2025-10-02 00:33:58.861201', 'step': 13106, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:58.917431', 'step': 13106, 'epoch': 2}
{'type': 'loss', 'content': 0.05921556055545807, 'timestamp': '2025-10-02 00:33:58.924931', 'step': 13107, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:58.981460', 'step': 13107, 'epoch': 2}
{'type': 'loss', 'content': 0.05783560872077942, 'timestamp': '2025-10-02 00:33:58.987852', 'step': 13108, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:59.044427', 'step': 13108, 'epoch': 2}
{'type': 'loss', 'content': 0.20226715505123138, 'timestamp': '2025-10-02 00:33:59.047146', 'step': 13109, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:59.104298', 'step': 13109, 'epoch': 2}
{'type': 'loss', 'content': 0.14404632151126862, 'timestamp': '2025-10-02 00:33:59.106499', 'step': 13110, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:33:59.163843', 'step': 13110, 'epoch': 2}
{'type': 'loss', 'content': 0.10041850805282593, 'timestamp': '2025-10-02 00:33:59.166430', 'step': 13111, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:33:59.224647', 'step': 13111, 'epoch': 2}
{'type': 'loss', 'content': 0.1526305079460144, 'timestamp': '2025-10-02 00:33:59.231701', 'step': 13112, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:33:59.291807', 'step': 13112, 'epoch': 2}
{'type': 'loss', 'content': 0.08515868335962296, 'timestamp': '2025-10-02 00:33:59.294708', 'step': 13113, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:59.349689', 'step': 13113, 'epoch': 2}
{'type': 'loss', 'content': 0.1767571121454239, 'timestamp': '2025-10-02 00:33:59.351930', 'step': 13114, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:33:59.407563', 'step': 13114, 'epoch': 2}
{'type': 'loss', 'content': 0.0958363264799118, 'timestamp': '2025-10-02 00:33:59.415212', 'step': 13115, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:59.471959', 'step': 13115, 'epoch': 2}
{'type': 'loss', 'content': 0.04809249937534332, 'timestamp': '2025-10-02 00:33:59.481342', 'step': 13116, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:33:59.540495', 'step': 13116, 'epoch': 2}
{'type': 'loss', 'content': 0.21685194969177246, 'timestamp': '2025-10-02 00:33:59.543515', 'step': 13117, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:33:59.601273', 'step': 13117, 'epoch': 2}
{'type': 'loss', 'content': 0.08454090356826782, 'timestamp': '2025-10-02 00:33:59.604461', 'step': 13118, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:33:59.661990', 'step': 13118, 'epoch': 2}
{'type': 'loss', 'content': 0.051852576434612274, 'timestamp': '2025-10-02 00:33:59.671329', 'step': 13119, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:33:59.728143', 'step': 13119, 'epoch': 2}
{'type': 'loss', 'content': 0.07864949107170105, 'timestamp': '2025-10-02 00:33:59.734949', 'step': 13120, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:59.790637', 'step': 13120, 'epoch': 2}
{'type': 'loss', 'content': 0.05614731088280678, 'timestamp': '2025-10-02 00:33:59.792837', 'step': 13121, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:59.849070', 'step': 13121, 'epoch': 2}
{'type': 'loss', 'content': 0.05608665570616722, 'timestamp': '2025-10-02 00:33:59.851718', 'step': 13122, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:33:59.907163', 'step': 13122, 'epoch': 2}
{'type': 'loss', 'content': 0.024552959948778152, 'timestamp': '2025-10-02 00:33:59.909509', 'step': 13123, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:33:59.966027', 'step': 13123, 'epoch': 2}
{'type': 'loss', 'content': 0.03333493694663048, 'timestamp': '2025-10-02 00:33:59.972163', 'step': 13124, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:00.026510', 'step': 13124, 'epoch': 2}
{'type': 'loss', 'content': 0.06883056461811066, 'timestamp': '2025-10-02 00:34:00.032459', 'step': 13125, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:34:00.087233', 'step': 13125, 'epoch': 2}
{'type': 'loss', 'content': 0.03686118125915527, 'timestamp': '2025-10-02 00:34:00.096513', 'step': 13126, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:00.151157', 'step': 13126, 'epoch': 2}
{'type': 'loss', 'content': 0.04908719286322594, 'timestamp': '2025-10-02 00:34:00.157180', 'step': 13127, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:34:00.216519', 'step': 13127, 'epoch': 2}
{'type': 'loss', 'content': 0.043764278292655945, 'timestamp': '2025-10-02 00:34:00.227501', 'step': 13128, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:34:00.288640', 'step': 13128, 'epoch': 2}
{'type': 'loss', 'content': 0.04203973710536957, 'timestamp': '2025-10-02 00:34:00.299952', 'step': 13129, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:00.361587', 'step': 13129, 'epoch': 2}
{'type': 'loss', 'content': 0.1340474784374237, 'timestamp': '2025-10-02 00:34:00.364116', 'step': 13130, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:00.418982', 'step': 13130, 'epoch': 2}
{'type': 'loss', 'content': 0.01607668027281761, 'timestamp': '2025-10-02 00:34:00.426537', 'step': 13131, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:34:00.488402', 'step': 13131, 'epoch': 2}
{'type': 'loss', 'content': 0.03820601850748062, 'timestamp': '2025-10-02 00:34:00.499634', 'step': 13132, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:00.553756', 'step': 13132, 'epoch': 2}
{'type': 'loss', 'content': 0.075715072453022, 'timestamp': '2025-10-02 00:34:00.556168', 'step': 13133, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:00.611293', 'step': 13133, 'epoch': 2}
{'type': 'loss', 'content': 0.028932347893714905, 'timestamp': '2025-10-02 00:34:00.613339', 'step': 13134, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:00.667276', 'step': 13134, 'epoch': 2}
{'type': 'loss', 'content': 0.11031850427389145, 'timestamp': '2025-10-02 00:34:00.669837', 'step': 13135, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:00.724742', 'step': 13135, 'epoch': 2}
{'type': 'loss', 'content': 0.051160506904125214, 'timestamp': '2025-10-02 00:34:00.732754', 'step': 13136, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:34:00.796227', 'step': 13136, 'epoch': 2}
{'type': 'loss', 'content': 0.09174852073192596, 'timestamp': '2025-10-02 00:34:00.800994', 'step': 13137, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:34:00.867378', 'step': 13137, 'epoch': 2}
{'type': 'loss', 'content': 0.03410273417830467, 'timestamp': '2025-10-02 00:34:00.878251', 'step': 13138, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:34:00.933055', 'step': 13138, 'epoch': 2}
{'type': 'loss', 'content': 0.13924524188041687, 'timestamp': '2025-10-02 00:34:00.935436', 'step': 13139, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:34:00.990171', 'step': 13139, 'epoch': 2}
{'type': 'loss', 'content': 0.12163940072059631, 'timestamp': '2025-10-02 00:34:00.996407', 'step': 13140, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:34:01.064142', 'step': 13140, 'epoch': 2}
{'type': 'loss', 'content': 0.006931683514267206, 'timestamp': '2025-10-02 00:34:01.077142', 'step': 13141, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:01.131959', 'step': 13141, 'epoch': 2}
{'type': 'loss', 'content': 0.036472562700510025, 'timestamp': '2025-10-02 00:34:01.134270', 'step': 13142, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:01.189580', 'step': 13142, 'epoch': 2}
{'type': 'loss', 'content': 0.03730233013629913, 'timestamp': '2025-10-02 00:34:01.197180', 'step': 13143, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:34:01.258844', 'step': 13143, 'epoch': 2}
{'type': 'loss', 'content': 0.05374536290764809, 'timestamp': '2025-10-02 00:34:01.269808', 'step': 13144, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:34:01.330828', 'step': 13144, 'epoch': 2}
{'type': 'loss', 'content': 0.027343757450580597, 'timestamp': '2025-10-02 00:34:01.342339', 'step': 13145, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:01.398900', 'step': 13145, 'epoch': 2}
{'type': 'loss', 'content': 0.057849202305078506, 'timestamp': '2025-10-02 00:34:01.408484', 'step': 13146, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:01.463653', 'step': 13146, 'epoch': 2}
{'type': 'loss', 'content': 0.05228202044963837, 'timestamp': '2025-10-02 00:34:01.466121', 'step': 13147, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:01.520111', 'step': 13147, 'epoch': 2}
{'type': 'loss', 'content': 0.2102019488811493, 'timestamp': '2025-10-02 00:34:01.525891', 'step': 13148, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:34:01.580239', 'step': 13148, 'epoch': 2}
{'type': 'loss', 'content': 0.014239075593650341, 'timestamp': '2025-10-02 00:34:01.582624', 'step': 13149, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:34:01.641056', 'step': 13149, 'epoch': 2}
{'type': 'loss', 'content': 0.03968677669763565, 'timestamp': '2025-10-02 00:34:01.651265', 'step': 13150, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:01.706291', 'step': 13150, 'epoch': 2}
{'type': 'loss', 'content': 0.09011590480804443, 'timestamp': '2025-10-02 00:34:01.713935', 'step': 13151, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:01.768372', 'step': 13151, 'epoch': 2}
{'type': 'loss', 'content': 0.02705608867108822, 'timestamp': '2025-10-02 00:34:01.775321', 'step': 13152, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:01.829001', 'step': 13152, 'epoch': 2}
{'type': 'loss', 'content': 0.04001986235380173, 'timestamp': '2025-10-02 00:34:01.839246', 'step': 13153, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:34:01.898083', 'step': 13153, 'epoch': 2}
{'type': 'loss', 'content': 0.02042088657617569, 'timestamp': '2025-10-02 00:34:01.908300', 'step': 13154, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:01.972059', 'step': 13154, 'epoch': 2}
{'type': 'loss', 'content': 0.12357940524816513, 'timestamp': '2025-10-02 00:34:01.979463', 'step': 13155, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:02.044225', 'step': 13155, 'epoch': 2}
{'type': 'loss', 'content': 0.08336661756038666, 'timestamp': '2025-10-02 00:34:02.050606', 'step': 13156, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:02.126772', 'step': 13156, 'epoch': 2}
{'type': 'loss', 'content': 0.052454471588134766, 'timestamp': '2025-10-02 00:34:02.130348', 'step': 13157, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:02.187342', 'step': 13157, 'epoch': 2}
{'type': 'loss', 'content': 0.06123372167348862, 'timestamp': '2025-10-02 00:34:02.196888', 'step': 13158, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:02.252252', 'step': 13158, 'epoch': 2}
{'type': 'loss', 'content': 0.01598736084997654, 'timestamp': '2025-10-02 00:34:02.259741', 'step': 13159, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:34:02.327473', 'step': 13159, 'epoch': 2}
{'type': 'loss', 'content': 0.019432177767157555, 'timestamp': '2025-10-02 00:34:02.338777', 'step': 13160, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:02.393282', 'step': 13160, 'epoch': 2}
{'type': 'loss', 'content': 0.0590941347181797, 'timestamp': '2025-10-02 00:34:02.399381', 'step': 13161, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:02.455043', 'step': 13161, 'epoch': 2}
{'type': 'loss', 'content': 0.02550419420003891, 'timestamp': '2025-10-02 00:34:02.464487', 'step': 13162, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:34:02.519307', 'step': 13162, 'epoch': 2}
{'type': 'loss', 'content': 0.07145529985427856, 'timestamp': '2025-10-02 00:34:02.521643', 'step': 13163, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:34:02.590810', 'step': 13163, 'epoch': 2}
{'type': 'loss', 'content': 0.02189471945166588, 'timestamp': '2025-10-02 00:34:02.603902', 'step': 13164, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:02.658495', 'step': 13164, 'epoch': 2}
{'type': 'loss', 'content': 0.057575296610593796, 'timestamp': '2025-10-02 00:34:02.666111', 'step': 13165, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:02.721393', 'step': 13165, 'epoch': 2}
{'type': 'loss', 'content': 0.08543556928634644, 'timestamp': '2025-10-02 00:34:02.723879', 'step': 13166, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:02.779537', 'step': 13166, 'epoch': 2}
{'type': 'loss', 'content': 0.14526474475860596, 'timestamp': '2025-10-02 00:34:02.781854', 'step': 13167, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:02.836230', 'step': 13167, 'epoch': 2}
{'type': 'loss', 'content': 0.17955690622329712, 'timestamp': '2025-10-02 00:34:02.842308', 'step': 13168, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:34:02.895715', 'step': 13168, 'epoch': 2}
{'type': 'loss', 'content': 0.11505047231912613, 'timestamp': '2025-10-02 00:34:02.897858', 'step': 13169, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:34:02.953127', 'step': 13169, 'epoch': 2}
{'type': 'loss', 'content': 0.1797042042016983, 'timestamp': '2025-10-02 00:34:02.955601', 'step': 13170, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:03.009370', 'step': 13170, 'epoch': 2}
{'type': 'loss', 'content': 0.1266976147890091, 'timestamp': '2025-10-02 00:34:03.011848', 'step': 13171, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:03.067498', 'step': 13171, 'epoch': 2}
{'type': 'loss', 'content': 0.1185484379529953, 'timestamp': '2025-10-02 00:34:03.074375', 'step': 13172, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:03.127477', 'step': 13172, 'epoch': 2}
{'type': 'loss', 'content': 0.1320086568593979, 'timestamp': '2025-10-02 00:34:03.129768', 'step': 13173, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:03.184027', 'step': 13173, 'epoch': 2}
{'type': 'loss', 'content': 0.02690042182803154, 'timestamp': '2025-10-02 00:34:03.186405', 'step': 13174, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:03.240749', 'step': 13174, 'epoch': 2}
{'type': 'loss', 'content': 0.07851673662662506, 'timestamp': '2025-10-02 00:34:03.243218', 'step': 13175, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:03.297765', 'step': 13175, 'epoch': 2}
{'type': 'loss', 'content': 0.06991369277238846, 'timestamp': '2025-10-02 00:34:03.304103', 'step': 13176, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:03.358419', 'step': 13176, 'epoch': 2}
{'type': 'loss', 'content': 0.058347877115011215, 'timestamp': '2025-10-02 00:34:03.360414', 'step': 13177, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:34:03.414257', 'step': 13177, 'epoch': 2}
{'type': 'loss', 'content': 0.10011956095695496, 'timestamp': '2025-10-02 00:34:03.416959', 'step': 13178, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:03.470919', 'step': 13178, 'epoch': 2}
{'type': 'loss', 'content': 0.04293441027402878, 'timestamp': '2025-10-02 00:34:03.473851', 'step': 13179, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:03.527943', 'step': 13179, 'epoch': 2}
{'type': 'loss', 'content': 0.11045663803815842, 'timestamp': '2025-10-02 00:34:03.533864', 'step': 13180, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:03.587024', 'step': 13180, 'epoch': 2}
{'type': 'loss', 'content': 0.07882758229970932, 'timestamp': '2025-10-02 00:34:03.589540', 'step': 13181, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:34:03.645843', 'step': 13181, 'epoch': 2}
{'type': 'loss', 'content': 0.05730690807104111, 'timestamp': '2025-10-02 00:34:03.655226', 'step': 13182, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:03.710653', 'step': 13182, 'epoch': 2}
{'type': 'loss', 'content': 0.02928382158279419, 'timestamp': '2025-10-02 00:34:03.716456', 'step': 13183, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:03.771722', 'step': 13183, 'epoch': 2}
{'type': 'loss', 'content': 0.06983647495508194, 'timestamp': '2025-10-02 00:34:03.780054', 'step': 13184, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:03.843183', 'step': 13184, 'epoch': 2}
{'type': 'loss', 'content': 0.013073242269456387, 'timestamp': '2025-10-02 00:34:03.853433', 'step': 13185, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:03.908384', 'step': 13185, 'epoch': 2}
{'type': 'loss', 'content': 0.029530907049775124, 'timestamp': '2025-10-02 00:34:03.911001', 'step': 13186, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:34:03.970661', 'step': 13186, 'epoch': 2}
{'type': 'loss', 'content': 0.037746693938970566, 'timestamp': '2025-10-02 00:34:03.980767', 'step': 13187, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:04.035150', 'step': 13187, 'epoch': 2}
{'type': 'loss', 'content': 0.0966896340250969, 'timestamp': '2025-10-02 00:34:04.041099', 'step': 13188, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:04.094651', 'step': 13188, 'epoch': 2}
{'type': 'loss', 'content': 0.09678735584020615, 'timestamp': '2025-10-02 00:34:04.097164', 'step': 13189, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:34:04.152649', 'step': 13189, 'epoch': 2}
{'type': 'loss', 'content': 0.11680684983730316, 'timestamp': '2025-10-02 00:34:04.161954', 'step': 13190, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:04.217194', 'step': 13190, 'epoch': 2}
{'type': 'loss', 'content': 0.007218671962618828, 'timestamp': '2025-10-02 00:34:04.219788', 'step': 13191, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:34:04.278770', 'step': 13191, 'epoch': 2}
{'type': 'loss', 'content': 0.0467340461909771, 'timestamp': '2025-10-02 00:34:04.289696', 'step': 13192, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:34:04.347763', 'step': 13192, 'epoch': 2}
{'type': 'loss', 'content': 0.04363356530666351, 'timestamp': '2025-10-02 00:34:04.358760', 'step': 13193, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:04.412806', 'step': 13193, 'epoch': 2}
{'type': 'loss', 'content': 0.16372975707054138, 'timestamp': '2025-10-02 00:34:04.415058', 'step': 13194, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:04.470046', 'step': 13194, 'epoch': 2}
{'type': 'loss', 'content': 0.014846321195363998, 'timestamp': '2025-10-02 00:34:04.476044', 'step': 13195, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:04.530784', 'step': 13195, 'epoch': 2}
{'type': 'loss', 'content': 0.10583676397800446, 'timestamp': '2025-10-02 00:34:04.536578', 'step': 13196, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:04.588944', 'step': 13196, 'epoch': 2}
{'type': 'loss', 'content': 0.07747189700603485, 'timestamp': '2025-10-02 00:34:04.591365', 'step': 13197, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:04.648479', 'step': 13197, 'epoch': 2}
{'type': 'loss', 'content': 0.09200781583786011, 'timestamp': '2025-10-02 00:34:04.658023', 'step': 13198, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:04.713506', 'step': 13198, 'epoch': 2}
{'type': 'loss', 'content': 0.02499370463192463, 'timestamp': '2025-10-02 00:34:04.721145', 'step': 13199, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:04.776560', 'step': 13199, 'epoch': 2}
{'type': 'loss', 'content': 0.01236723642796278, 'timestamp': '2025-10-02 00:34:04.786835', 'step': 13200, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:34:04.841513', 'step': 13200, 'epoch': 2}
{'type': 'loss', 'content': 0.03579101338982582, 'timestamp': '2025-10-02 00:34:04.851371', 'step': 13201, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:34:04.910524', 'step': 13201, 'epoch': 2}
{'type': 'loss', 'content': 0.023934975266456604, 'timestamp': '2025-10-02 00:34:04.920758', 'step': 13202, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:04.975319', 'step': 13202, 'epoch': 2}
{'type': 'loss', 'content': 0.1867210865020752, 'timestamp': '2025-10-02 00:34:04.977970', 'step': 13203, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:05.034367', 'step': 13203, 'epoch': 2}
{'type': 'loss', 'content': 0.014793993905186653, 'timestamp': '2025-10-02 00:34:05.044703', 'step': 13204, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:05.100853', 'step': 13204, 'epoch': 2}
{'type': 'loss', 'content': 0.022872857749462128, 'timestamp': '2025-10-02 00:34:05.111130', 'step': 13205, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:05.165023', 'step': 13205, 'epoch': 2}
{'type': 'loss', 'content': 0.1458033174276352, 'timestamp': '2025-10-02 00:34:05.168218', 'step': 13206, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:34:05.231018', 'step': 13206, 'epoch': 2}
{'type': 'loss', 'content': 0.020450172945857048, 'timestamp': '2025-10-02 00:34:05.241857', 'step': 13207, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:05.296587', 'step': 13207, 'epoch': 2}
{'type': 'loss', 'content': 0.08585269749164581, 'timestamp': '2025-10-02 00:34:05.302304', 'step': 13208, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:34:05.362492', 'step': 13208, 'epoch': 2}
{'type': 'loss', 'content': 0.016642576083540916, 'timestamp': '2025-10-02 00:34:05.373833', 'step': 13209, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:05.429939', 'step': 13209, 'epoch': 2}
{'type': 'loss', 'content': 0.10024692118167877, 'timestamp': '2025-10-02 00:34:05.436026', 'step': 13210, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:05.490629', 'step': 13210, 'epoch': 2}
{'type': 'loss', 'content': 0.05759048834443092, 'timestamp': '2025-10-02 00:34:05.496550', 'step': 13211, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:05.551190', 'step': 13211, 'epoch': 2}
{'type': 'loss', 'content': 0.0513966828584671, 'timestamp': '2025-10-02 00:34:05.557000', 'step': 13212, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:05.610948', 'step': 13212, 'epoch': 2}
{'type': 'loss', 'content': 0.02224239520728588, 'timestamp': '2025-10-02 00:34:05.617003', 'step': 13213, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:05.672030', 'step': 13213, 'epoch': 2}
{'type': 'loss', 'content': 0.15405939519405365, 'timestamp': '2025-10-02 00:34:05.674249', 'step': 13214, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:05.729564', 'step': 13214, 'epoch': 2}
{'type': 'loss', 'content': 0.020354971289634705, 'timestamp': '2025-10-02 00:34:05.739097', 'step': 13215, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:05.794494', 'step': 13215, 'epoch': 2}
{'type': 'loss', 'content': 0.10301434993743896, 'timestamp': '2025-10-02 00:34:05.800231', 'step': 13216, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:34:05.859135', 'step': 13216, 'epoch': 2}
{'type': 'loss', 'content': 0.02416045404970646, 'timestamp': '2025-10-02 00:34:05.870082', 'step': 13217, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:05.925132', 'step': 13217, 'epoch': 2}
{'type': 'loss', 'content': 0.06610716134309769, 'timestamp': '2025-10-02 00:34:05.928218', 'step': 13218, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:05.983897', 'step': 13218, 'epoch': 2}
{'type': 'loss', 'content': 0.05794425681233406, 'timestamp': '2025-10-02 00:34:05.986220', 'step': 13219, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:34:06.040436', 'step': 13219, 'epoch': 2}
{'type': 'loss', 'content': 0.012137377634644508, 'timestamp': '2025-10-02 00:34:06.050551', 'step': 13220, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:34:06.104796', 'step': 13220, 'epoch': 2}
{'type': 'loss', 'content': 0.02827438712120056, 'timestamp': '2025-10-02 00:34:06.107697', 'step': 13221, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:34:06.162134', 'step': 13221, 'epoch': 2}
{'type': 'loss', 'content': 0.053356219083070755, 'timestamp': '2025-10-02 00:34:06.171459', 'step': 13222, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:06.228884', 'step': 13222, 'epoch': 2}
{'type': 'loss', 'content': 0.07405425608158112, 'timestamp': '2025-10-02 00:34:06.231394', 'step': 13223, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:06.285721', 'step': 13223, 'epoch': 2}
{'type': 'loss', 'content': 0.09329348802566528, 'timestamp': '2025-10-02 00:34:06.291491', 'step': 13224, 'epoch': 2}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:34:33.410221', 'step': 13224, 'epoch': 2}
{'type': 'pplx', 'content': 106.64013022211448, 'timestamp': '2025-10-02 00:34:33.413688', 'step': 13224, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:33.468659', 'step': 13224, 'epoch': 2}
{'type': 'loss', 'content': 0.02885550819337368, 'timestamp': '2025-10-02 00:34:33.470796', 'step': 13225, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:33.525722', 'step': 13225, 'epoch': 2}
{'type': 'loss', 'content': 0.03768092766404152, 'timestamp': '2025-10-02 00:34:33.527711', 'step': 13226, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:33.583263', 'step': 13226, 'epoch': 2}
{'type': 'loss', 'content': 0.09223470091819763, 'timestamp': '2025-10-02 00:34:33.585462', 'step': 13227, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:33.640159', 'step': 13227, 'epoch': 2}
{'type': 'loss', 'content': 0.06028396263718605, 'timestamp': '2025-10-02 00:34:33.646469', 'step': 13228, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:33.700049', 'step': 13228, 'epoch': 2}
{'type': 'loss', 'content': 0.17994073033332825, 'timestamp': '2025-10-02 00:34:33.706191', 'step': 13229, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:33.760723', 'step': 13229, 'epoch': 2}
{'type': 'loss', 'content': 0.01769433170557022, 'timestamp': '2025-10-02 00:34:33.763532', 'step': 13230, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:33.819537', 'step': 13230, 'epoch': 2}
{'type': 'loss', 'content': 0.037086084485054016, 'timestamp': '2025-10-02 00:34:33.822406', 'step': 13231, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:33.876021', 'step': 13231, 'epoch': 2}
{'type': 'loss', 'content': 0.10640931874513626, 'timestamp': '2025-10-02 00:34:33.882500', 'step': 13232, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:33.944293', 'step': 13232, 'epoch': 2}
{'type': 'loss', 'content': 0.1320052444934845, 'timestamp': '2025-10-02 00:34:33.946608', 'step': 13233, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:34:34.000522', 'step': 13233, 'epoch': 2}
{'type': 'loss', 'content': 0.18553590774536133, 'timestamp': '2025-10-02 00:34:34.003801', 'step': 13234, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:34:34.060651', 'step': 13234, 'epoch': 2}
{'type': 'loss', 'content': 0.19320939481258392, 'timestamp': '2025-10-02 00:34:34.062982', 'step': 13235, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:34.117943', 'step': 13235, 'epoch': 2}
{'type': 'loss', 'content': 0.018043654039502144, 'timestamp': '2025-10-02 00:34:34.126565', 'step': 13236, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:34:34.179245', 'step': 13236, 'epoch': 2}
{'type': 'loss', 'content': 0.11402864009141922, 'timestamp': '2025-10-02 00:34:34.182436', 'step': 13237, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:34:34.241788', 'step': 13237, 'epoch': 2}
{'type': 'loss', 'content': 0.09194168448448181, 'timestamp': '2025-10-02 00:34:34.245406', 'step': 13238, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:34.318027', 'step': 13238, 'epoch': 2}
{'type': 'loss', 'content': 0.09464278817176819, 'timestamp': '2025-10-02 00:34:34.322113', 'step': 13239, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:34.382523', 'step': 13239, 'epoch': 2}
{'type': 'loss', 'content': 0.09155327081680298, 'timestamp': '2025-10-02 00:34:34.403158', 'step': 13240, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:34.478062', 'step': 13240, 'epoch': 2}
{'type': 'loss', 'content': 0.07446757704019547, 'timestamp': '2025-10-02 00:34:34.488297', 'step': 13241, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:34:34.597579', 'step': 13241, 'epoch': 2}
{'type': 'loss', 'content': 0.0526294931769371, 'timestamp': '2025-10-02 00:34:34.607761', 'step': 13242, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:34.680606', 'step': 13242, 'epoch': 2}
{'type': 'loss', 'content': 0.14793790876865387, 'timestamp': '2025-10-02 00:34:34.690227', 'step': 13243, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:34:34.764386', 'step': 13243, 'epoch': 2}
{'type': 'loss', 'content': 0.030477454885840416, 'timestamp': '2025-10-02 00:34:34.775850', 'step': 13244, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:34:34.835864', 'step': 13244, 'epoch': 2}
{'type': 'loss', 'content': 0.09151677042245865, 'timestamp': '2025-10-02 00:34:34.845613', 'step': 13245, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:34.905301', 'step': 13245, 'epoch': 2}
{'type': 'loss', 'content': 0.18487782776355743, 'timestamp': '2025-10-02 00:34:34.911942', 'step': 13246, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:34.980123', 'step': 13246, 'epoch': 2}
{'type': 'loss', 'content': 0.15287649631500244, 'timestamp': '2025-10-02 00:34:34.998574', 'step': 13247, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:34:35.063840', 'step': 13247, 'epoch': 2}
{'type': 'loss', 'content': 0.08243561536073685, 'timestamp': '2025-10-02 00:34:35.074885', 'step': 13248, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:34:35.141080', 'step': 13248, 'epoch': 2}
{'type': 'loss', 'content': 0.007545452564954758, 'timestamp': '2025-10-02 00:34:35.152104', 'step': 13249, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:34:35.215319', 'step': 13249, 'epoch': 2}
{'type': 'loss', 'content': 0.04302776977419853, 'timestamp': '2025-10-02 00:34:35.224672', 'step': 13250, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:34:35.292022', 'step': 13250, 'epoch': 2}
{'type': 'loss', 'content': 0.05100979655981064, 'timestamp': '2025-10-02 00:34:35.295442', 'step': 13251, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:34:35.351311', 'step': 13251, 'epoch': 2}
{'type': 'loss', 'content': 0.11121165752410889, 'timestamp': '2025-10-02 00:34:35.357416', 'step': 13252, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:35.413038', 'step': 13252, 'epoch': 2}
{'type': 'loss', 'content': 0.0439477413892746, 'timestamp': '2025-10-02 00:34:35.422619', 'step': 13253, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:34:35.509563', 'step': 13253, 'epoch': 2}
{'type': 'loss', 'content': 0.09621015936136246, 'timestamp': '2025-10-02 00:34:35.515760', 'step': 13254, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:35.575080', 'step': 13254, 'epoch': 2}
{'type': 'loss', 'content': 0.055019013583660126, 'timestamp': '2025-10-02 00:34:35.581117', 'step': 13255, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:34:35.658344', 'step': 13255, 'epoch': 2}
{'type': 'loss', 'content': 0.059484563767910004, 'timestamp': '2025-10-02 00:34:35.665601', 'step': 13256, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:35.722638', 'step': 13256, 'epoch': 2}
{'type': 'loss', 'content': 0.10582011193037033, 'timestamp': '2025-10-02 00:34:35.726842', 'step': 13257, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:35.787863', 'step': 13257, 'epoch': 2}
{'type': 'loss', 'content': 0.07539735734462738, 'timestamp': '2025-10-02 00:34:35.800751', 'step': 13258, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:35.858023', 'step': 13258, 'epoch': 2}
{'type': 'loss', 'content': 0.07029405236244202, 'timestamp': '2025-10-02 00:34:35.862056', 'step': 13259, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:34:35.919948', 'step': 13259, 'epoch': 2}
{'type': 'loss', 'content': 0.05410325527191162, 'timestamp': '2025-10-02 00:34:35.927213', 'step': 13260, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:35.992663', 'step': 13260, 'epoch': 2}
{'type': 'loss', 'content': 0.1196650043129921, 'timestamp': '2025-10-02 00:34:36.002916', 'step': 13261, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:36.077896', 'step': 13261, 'epoch': 2}
{'type': 'loss', 'content': 0.018286291509866714, 'timestamp': '2025-10-02 00:34:36.085737', 'step': 13262, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:34:36.159072', 'step': 13262, 'epoch': 2}
{'type': 'loss', 'content': 0.05039406195282936, 'timestamp': '2025-10-02 00:34:36.161960', 'step': 13263, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:36.224521', 'step': 13263, 'epoch': 2}
{'type': 'loss', 'content': 0.0498419888317585, 'timestamp': '2025-10-02 00:34:36.231284', 'step': 13264, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:34:36.296063', 'step': 13264, 'epoch': 2}
{'type': 'loss', 'content': 0.11018021404743195, 'timestamp': '2025-10-02 00:34:36.306653', 'step': 13265, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:36.365235', 'step': 13265, 'epoch': 2}
{'type': 'loss', 'content': 0.051474958658218384, 'timestamp': '2025-10-02 00:34:36.368460', 'step': 13266, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:36.425396', 'step': 13266, 'epoch': 2}
{'type': 'loss', 'content': 0.07916572690010071, 'timestamp': '2025-10-02 00:34:36.429005', 'step': 13267, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:36.494596', 'step': 13267, 'epoch': 2}
{'type': 'loss', 'content': 0.04108726605772972, 'timestamp': '2025-10-02 00:34:36.502104', 'step': 13268, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:36.567475', 'step': 13268, 'epoch': 2}
{'type': 'loss', 'content': 0.1257757693529129, 'timestamp': '2025-10-02 00:34:36.570230', 'step': 13269, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:34:36.639618', 'step': 13269, 'epoch': 2}
{'type': 'loss', 'content': 0.05249282717704773, 'timestamp': '2025-10-02 00:34:36.649751', 'step': 13270, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:36.720386', 'step': 13270, 'epoch': 2}
{'type': 'loss', 'content': 0.09401267021894455, 'timestamp': '2025-10-02 00:34:36.729235', 'step': 13271, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:34:36.794351', 'step': 13271, 'epoch': 2}
{'type': 'loss', 'content': 0.10395662486553192, 'timestamp': '2025-10-02 00:34:36.800832', 'step': 13272, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:34:36.873722', 'step': 13272, 'epoch': 2}
{'type': 'loss', 'content': 0.02696603536605835, 'timestamp': '2025-10-02 00:34:36.885231', 'step': 13273, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:36.950138', 'step': 13273, 'epoch': 2}
{'type': 'loss', 'content': 0.04376862570643425, 'timestamp': '2025-10-02 00:34:36.954563', 'step': 13274, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:34:37.026411', 'step': 13274, 'epoch': 2}
{'type': 'loss', 'content': 0.018515925854444504, 'timestamp': '2025-10-02 00:34:37.036924', 'step': 13275, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:37.094298', 'step': 13275, 'epoch': 2}
{'type': 'loss', 'content': 0.12167181074619293, 'timestamp': '2025-10-02 00:34:37.102691', 'step': 13276, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:37.159448', 'step': 13276, 'epoch': 2}
{'type': 'loss', 'content': 0.059545423835515976, 'timestamp': '2025-10-02 00:34:37.165496', 'step': 13277, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:37.227748', 'step': 13277, 'epoch': 2}
{'type': 'loss', 'content': 0.08570206165313721, 'timestamp': '2025-10-02 00:34:37.231393', 'step': 13278, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:34:37.295524', 'step': 13278, 'epoch': 2}
{'type': 'loss', 'content': 0.05693618953227997, 'timestamp': '2025-10-02 00:34:37.299617', 'step': 13279, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:34:37.370327', 'step': 13279, 'epoch': 2}
{'type': 'loss', 'content': 0.061422545462846756, 'timestamp': '2025-10-02 00:34:37.383082', 'step': 13280, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:37.451823', 'step': 13280, 'epoch': 2}
{'type': 'loss', 'content': 0.0423155315220356, 'timestamp': '2025-10-02 00:34:37.455916', 'step': 13281, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:37.520025', 'step': 13281, 'epoch': 2}
{'type': 'loss', 'content': 0.1073623076081276, 'timestamp': '2025-10-02 00:34:37.527929', 'step': 13282, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:37.586795', 'step': 13282, 'epoch': 2}
{'type': 'loss', 'content': 0.07695480436086655, 'timestamp': '2025-10-02 00:34:37.594230', 'step': 13283, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:34:37.667904', 'step': 13283, 'epoch': 2}
{'type': 'loss', 'content': 0.015285736881196499, 'timestamp': '2025-10-02 00:34:37.679376', 'step': 13284, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:34:37.737609', 'step': 13284, 'epoch': 2}
{'type': 'loss', 'content': 0.21642915904521942, 'timestamp': '2025-10-02 00:34:37.741540', 'step': 13285, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:34:37.822408', 'step': 13285, 'epoch': 2}
{'type': 'loss', 'content': 0.13359345495700836, 'timestamp': '2025-10-02 00:34:37.833012', 'step': 13286, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:37.890979', 'step': 13286, 'epoch': 2}
{'type': 'loss', 'content': 0.07573740929365158, 'timestamp': '2025-10-02 00:34:37.895781', 'step': 13287, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:37.962498', 'step': 13287, 'epoch': 2}
{'type': 'loss', 'content': 0.05523812770843506, 'timestamp': '2025-10-02 00:34:37.973986', 'step': 13288, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 576], 'flops': 11520070000896.0}, 'timestamp': '2025-10-02 00:34:38.067128', 'step': 13288, 'epoch': 2}
{'type': 'loss', 'content': 0.01625838316977024, 'timestamp': '2025-10-02 00:34:38.084015', 'step': 13289, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:34:38.146736', 'step': 13289, 'epoch': 2}
{'type': 'loss', 'content': 0.07121933251619339, 'timestamp': '2025-10-02 00:34:38.156884', 'step': 13290, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:38.221075', 'step': 13290, 'epoch': 2}
{'type': 'loss', 'content': 0.026797084137797356, 'timestamp': '2025-10-02 00:34:38.228689', 'step': 13291, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:38.287023', 'step': 13291, 'epoch': 2}
{'type': 'loss', 'content': 0.05449908599257469, 'timestamp': '2025-10-02 00:34:38.299724', 'step': 13292, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:34:38.357711', 'step': 13292, 'epoch': 2}
{'type': 'loss', 'content': 0.17852216958999634, 'timestamp': '2025-10-02 00:34:38.361046', 'step': 13293, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:38.419290', 'step': 13293, 'epoch': 2}
{'type': 'loss', 'content': 0.06646912544965744, 'timestamp': '2025-10-02 00:34:38.423052', 'step': 13294, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:38.481099', 'step': 13294, 'epoch': 2}
{'type': 'loss', 'content': 0.047265175729990005, 'timestamp': '2025-10-02 00:34:38.484843', 'step': 13295, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:38.542913', 'step': 13295, 'epoch': 2}
{'type': 'loss', 'content': 0.08450876921415329, 'timestamp': '2025-10-02 00:34:38.550048', 'step': 13296, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:38.615237', 'step': 13296, 'epoch': 2}
{'type': 'loss', 'content': 0.041255973279476166, 'timestamp': '2025-10-02 00:34:38.618158', 'step': 13297, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:38.684960', 'step': 13297, 'epoch': 2}
{'type': 'loss', 'content': 0.06542789936065674, 'timestamp': '2025-10-02 00:34:38.687755', 'step': 13298, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:38.745599', 'step': 13298, 'epoch': 2}
{'type': 'loss', 'content': 0.04962562769651413, 'timestamp': '2025-10-02 00:34:38.748422', 'step': 13299, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:38.804142', 'step': 13299, 'epoch': 2}
{'type': 'loss', 'content': 0.03674792870879173, 'timestamp': '2025-10-02 00:34:38.810504', 'step': 13300, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:38.875942', 'step': 13300, 'epoch': 2}
{'type': 'loss', 'content': 0.012470051646232605, 'timestamp': '2025-10-02 00:34:38.886250', 'step': 13301, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:38.944005', 'step': 13301, 'epoch': 2}
{'type': 'loss', 'content': 0.062258921563625336, 'timestamp': '2025-10-02 00:34:38.947752', 'step': 13302, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:39.018728', 'step': 13302, 'epoch': 2}
{'type': 'loss', 'content': 0.025355001911520958, 'timestamp': '2025-10-02 00:34:39.024635', 'step': 13303, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:34:39.093659', 'step': 13303, 'epoch': 2}
{'type': 'loss', 'content': 0.06326466053724289, 'timestamp': '2025-10-02 00:34:39.104610', 'step': 13304, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:39.160792', 'step': 13304, 'epoch': 2}
{'type': 'loss', 'content': 0.031994011253118515, 'timestamp': '2025-10-02 00:34:39.164389', 'step': 13305, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:39.220324', 'step': 13305, 'epoch': 2}
{'type': 'loss', 'content': 0.057911988347768784, 'timestamp': '2025-10-02 00:34:39.226450', 'step': 13306, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:34:39.295731', 'step': 13306, 'epoch': 2}
{'type': 'loss', 'content': 0.02108505181968212, 'timestamp': '2025-10-02 00:34:39.306628', 'step': 13307, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:34:39.366254', 'step': 13307, 'epoch': 2}
{'type': 'loss', 'content': 0.04738399386405945, 'timestamp': '2025-10-02 00:34:39.372935', 'step': 13308, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:34:39.439031', 'step': 13308, 'epoch': 2}
{'type': 'loss', 'content': 0.02550790272653103, 'timestamp': '2025-10-02 00:34:39.450570', 'step': 13309, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:39.518280', 'step': 13309, 'epoch': 2}
{'type': 'loss', 'content': 0.042090289294719696, 'timestamp': '2025-10-02 00:34:39.520525', 'step': 13310, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:39.584784', 'step': 13310, 'epoch': 2}
{'type': 'loss', 'content': 0.1186194196343422, 'timestamp': '2025-10-02 00:34:39.590468', 'step': 13311, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:34:39.659556', 'step': 13311, 'epoch': 2}
{'type': 'loss', 'content': 0.021558478474617004, 'timestamp': '2025-10-02 00:34:39.671002', 'step': 13312, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:39.737483', 'step': 13312, 'epoch': 2}
{'type': 'loss', 'content': 0.05022493749856949, 'timestamp': '2025-10-02 00:34:39.744885', 'step': 13313, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:34:39.821369', 'step': 13313, 'epoch': 2}
{'type': 'loss', 'content': 0.0682520717382431, 'timestamp': '2025-10-02 00:34:39.833695', 'step': 13314, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:34:39.912847', 'step': 13314, 'epoch': 2}
{'type': 'loss', 'content': 0.08246677368879318, 'timestamp': '2025-10-02 00:34:39.916380', 'step': 13315, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:39.979196', 'step': 13315, 'epoch': 2}
{'type': 'loss', 'content': 0.13738718628883362, 'timestamp': '2025-10-02 00:34:39.995578', 'step': 13316, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:40.063906', 'step': 13316, 'epoch': 2}
{'type': 'loss', 'content': 0.12868420779705048, 'timestamp': '2025-10-02 00:34:40.074794', 'step': 13317, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:40.137011', 'step': 13317, 'epoch': 2}
{'type': 'loss', 'content': 0.0850360095500946, 'timestamp': '2025-10-02 00:34:40.140111', 'step': 13318, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:40.203791', 'step': 13318, 'epoch': 2}
{'type': 'loss', 'content': 0.04581073299050331, 'timestamp': '2025-10-02 00:34:40.213293', 'step': 13319, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:40.276687', 'step': 13319, 'epoch': 2}
{'type': 'loss', 'content': 0.15834806859493256, 'timestamp': '2025-10-02 00:34:40.289536', 'step': 13320, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:34:40.348897', 'step': 13320, 'epoch': 2}
{'type': 'loss', 'content': 0.11561400443315506, 'timestamp': '2025-10-02 00:34:40.352050', 'step': 13321, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:40.423855', 'step': 13321, 'epoch': 2}
{'type': 'loss', 'content': 0.1377296894788742, 'timestamp': '2025-10-02 00:34:40.434597', 'step': 13322, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:34:40.507754', 'step': 13322, 'epoch': 2}
{'type': 'loss', 'content': 0.007511017378419638, 'timestamp': '2025-10-02 00:34:40.520014', 'step': 13323, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:34:40.593438', 'step': 13323, 'epoch': 2}
{'type': 'loss', 'content': 0.034829024225473404, 'timestamp': '2025-10-02 00:34:40.604700', 'step': 13324, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:40.722468', 'step': 13324, 'epoch': 2}
{'type': 'loss', 'content': 0.08630942553281784, 'timestamp': '2025-10-02 00:34:40.729628', 'step': 13325, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:40.795139', 'step': 13325, 'epoch': 2}
{'type': 'loss', 'content': 0.07723607122898102, 'timestamp': '2025-10-02 00:34:40.804382', 'step': 13326, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:40.868907', 'step': 13326, 'epoch': 2}
{'type': 'loss', 'content': 0.016644049435853958, 'timestamp': '2025-10-02 00:34:40.872593', 'step': 13327, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:40.953798', 'step': 13327, 'epoch': 2}
{'type': 'loss', 'content': 0.04580534249544144, 'timestamp': '2025-10-02 00:34:40.960998', 'step': 13328, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:34:41.037211', 'step': 13328, 'epoch': 2}
{'type': 'loss', 'content': 0.14973340928554535, 'timestamp': '2025-10-02 00:34:41.040664', 'step': 13329, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:34:41.107228', 'step': 13329, 'epoch': 2}
{'type': 'loss', 'content': 0.039640992879867554, 'timestamp': '2025-10-02 00:34:41.116616', 'step': 13330, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:41.175357', 'step': 13330, 'epoch': 2}
{'type': 'loss', 'content': 0.18805277347564697, 'timestamp': '2025-10-02 00:34:41.178991', 'step': 13331, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:41.249505', 'step': 13331, 'epoch': 2}
{'type': 'loss', 'content': 0.13479411602020264, 'timestamp': '2025-10-02 00:34:41.259133', 'step': 13332, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:41.330468', 'step': 13332, 'epoch': 2}
{'type': 'loss', 'content': 0.04884735867381096, 'timestamp': '2025-10-02 00:34:41.336638', 'step': 13333, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:41.405941', 'step': 13333, 'epoch': 2}
{'type': 'loss', 'content': 0.11953984200954437, 'timestamp': '2025-10-02 00:34:41.412284', 'step': 13334, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:41.477900', 'step': 13334, 'epoch': 2}
{'type': 'loss', 'content': 0.09697754681110382, 'timestamp': '2025-10-02 00:34:41.480643', 'step': 13335, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:41.551980', 'step': 13335, 'epoch': 2}
{'type': 'loss', 'content': 0.062395043671131134, 'timestamp': '2025-10-02 00:34:41.558744', 'step': 13336, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:41.643749', 'step': 13336, 'epoch': 2}
{'type': 'loss', 'content': 0.02680327743291855, 'timestamp': '2025-10-02 00:34:41.647042', 'step': 13337, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:41.707595', 'step': 13337, 'epoch': 2}
{'type': 'loss', 'content': 0.09182348102331161, 'timestamp': '2025-10-02 00:34:41.710117', 'step': 13338, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:41.779273', 'step': 13338, 'epoch': 2}
{'type': 'loss', 'content': 0.03709458187222481, 'timestamp': '2025-10-02 00:34:41.787282', 'step': 13339, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:41.864941', 'step': 13339, 'epoch': 2}
{'type': 'loss', 'content': 0.10836353898048401, 'timestamp': '2025-10-02 00:34:41.872034', 'step': 13340, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:41.932481', 'step': 13340, 'epoch': 2}
{'type': 'loss', 'content': 0.07639964669942856, 'timestamp': '2025-10-02 00:34:41.940181', 'step': 13341, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:34:42.018471', 'step': 13341, 'epoch': 2}
{'type': 'loss', 'content': 0.058289941400289536, 'timestamp': '2025-10-02 00:34:42.022225', 'step': 13342, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:34:42.087498', 'step': 13342, 'epoch': 2}
{'type': 'loss', 'content': 0.00690143508836627, 'timestamp': '2025-10-02 00:34:42.099045', 'step': 13343, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:42.174091', 'step': 13343, 'epoch': 2}
{'type': 'loss', 'content': 0.054447755217552185, 'timestamp': '2025-10-02 00:34:42.183835', 'step': 13344, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:42.248217', 'step': 13344, 'epoch': 2}
{'type': 'loss', 'content': 0.1480681151151657, 'timestamp': '2025-10-02 00:34:42.252137', 'step': 13345, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:42.317203', 'step': 13345, 'epoch': 2}
{'type': 'loss', 'content': 0.07658752053976059, 'timestamp': '2025-10-02 00:34:42.324856', 'step': 13346, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:42.387415', 'step': 13346, 'epoch': 2}
{'type': 'loss', 'content': 0.09380131214857101, 'timestamp': '2025-10-02 00:34:42.393288', 'step': 13347, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:42.462753', 'step': 13347, 'epoch': 2}
{'type': 'loss', 'content': 0.06425322592258453, 'timestamp': '2025-10-02 00:34:42.469362', 'step': 13348, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:42.533630', 'step': 13348, 'epoch': 2}
{'type': 'loss', 'content': 0.04631171375513077, 'timestamp': '2025-10-02 00:34:42.537228', 'step': 13349, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:42.595250', 'step': 13349, 'epoch': 2}
{'type': 'loss', 'content': 0.0527266189455986, 'timestamp': '2025-10-02 00:34:42.601512', 'step': 13350, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:34:42.683692', 'step': 13350, 'epoch': 2}
{'type': 'loss', 'content': 0.03151167184114456, 'timestamp': '2025-10-02 00:34:42.697178', 'step': 13351, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:34:42.765225', 'step': 13351, 'epoch': 2}
{'type': 'loss', 'content': 0.16499023139476776, 'timestamp': '2025-10-02 00:34:42.775105', 'step': 13352, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:34:42.841990', 'step': 13352, 'epoch': 2}
{'type': 'loss', 'content': 0.010823666118085384, 'timestamp': '2025-10-02 00:34:42.852997', 'step': 13353, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:34:42.918077', 'step': 13353, 'epoch': 2}
{'type': 'loss', 'content': 0.07957026362419128, 'timestamp': '2025-10-02 00:34:42.925004', 'step': 13354, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:42.991220', 'step': 13354, 'epoch': 2}
{'type': 'loss', 'content': 0.09133198112249374, 'timestamp': '2025-10-02 00:34:42.993566', 'step': 13355, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:43.056657', 'step': 13355, 'epoch': 2}
{'type': 'loss', 'content': 0.16833430528640747, 'timestamp': '2025-10-02 00:34:43.069115', 'step': 13356, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:43.141593', 'step': 13356, 'epoch': 2}
{'type': 'loss', 'content': 0.016385575756430626, 'timestamp': '2025-10-02 00:34:43.151893', 'step': 13357, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:43.212600', 'step': 13357, 'epoch': 2}
{'type': 'loss', 'content': 0.08316485583782196, 'timestamp': '2025-10-02 00:34:43.215557', 'step': 13358, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:43.283545', 'step': 13358, 'epoch': 2}
{'type': 'loss', 'content': 0.043608564883470535, 'timestamp': '2025-10-02 00:34:43.291636', 'step': 13359, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:43.352005', 'step': 13359, 'epoch': 2}
{'type': 'loss', 'content': 0.18847592175006866, 'timestamp': '2025-10-02 00:34:43.362309', 'step': 13360, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:34:43.427726', 'step': 13360, 'epoch': 2}
{'type': 'loss', 'content': 0.07947177439928055, 'timestamp': '2025-10-02 00:34:43.437601', 'step': 13361, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:43.493153', 'step': 13361, 'epoch': 2}
{'type': 'loss', 'content': 0.1733851283788681, 'timestamp': '2025-10-02 00:34:43.496516', 'step': 13362, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:43.562508', 'step': 13362, 'epoch': 2}
{'type': 'loss', 'content': 0.08420488238334656, 'timestamp': '2025-10-02 00:34:43.568478', 'step': 13363, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:34:43.633500', 'step': 13363, 'epoch': 2}
{'type': 'loss', 'content': 0.09084440022706985, 'timestamp': '2025-10-02 00:34:43.643847', 'step': 13364, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:43.703154', 'step': 13364, 'epoch': 2}
{'type': 'loss', 'content': 0.11804571747779846, 'timestamp': '2025-10-02 00:34:43.710657', 'step': 13365, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:34:43.766923', 'step': 13365, 'epoch': 2}
{'type': 'loss', 'content': 0.06244470551609993, 'timestamp': '2025-10-02 00:34:43.773171', 'step': 13366, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:43.844306', 'step': 13366, 'epoch': 2}
{'type': 'loss', 'content': 0.08613383769989014, 'timestamp': '2025-10-02 00:34:43.850331', 'step': 13367, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:43.905353', 'step': 13367, 'epoch': 2}
{'type': 'loss', 'content': 0.12401285022497177, 'timestamp': '2025-10-02 00:34:43.915403', 'step': 13368, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:43.980506', 'step': 13368, 'epoch': 2}
{'type': 'loss', 'content': 0.05287911742925644, 'timestamp': '2025-10-02 00:34:43.987273', 'step': 13369, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 640], 'flops': 12800077771264.0}, 'timestamp': '2025-10-02 00:34:44.098107', 'step': 13369, 'epoch': 2}
{'type': 'loss', 'content': 0.014627420343458652, 'timestamp': '2025-10-02 00:34:44.115259', 'step': 13370, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:34:44.178947', 'step': 13370, 'epoch': 2}
{'type': 'loss', 'content': 0.10004220902919769, 'timestamp': '2025-10-02 00:34:44.188316', 'step': 13371, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:34:44.258344', 'step': 13371, 'epoch': 2}
{'type': 'loss', 'content': 0.031435173004865646, 'timestamp': '2025-10-02 00:34:44.269633', 'step': 13372, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:44.324586', 'step': 13372, 'epoch': 2}
{'type': 'loss', 'content': 0.04895871505141258, 'timestamp': '2025-10-02 00:34:44.334832', 'step': 13373, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:44.390495', 'step': 13373, 'epoch': 2}
{'type': 'loss', 'content': 0.0622657872736454, 'timestamp': '2025-10-02 00:34:44.392699', 'step': 13374, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:44.448321', 'step': 13374, 'epoch': 2}
{'type': 'loss', 'content': 0.06141899153590202, 'timestamp': '2025-10-02 00:34:44.454455', 'step': 13375, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:44.509422', 'step': 13375, 'epoch': 2}
{'type': 'loss', 'content': 0.07181952148675919, 'timestamp': '2025-10-02 00:34:44.518030', 'step': 13376, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 544], 'flops': 10880066115712.0}, 'timestamp': '2025-10-02 00:34:44.598958', 'step': 13376, 'epoch': 2}
{'type': 'loss', 'content': 0.014319151639938354, 'timestamp': '2025-10-02 00:34:44.615320', 'step': 13377, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:44.669361', 'step': 13377, 'epoch': 2}
{'type': 'loss', 'content': 0.1388542801141739, 'timestamp': '2025-10-02 00:34:44.671926', 'step': 13378, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:34:44.735285', 'step': 13378, 'epoch': 2}
{'type': 'loss', 'content': 0.046817149966955185, 'timestamp': '2025-10-02 00:34:44.746032', 'step': 13379, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:44.801811', 'step': 13379, 'epoch': 2}
{'type': 'loss', 'content': 0.1708521544933319, 'timestamp': '2025-10-02 00:34:44.807917', 'step': 13380, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:44.860978', 'step': 13380, 'epoch': 2}
{'type': 'loss', 'content': 0.09153229743242264, 'timestamp': '2025-10-02 00:34:44.863248', 'step': 13381, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:44.917636', 'step': 13381, 'epoch': 2}
{'type': 'loss', 'content': 0.16042597591876984, 'timestamp': '2025-10-02 00:34:44.919953', 'step': 13382, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:34:44.982790', 'step': 13382, 'epoch': 2}
{'type': 'loss', 'content': 0.030086684972047806, 'timestamp': '2025-10-02 00:34:44.993311', 'step': 13383, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:45.048072', 'step': 13383, 'epoch': 2}
{'type': 'loss', 'content': 0.05354355648159981, 'timestamp': '2025-10-02 00:34:45.053804', 'step': 13384, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:45.107640', 'step': 13384, 'epoch': 2}
{'type': 'loss', 'content': 0.06619530916213989, 'timestamp': '2025-10-02 00:34:45.109931', 'step': 13385, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:45.163551', 'step': 13385, 'epoch': 2}
{'type': 'loss', 'content': 0.1278051882982254, 'timestamp': '2025-10-02 00:34:45.165658', 'step': 13386, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:34:45.227275', 'step': 13386, 'epoch': 2}
{'type': 'loss', 'content': 0.030273091048002243, 'timestamp': '2025-10-02 00:34:45.237753', 'step': 13387, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:45.292700', 'step': 13387, 'epoch': 2}
{'type': 'loss', 'content': 0.03470858186483383, 'timestamp': '2025-10-02 00:34:45.299590', 'step': 13388, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:45.356828', 'step': 13388, 'epoch': 2}
{'type': 'loss', 'content': 0.0536886565387249, 'timestamp': '2025-10-02 00:34:45.359242', 'step': 13389, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:45.415347', 'step': 13389, 'epoch': 2}
{'type': 'loss', 'content': 0.06363038718700409, 'timestamp': '2025-10-02 00:34:45.417892', 'step': 13390, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:45.472697', 'step': 13390, 'epoch': 2}
{'type': 'loss', 'content': 0.027435125783085823, 'timestamp': '2025-10-02 00:34:45.475281', 'step': 13391, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:45.530516', 'step': 13391, 'epoch': 2}
{'type': 'loss', 'content': 0.07135044038295746, 'timestamp': '2025-10-02 00:34:45.536984', 'step': 13392, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:45.595286', 'step': 13392, 'epoch': 2}
{'type': 'loss', 'content': 0.10142358392477036, 'timestamp': '2025-10-02 00:34:45.598455', 'step': 13393, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:45.653983', 'step': 13393, 'epoch': 2}
{'type': 'loss', 'content': 0.0582883358001709, 'timestamp': '2025-10-02 00:34:45.663525', 'step': 13394, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:34:45.718777', 'step': 13394, 'epoch': 2}
{'type': 'loss', 'content': 0.13864600658416748, 'timestamp': '2025-10-02 00:34:45.721232', 'step': 13395, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:34:45.776042', 'step': 13395, 'epoch': 2}
{'type': 'loss', 'content': 0.1262333244085312, 'timestamp': '2025-10-02 00:34:45.782223', 'step': 13396, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:45.836213', 'step': 13396, 'epoch': 2}
{'type': 'loss', 'content': 0.06988296657800674, 'timestamp': '2025-10-02 00:34:45.842503', 'step': 13397, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:45.896149', 'step': 13397, 'epoch': 2}
{'type': 'loss', 'content': 0.09341911971569061, 'timestamp': '2025-10-02 00:34:45.898697', 'step': 13398, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:34:45.953722', 'step': 13398, 'epoch': 2}
{'type': 'loss', 'content': 0.11278314888477325, 'timestamp': '2025-10-02 00:34:45.956337', 'step': 13399, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:34:46.011512', 'step': 13399, 'epoch': 2}
{'type': 'loss', 'content': 0.05071234703063965, 'timestamp': '2025-10-02 00:34:46.017497', 'step': 13400, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:34:46.078393', 'step': 13400, 'epoch': 2}
{'type': 'loss', 'content': 0.008602862246334553, 'timestamp': '2025-10-02 00:34:46.089741', 'step': 13401, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:46.144301', 'step': 13401, 'epoch': 2}
{'type': 'loss', 'content': 0.09396269917488098, 'timestamp': '2025-10-02 00:34:46.146716', 'step': 13402, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:46.200976', 'step': 13402, 'epoch': 2}
{'type': 'loss', 'content': 0.13019689917564392, 'timestamp': '2025-10-02 00:34:46.203287', 'step': 13403, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:46.257885', 'step': 13403, 'epoch': 2}
{'type': 'loss', 'content': 0.07283635437488556, 'timestamp': '2025-10-02 00:34:46.263479', 'step': 13404, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:46.316717', 'step': 13404, 'epoch': 2}
{'type': 'loss', 'content': 0.14893363416194916, 'timestamp': '2025-10-02 00:34:46.319237', 'step': 13405, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:46.373803', 'step': 13405, 'epoch': 2}
{'type': 'loss', 'content': 0.029755201190710068, 'timestamp': '2025-10-02 00:34:46.376434', 'step': 13406, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:46.432153', 'step': 13406, 'epoch': 2}
{'type': 'loss', 'content': 0.10332538187503815, 'timestamp': '2025-10-02 00:34:46.438202', 'step': 13407, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:46.492819', 'step': 13407, 'epoch': 2}
{'type': 'loss', 'content': 0.023938823491334915, 'timestamp': '2025-10-02 00:34:46.499045', 'step': 13408, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:46.553334', 'step': 13408, 'epoch': 2}
{'type': 'loss', 'content': 0.033781711012125015, 'timestamp': '2025-10-02 00:34:46.559494', 'step': 13409, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:46.615492', 'step': 13409, 'epoch': 2}
{'type': 'loss', 'content': 0.04183017089962959, 'timestamp': '2025-10-02 00:34:46.617794', 'step': 13410, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:46.672504', 'step': 13410, 'epoch': 2}
{'type': 'loss', 'content': 0.06720702350139618, 'timestamp': '2025-10-02 00:34:46.674810', 'step': 13411, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:46.729489', 'step': 13411, 'epoch': 2}
{'type': 'loss', 'content': 0.1755615621805191, 'timestamp': '2025-10-02 00:34:46.735376', 'step': 13412, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:46.788735', 'step': 13412, 'epoch': 2}
{'type': 'loss', 'content': 0.2560156285762787, 'timestamp': '2025-10-02 00:34:46.793222', 'step': 13413, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:46.847339', 'step': 13413, 'epoch': 2}
{'type': 'loss', 'content': 0.1735270470380783, 'timestamp': '2025-10-02 00:34:46.849638', 'step': 13414, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:46.908053', 'step': 13414, 'epoch': 2}
{'type': 'loss', 'content': 0.08228638768196106, 'timestamp': '2025-10-02 00:34:46.917586', 'step': 13415, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:46.971788', 'step': 13415, 'epoch': 2}
{'type': 'loss', 'content': 0.18507790565490723, 'timestamp': '2025-10-02 00:34:46.977909', 'step': 13416, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:47.032823', 'step': 13416, 'epoch': 2}
{'type': 'loss', 'content': 0.06799435615539551, 'timestamp': '2025-10-02 00:34:47.035547', 'step': 13417, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:34:47.091826', 'step': 13417, 'epoch': 2}
{'type': 'loss', 'content': 0.037623632699251175, 'timestamp': '2025-10-02 00:34:47.101219', 'step': 13418, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:34:47.161444', 'step': 13418, 'epoch': 2}
{'type': 'loss', 'content': 0.025008684024214745, 'timestamp': '2025-10-02 00:34:47.171569', 'step': 13419, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:34:47.234553', 'step': 13419, 'epoch': 2}
{'type': 'loss', 'content': 0.046729665249586105, 'timestamp': '2025-10-02 00:34:47.245861', 'step': 13420, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:34:47.302903', 'step': 13420, 'epoch': 2}
{'type': 'loss', 'content': 0.011345944367349148, 'timestamp': '2025-10-02 00:34:47.312627', 'step': 13421, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:47.369026', 'step': 13421, 'epoch': 2}
{'type': 'loss', 'content': 0.04307005554437637, 'timestamp': '2025-10-02 00:34:47.378585', 'step': 13422, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:47.434987', 'step': 13422, 'epoch': 2}
{'type': 'loss', 'content': 0.09927762299776077, 'timestamp': '2025-10-02 00:34:47.437402', 'step': 13423, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:34:47.494332', 'step': 13423, 'epoch': 2}
{'type': 'loss', 'content': 0.10225032269954681, 'timestamp': '2025-10-02 00:34:47.500484', 'step': 13424, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:47.553893', 'step': 13424, 'epoch': 2}
{'type': 'loss', 'content': 0.11580746620893478, 'timestamp': '2025-10-02 00:34:47.556568', 'step': 13425, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:47.611593', 'step': 13425, 'epoch': 2}
{'type': 'loss', 'content': 0.13338661193847656, 'timestamp': '2025-10-02 00:34:47.613854', 'step': 13426, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:47.668191', 'step': 13426, 'epoch': 2}
{'type': 'loss', 'content': 0.00586518133059144, 'timestamp': '2025-10-02 00:34:47.675958', 'step': 13427, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:34:47.736592', 'step': 13427, 'epoch': 2}
{'type': 'loss', 'content': 0.017865773290395737, 'timestamp': '2025-10-02 00:34:47.747547', 'step': 13428, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:47.801139', 'step': 13428, 'epoch': 2}
{'type': 'loss', 'content': 0.11345242708921432, 'timestamp': '2025-10-02 00:34:47.803686', 'step': 13429, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:47.858492', 'step': 13429, 'epoch': 2}
{'type': 'loss', 'content': 0.08276309818029404, 'timestamp': '2025-10-02 00:34:47.860720', 'step': 13430, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:47.914901', 'step': 13430, 'epoch': 2}
{'type': 'loss', 'content': 0.019381733611226082, 'timestamp': '2025-10-02 00:34:47.922811', 'step': 13431, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:47.977303', 'step': 13431, 'epoch': 2}
{'type': 'loss', 'content': 0.17423653602600098, 'timestamp': '2025-10-02 00:34:47.983153', 'step': 13432, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:48.036743', 'step': 13432, 'epoch': 2}
{'type': 'loss', 'content': 0.06925050914287567, 'timestamp': '2025-10-02 00:34:48.039289', 'step': 13433, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:48.096139', 'step': 13433, 'epoch': 2}
{'type': 'loss', 'content': 0.028808297589421272, 'timestamp': '2025-10-02 00:34:48.105689', 'step': 13434, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:34:48.160891', 'step': 13434, 'epoch': 2}
{'type': 'loss', 'content': 0.0956154465675354, 'timestamp': '2025-10-02 00:34:48.163131', 'step': 13435, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:48.217629', 'step': 13435, 'epoch': 2}
{'type': 'loss', 'content': 0.05986756086349487, 'timestamp': '2025-10-02 00:34:48.226209', 'step': 13436, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:48.280198', 'step': 13436, 'epoch': 2}
{'type': 'loss', 'content': 0.031227311119437218, 'timestamp': '2025-10-02 00:34:48.282522', 'step': 13437, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:48.336582', 'step': 13437, 'epoch': 2}
{'type': 'loss', 'content': 0.08529447764158249, 'timestamp': '2025-10-02 00:34:48.344495', 'step': 13438, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:48.399815', 'step': 13438, 'epoch': 2}
{'type': 'loss', 'content': 0.02663680538535118, 'timestamp': '2025-10-02 00:34:48.409365', 'step': 13439, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:48.464979', 'step': 13439, 'epoch': 2}
{'type': 'loss', 'content': 0.1813933551311493, 'timestamp': '2025-10-02 00:34:48.470904', 'step': 13440, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:48.524370', 'step': 13440, 'epoch': 2}
{'type': 'loss', 'content': 0.052880480885505676, 'timestamp': '2025-10-02 00:34:48.530570', 'step': 13441, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:48.584390', 'step': 13441, 'epoch': 2}
{'type': 'loss', 'content': 0.06334143877029419, 'timestamp': '2025-10-02 00:34:48.586978', 'step': 13442, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:48.641930', 'step': 13442, 'epoch': 2}
{'type': 'loss', 'content': 0.16321612894535065, 'timestamp': '2025-10-02 00:34:48.644691', 'step': 13443, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:48.698909', 'step': 13443, 'epoch': 2}
{'type': 'loss', 'content': 0.036298736929893494, 'timestamp': '2025-10-02 00:34:48.704767', 'step': 13444, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:48.759317', 'step': 13444, 'epoch': 2}
{'type': 'loss', 'content': 0.044124722480773926, 'timestamp': '2025-10-02 00:34:48.764149', 'step': 13445, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:48.817771', 'step': 13445, 'epoch': 2}
{'type': 'loss', 'content': 0.1107083410024643, 'timestamp': '2025-10-02 00:34:48.820913', 'step': 13446, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:34:48.876856', 'step': 13446, 'epoch': 2}
{'type': 'loss', 'content': 0.081309974193573, 'timestamp': '2025-10-02 00:34:48.880023', 'step': 13447, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:48.935273', 'step': 13447, 'epoch': 2}
{'type': 'loss', 'content': 0.08876227587461472, 'timestamp': '2025-10-02 00:34:48.941788', 'step': 13448, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:48.995990', 'step': 13448, 'epoch': 2}
{'type': 'loss', 'content': 0.09330319613218307, 'timestamp': '2025-10-02 00:34:48.998415', 'step': 13449, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:49.053489', 'step': 13449, 'epoch': 2}
{'type': 'loss', 'content': 0.06456882506608963, 'timestamp': '2025-10-02 00:34:49.059787', 'step': 13450, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:49.115157', 'step': 13450, 'epoch': 2}
{'type': 'loss', 'content': 0.07177448272705078, 'timestamp': '2025-10-02 00:34:49.117352', 'step': 13451, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:34:49.172234', 'step': 13451, 'epoch': 2}
{'type': 'loss', 'content': 0.06196282431483269, 'timestamp': '2025-10-02 00:34:49.178939', 'step': 13452, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:34:49.235075', 'step': 13452, 'epoch': 2}
{'type': 'loss', 'content': 0.06330163776874542, 'timestamp': '2025-10-02 00:34:49.245036', 'step': 13453, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:34:49.299605', 'step': 13453, 'epoch': 2}
{'type': 'loss', 'content': 0.05076071619987488, 'timestamp': '2025-10-02 00:34:49.302038', 'step': 13454, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:34:49.359262', 'step': 13454, 'epoch': 2}
{'type': 'loss', 'content': 0.09161742031574249, 'timestamp': '2025-10-02 00:34:49.361656', 'step': 13455, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:49.415411', 'step': 13455, 'epoch': 2}
{'type': 'loss', 'content': 0.0943591445684433, 'timestamp': '2025-10-02 00:34:49.421271', 'step': 13456, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:49.475953', 'step': 13456, 'epoch': 2}
{'type': 'loss', 'content': 0.07082299143075943, 'timestamp': '2025-10-02 00:34:49.482093', 'step': 13457, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:49.538267', 'step': 13457, 'epoch': 2}
{'type': 'loss', 'content': 0.07682900875806808, 'timestamp': '2025-10-02 00:34:49.544411', 'step': 13458, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:49.599773', 'step': 13458, 'epoch': 2}
{'type': 'loss', 'content': 0.0804331824183464, 'timestamp': '2025-10-02 00:34:49.602716', 'step': 13459, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:49.660213', 'step': 13459, 'epoch': 2}
{'type': 'loss', 'content': 0.042737677693367004, 'timestamp': '2025-10-02 00:34:49.667096', 'step': 13460, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:49.722409', 'step': 13460, 'epoch': 2}
{'type': 'loss', 'content': 0.03599908947944641, 'timestamp': '2025-10-02 00:34:49.725004', 'step': 13461, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:49.782178', 'step': 13461, 'epoch': 2}
{'type': 'loss', 'content': 0.028662728145718575, 'timestamp': '2025-10-02 00:34:49.784986', 'step': 13462, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:49.841860', 'step': 13462, 'epoch': 2}
{'type': 'loss', 'content': 0.13853223621845245, 'timestamp': '2025-10-02 00:34:49.845017', 'step': 13463, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:49.902321', 'step': 13463, 'epoch': 2}
{'type': 'loss', 'content': 0.04472914710640907, 'timestamp': '2025-10-02 00:34:49.909295', 'step': 13464, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:34:49.970317', 'step': 13464, 'epoch': 2}
{'type': 'loss', 'content': 0.05634404346346855, 'timestamp': '2025-10-02 00:34:49.981867', 'step': 13465, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:34:50.038003', 'step': 13465, 'epoch': 2}
{'type': 'loss', 'content': 0.1437956690788269, 'timestamp': '2025-10-02 00:34:50.040950', 'step': 13466, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:34:50.104841', 'step': 13466, 'epoch': 2}
{'type': 'loss', 'content': 0.016691355034708977, 'timestamp': '2025-10-02 00:34:50.115292', 'step': 13467, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 592], 'flops': 11840071943488.0}, 'timestamp': '2025-10-02 00:34:50.205022', 'step': 13467, 'epoch': 2}
{'type': 'loss', 'content': 0.01833343133330345, 'timestamp': '2025-10-02 00:34:50.222236', 'step': 13468, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:50.279200', 'step': 13468, 'epoch': 2}
{'type': 'loss', 'content': 0.04466725513339043, 'timestamp': '2025-10-02 00:34:50.283040', 'step': 13469, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:50.337189', 'step': 13469, 'epoch': 2}
{'type': 'loss', 'content': 0.08668810874223709, 'timestamp': '2025-10-02 00:34:50.340558', 'step': 13470, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:50.412832', 'step': 13470, 'epoch': 2}
{'type': 'loss', 'content': 0.12799333035945892, 'timestamp': '2025-10-02 00:34:50.415683', 'step': 13471, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:34:50.471841', 'step': 13471, 'epoch': 2}
{'type': 'loss', 'content': 0.2202131748199463, 'timestamp': '2025-10-02 00:34:50.479122', 'step': 13472, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:50.534569', 'step': 13472, 'epoch': 2}
{'type': 'loss', 'content': 0.14338098466396332, 'timestamp': '2025-10-02 00:34:50.537734', 'step': 13473, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:50.593220', 'step': 13473, 'epoch': 2}
{'type': 'loss', 'content': 0.14132624864578247, 'timestamp': '2025-10-02 00:34:50.596454', 'step': 13474, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:50.652311', 'step': 13474, 'epoch': 2}
{'type': 'loss', 'content': 0.02881108783185482, 'timestamp': '2025-10-02 00:34:50.659912', 'step': 13475, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:34:50.723177', 'step': 13475, 'epoch': 2}
{'type': 'loss', 'content': 0.017119275406003, 'timestamp': '2025-10-02 00:34:50.734406', 'step': 13476, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:34:50.795838', 'step': 13476, 'epoch': 2}
{'type': 'loss', 'content': 0.03196312487125397, 'timestamp': '2025-10-02 00:34:50.807170', 'step': 13477, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:50.864611', 'step': 13477, 'epoch': 2}
{'type': 'loss', 'content': 0.030692249536514282, 'timestamp': '2025-10-02 00:34:50.872405', 'step': 13478, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:34:50.934807', 'step': 13478, 'epoch': 2}
{'type': 'loss', 'content': 0.024886153638362885, 'timestamp': '2025-10-02 00:34:50.944960', 'step': 13479, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:51.000382', 'step': 13479, 'epoch': 2}
{'type': 'loss', 'content': 0.0404977984726429, 'timestamp': '2025-10-02 00:34:51.007530', 'step': 13480, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:34:51.062362', 'step': 13480, 'epoch': 2}
{'type': 'loss', 'content': 0.05204781889915466, 'timestamp': '2025-10-02 00:34:51.064401', 'step': 13481, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:51.118369', 'step': 13481, 'epoch': 2}
{'type': 'loss', 'content': 0.1306481659412384, 'timestamp': '2025-10-02 00:34:51.120659', 'step': 13482, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:34:51.174227', 'step': 13482, 'epoch': 2}
{'type': 'loss', 'content': 0.05580617114901543, 'timestamp': '2025-10-02 00:34:51.176635', 'step': 13483, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:51.230125', 'step': 13483, 'epoch': 2}
{'type': 'loss', 'content': 0.16563506424427032, 'timestamp': '2025-10-02 00:34:51.236377', 'step': 13484, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:51.290331', 'step': 13484, 'epoch': 2}
{'type': 'loss', 'content': 0.015842311084270477, 'timestamp': '2025-10-02 00:34:51.292935', 'step': 13485, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:51.349934', 'step': 13485, 'epoch': 2}
{'type': 'loss', 'content': 0.06613586843013763, 'timestamp': '2025-10-02 00:34:51.359514', 'step': 13486, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:51.414064', 'step': 13486, 'epoch': 2}
{'type': 'loss', 'content': 0.0468420572578907, 'timestamp': '2025-10-02 00:34:51.417013', 'step': 13487, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:51.470930', 'step': 13487, 'epoch': 2}
{'type': 'loss', 'content': 0.034247491508722305, 'timestamp': '2025-10-02 00:34:51.479730', 'step': 13488, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:51.534918', 'step': 13488, 'epoch': 2}
{'type': 'loss', 'content': 0.04249544069170952, 'timestamp': '2025-10-02 00:34:51.538119', 'step': 13489, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:51.593447', 'step': 13489, 'epoch': 2}
{'type': 'loss', 'content': 0.06431214511394501, 'timestamp': '2025-10-02 00:34:51.595560', 'step': 13490, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:51.650737', 'step': 13490, 'epoch': 2}
{'type': 'loss', 'content': 0.06488677114248276, 'timestamp': '2025-10-02 00:34:51.653402', 'step': 13491, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:51.708173', 'step': 13491, 'epoch': 2}
{'type': 'loss', 'content': 0.031978268176317215, 'timestamp': '2025-10-02 00:34:51.713896', 'step': 13492, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:51.766850', 'step': 13492, 'epoch': 2}
{'type': 'loss', 'content': 0.049437519162893295, 'timestamp': '2025-10-02 00:34:51.769278', 'step': 13493, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:51.822663', 'step': 13493, 'epoch': 2}
{'type': 'loss', 'content': 0.08884360641241074, 'timestamp': '2025-10-02 00:34:51.825388', 'step': 13494, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:34:51.879161', 'step': 13494, 'epoch': 2}
{'type': 'loss', 'content': 0.11656446009874344, 'timestamp': '2025-10-02 00:34:51.888509', 'step': 13495, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:51.942735', 'step': 13495, 'epoch': 2}
{'type': 'loss', 'content': 0.03678866848349571, 'timestamp': '2025-10-02 00:34:51.951296', 'step': 13496, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:52.005462', 'step': 13496, 'epoch': 2}
{'type': 'loss', 'content': 0.015122359618544579, 'timestamp': '2025-10-02 00:34:52.007816', 'step': 13497, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:34:52.065007', 'step': 13497, 'epoch': 2}
{'type': 'loss', 'content': 0.09348046034574509, 'timestamp': '2025-10-02 00:34:52.067353', 'step': 13498, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:52.121323', 'step': 13498, 'epoch': 2}
{'type': 'loss', 'content': 0.0827813595533371, 'timestamp': '2025-10-02 00:34:52.123563', 'step': 13499, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:34:52.199474', 'step': 13499, 'epoch': 2}
{'type': 'loss', 'content': 0.009198823943734169, 'timestamp': '2025-10-02 00:34:52.214089', 'step': 13500, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 13500', 'timestamp': '2025-10-02 00:34:52.639240', 'step': 13500, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:52.694649', 'step': 13500, 'epoch': 2}
{'type': 'loss', 'content': 0.158422589302063, 'timestamp': '2025-10-02 00:34:52.701183', 'step': 13501, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:52.755645', 'step': 13501, 'epoch': 2}
{'type': 'loss', 'content': 0.014282206073403358, 'timestamp': '2025-10-02 00:34:52.758368', 'step': 13502, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:34:52.812766', 'step': 13502, 'epoch': 2}
{'type': 'loss', 'content': 0.07707853615283966, 'timestamp': '2025-10-02 00:34:52.815225', 'step': 13503, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:52.868830', 'step': 13503, 'epoch': 2}
{'type': 'loss', 'content': 0.20194219052791595, 'timestamp': '2025-10-02 00:34:52.874650', 'step': 13504, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:34:52.927725', 'step': 13504, 'epoch': 2}
{'type': 'loss', 'content': 0.14614848792552948, 'timestamp': '2025-10-02 00:34:52.930095', 'step': 13505, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:34:52.989966', 'step': 13505, 'epoch': 2}
{'type': 'loss', 'content': 0.02528185024857521, 'timestamp': '2025-10-02 00:34:53.000129', 'step': 13506, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:53.055795', 'step': 13506, 'epoch': 2}
{'type': 'loss', 'content': 0.03886996582150459, 'timestamp': '2025-10-02 00:34:53.065345', 'step': 13507, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:53.120009', 'step': 13507, 'epoch': 2}
{'type': 'loss', 'content': 0.06707264482975006, 'timestamp': '2025-10-02 00:34:53.126343', 'step': 13508, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:53.179930', 'step': 13508, 'epoch': 2}
{'type': 'loss', 'content': 0.050150949507951736, 'timestamp': '2025-10-02 00:34:53.187773', 'step': 13509, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:53.242072', 'step': 13509, 'epoch': 2}
{'type': 'loss', 'content': 0.07416953146457672, 'timestamp': '2025-10-02 00:34:53.244434', 'step': 13510, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:34:53.299087', 'step': 13510, 'epoch': 2}
{'type': 'loss', 'content': 0.05231983959674835, 'timestamp': '2025-10-02 00:34:53.301624', 'step': 13511, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:53.356580', 'step': 13511, 'epoch': 2}
{'type': 'loss', 'content': 0.007053222972899675, 'timestamp': '2025-10-02 00:34:53.362388', 'step': 13512, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:34:53.421095', 'step': 13512, 'epoch': 2}
{'type': 'loss', 'content': 0.007652464788407087, 'timestamp': '2025-10-02 00:34:53.432064', 'step': 13513, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:53.486659', 'step': 13513, 'epoch': 2}
{'type': 'loss', 'content': 0.08043930679559708, 'timestamp': '2025-10-02 00:34:53.489095', 'step': 13514, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:53.543991', 'step': 13514, 'epoch': 2}
{'type': 'loss', 'content': 0.12142250686883926, 'timestamp': '2025-10-02 00:34:53.546626', 'step': 13515, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:53.602792', 'step': 13515, 'epoch': 2}
{'type': 'loss', 'content': 0.025134041905403137, 'timestamp': '2025-10-02 00:34:53.609633', 'step': 13516, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:53.665032', 'step': 13516, 'epoch': 2}
{'type': 'loss', 'content': 0.05700375884771347, 'timestamp': '2025-10-02 00:34:53.667663', 'step': 13517, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:34:53.722333', 'step': 13517, 'epoch': 2}
{'type': 'loss', 'content': 0.17282600700855255, 'timestamp': '2025-10-02 00:34:53.724974', 'step': 13518, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:53.779626', 'step': 13518, 'epoch': 2}
{'type': 'loss', 'content': 0.04027216508984566, 'timestamp': '2025-10-02 00:34:53.785820', 'step': 13519, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:53.839513', 'step': 13519, 'epoch': 2}
{'type': 'loss', 'content': 0.019961955025792122, 'timestamp': '2025-10-02 00:34:53.845488', 'step': 13520, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:34:53.899730', 'step': 13520, 'epoch': 2}
{'type': 'loss', 'content': 0.0607905276119709, 'timestamp': '2025-10-02 00:34:53.909736', 'step': 13521, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:53.964021', 'step': 13521, 'epoch': 2}
{'type': 'loss', 'content': 0.10873110592365265, 'timestamp': '2025-10-02 00:34:53.966537', 'step': 13522, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:54.022159', 'step': 13522, 'epoch': 2}
{'type': 'loss', 'content': 0.0013052918948233128, 'timestamp': '2025-10-02 00:34:54.024581', 'step': 13523, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:34:54.078421', 'step': 13523, 'epoch': 2}
{'type': 'loss', 'content': 0.13313858211040497, 'timestamp': '2025-10-02 00:34:54.084389', 'step': 13524, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:54.138658', 'step': 13524, 'epoch': 2}
{'type': 'loss', 'content': 0.06681149452924728, 'timestamp': '2025-10-02 00:34:54.141563', 'step': 13525, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:54.196082', 'step': 13525, 'epoch': 2}
{'type': 'loss', 'content': 0.0693182572722435, 'timestamp': '2025-10-02 00:34:54.198474', 'step': 13526, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:54.252197', 'step': 13526, 'epoch': 2}
{'type': 'loss', 'content': 0.16449342668056488, 'timestamp': '2025-10-02 00:34:54.254602', 'step': 13527, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:54.308086', 'step': 13527, 'epoch': 2}
{'type': 'loss', 'content': 0.06035466864705086, 'timestamp': '2025-10-02 00:34:54.313781', 'step': 13528, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:54.366373', 'step': 13528, 'epoch': 2}
{'type': 'loss', 'content': 0.17352890968322754, 'timestamp': '2025-10-02 00:34:54.368830', 'step': 13529, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:54.423166', 'step': 13529, 'epoch': 2}
{'type': 'loss', 'content': 0.06388118118047714, 'timestamp': '2025-10-02 00:34:54.425855', 'step': 13530, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:54.480724', 'step': 13530, 'epoch': 2}
{'type': 'loss', 'content': 0.041102729737758636, 'timestamp': '2025-10-02 00:34:54.486805', 'step': 13531, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:54.540934', 'step': 13531, 'epoch': 2}
{'type': 'loss', 'content': 0.08196820318698883, 'timestamp': '2025-10-02 00:34:54.547061', 'step': 13532, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:34:54.608071', 'step': 13532, 'epoch': 2}
{'type': 'loss', 'content': 0.007747754920274019, 'timestamp': '2025-10-02 00:34:54.619397', 'step': 13533, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:54.673592', 'step': 13533, 'epoch': 2}
{'type': 'loss', 'content': 0.04296359047293663, 'timestamp': '2025-10-02 00:34:54.679842', 'step': 13534, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:54.734581', 'step': 13534, 'epoch': 2}
{'type': 'loss', 'content': 0.0722779929637909, 'timestamp': '2025-10-02 00:34:54.737035', 'step': 13535, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:34:54.791161', 'step': 13535, 'epoch': 2}
{'type': 'loss', 'content': 0.0776970386505127, 'timestamp': '2025-10-02 00:34:54.796937', 'step': 13536, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:54.850573', 'step': 13536, 'epoch': 2}
{'type': 'loss', 'content': 0.03389886021614075, 'timestamp': '2025-10-02 00:34:54.853482', 'step': 13537, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:54.907265', 'step': 13537, 'epoch': 2}
{'type': 'loss', 'content': 0.06944818794727325, 'timestamp': '2025-10-02 00:34:54.909404', 'step': 13538, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:54.963086', 'step': 13538, 'epoch': 2}
{'type': 'loss', 'content': 0.020316768437623978, 'timestamp': '2025-10-02 00:34:54.965737', 'step': 13539, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:55.019654', 'step': 13539, 'epoch': 2}
{'type': 'loss', 'content': 0.09622950851917267, 'timestamp': '2025-10-02 00:34:55.025367', 'step': 13540, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:34:55.080149', 'step': 13540, 'epoch': 2}
{'type': 'loss', 'content': 0.13773071765899658, 'timestamp': '2025-10-02 00:34:55.082521', 'step': 13541, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:55.138017', 'step': 13541, 'epoch': 2}
{'type': 'loss', 'content': 0.06054004654288292, 'timestamp': '2025-10-02 00:34:55.144167', 'step': 13542, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:55.198026', 'step': 13542, 'epoch': 2}
{'type': 'loss', 'content': 0.15266184508800507, 'timestamp': '2025-10-02 00:34:55.200363', 'step': 13543, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:55.255176', 'step': 13543, 'epoch': 2}
{'type': 'loss', 'content': 0.020810123533010483, 'timestamp': '2025-10-02 00:34:55.260817', 'step': 13544, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:55.313898', 'step': 13544, 'epoch': 2}
{'type': 'loss', 'content': 0.0381007194519043, 'timestamp': '2025-10-02 00:34:55.321803', 'step': 13545, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:55.376960', 'step': 13545, 'epoch': 2}
{'type': 'loss', 'content': 0.045299578458070755, 'timestamp': '2025-10-02 00:34:55.379355', 'step': 13546, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:55.433433', 'step': 13546, 'epoch': 2}
{'type': 'loss', 'content': 0.05769243836402893, 'timestamp': '2025-10-02 00:34:55.436252', 'step': 13547, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:34:55.510877', 'step': 13547, 'epoch': 2}
{'type': 'loss', 'content': 0.005936264991760254, 'timestamp': '2025-10-02 00:34:55.525126', 'step': 13548, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:55.580914', 'step': 13548, 'epoch': 2}
{'type': 'loss', 'content': 0.05886857211589813, 'timestamp': '2025-10-02 00:34:55.587207', 'step': 13549, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:55.643388', 'step': 13549, 'epoch': 2}
{'type': 'loss', 'content': 0.04039433225989342, 'timestamp': '2025-10-02 00:34:55.651063', 'step': 13550, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:55.705265', 'step': 13550, 'epoch': 2}
{'type': 'loss', 'content': 0.009184321388602257, 'timestamp': '2025-10-02 00:34:55.712977', 'step': 13551, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:34:55.771783', 'step': 13551, 'epoch': 2}
{'type': 'loss', 'content': 0.035703789442777634, 'timestamp': '2025-10-02 00:34:55.782795', 'step': 13552, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:55.836541', 'step': 13552, 'epoch': 2}
{'type': 'loss', 'content': 0.05148738622665405, 'timestamp': '2025-10-02 00:34:55.838916', 'step': 13553, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:55.892602', 'step': 13553, 'epoch': 2}
{'type': 'loss', 'content': 0.112952820956707, 'timestamp': '2025-10-02 00:34:55.895290', 'step': 13554, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:55.949523', 'step': 13554, 'epoch': 2}
{'type': 'loss', 'content': 0.11887919902801514, 'timestamp': '2025-10-02 00:34:55.951966', 'step': 13555, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:34:56.013848', 'step': 13555, 'epoch': 2}
{'type': 'loss', 'content': 0.05134950950741768, 'timestamp': '2025-10-02 00:34:56.025322', 'step': 13556, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:34:56.079168', 'step': 13556, 'epoch': 2}
{'type': 'loss', 'content': 0.02015344798564911, 'timestamp': '2025-10-02 00:34:56.089101', 'step': 13557, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:34:56.145607', 'step': 13557, 'epoch': 2}
{'type': 'loss', 'content': 0.02477606013417244, 'timestamp': '2025-10-02 00:34:56.148111', 'step': 13558, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:56.202824', 'step': 13558, 'epoch': 2}
{'type': 'loss', 'content': 0.12283220142126083, 'timestamp': '2025-10-02 00:34:56.205054', 'step': 13559, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:56.258788', 'step': 13559, 'epoch': 2}
{'type': 'loss', 'content': 0.10511638969182968, 'timestamp': '2025-10-02 00:34:56.264653', 'step': 13560, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:56.318218', 'step': 13560, 'epoch': 2}
{'type': 'loss', 'content': 0.07028785347938538, 'timestamp': '2025-10-02 00:34:56.320456', 'step': 13561, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:34:56.374463', 'step': 13561, 'epoch': 2}
{'type': 'loss', 'content': 0.12302049994468689, 'timestamp': '2025-10-02 00:34:56.376857', 'step': 13562, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:56.429895', 'step': 13562, 'epoch': 2}
{'type': 'loss', 'content': 0.15019868314266205, 'timestamp': '2025-10-02 00:34:56.432304', 'step': 13563, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:56.486350', 'step': 13563, 'epoch': 2}
{'type': 'loss', 'content': 0.18012073636054993, 'timestamp': '2025-10-02 00:34:56.492946', 'step': 13564, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:56.547147', 'step': 13564, 'epoch': 2}
{'type': 'loss', 'content': 0.04342138022184372, 'timestamp': '2025-10-02 00:34:56.551719', 'step': 13565, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:56.608031', 'step': 13565, 'epoch': 2}
{'type': 'loss', 'content': 0.09304074943065643, 'timestamp': '2025-10-02 00:34:56.611021', 'step': 13566, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:56.667399', 'step': 13566, 'epoch': 2}
{'type': 'loss', 'content': 0.07434114068746567, 'timestamp': '2025-10-02 00:34:56.669929', 'step': 13567, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:56.723297', 'step': 13567, 'epoch': 2}
{'type': 'loss', 'content': 0.09630660712718964, 'timestamp': '2025-10-02 00:34:56.729134', 'step': 13568, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:56.783122', 'step': 13568, 'epoch': 2}
{'type': 'loss', 'content': 0.12126035243272781, 'timestamp': '2025-10-02 00:34:56.785737', 'step': 13569, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:56.843367', 'step': 13569, 'epoch': 2}
{'type': 'loss', 'content': 0.09931246191263199, 'timestamp': '2025-10-02 00:34:56.851161', 'step': 13570, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:56.912018', 'step': 13570, 'epoch': 2}
{'type': 'loss', 'content': 0.10393775254487991, 'timestamp': '2025-10-02 00:34:56.919763', 'step': 13571, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:34:56.974716', 'step': 13571, 'epoch': 2}
{'type': 'loss', 'content': 0.03163287416100502, 'timestamp': '2025-10-02 00:34:56.984839', 'step': 13572, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:34:57.038730', 'step': 13572, 'epoch': 2}
{'type': 'loss', 'content': 0.11094407737255096, 'timestamp': '2025-10-02 00:34:57.041109', 'step': 13573, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:57.097955', 'step': 13573, 'epoch': 2}
{'type': 'loss', 'content': 0.07718810439109802, 'timestamp': '2025-10-02 00:34:57.104806', 'step': 13574, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:57.161786', 'step': 13574, 'epoch': 2}
{'type': 'loss', 'content': 0.03187984228134155, 'timestamp': '2025-10-02 00:34:57.164245', 'step': 13575, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:57.218349', 'step': 13575, 'epoch': 2}
{'type': 'loss', 'content': 0.1422448307275772, 'timestamp': '2025-10-02 00:34:57.224738', 'step': 13576, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:57.279633', 'step': 13576, 'epoch': 2}
{'type': 'loss', 'content': 0.05899441987276077, 'timestamp': '2025-10-02 00:34:57.282187', 'step': 13577, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:57.336692', 'step': 13577, 'epoch': 2}
{'type': 'loss', 'content': 0.04121838137507439, 'timestamp': '2025-10-02 00:34:57.344575', 'step': 13578, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:57.400145', 'step': 13578, 'epoch': 2}
{'type': 'loss', 'content': 0.20322802662849426, 'timestamp': '2025-10-02 00:34:57.402760', 'step': 13579, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:34:57.457711', 'step': 13579, 'epoch': 2}
{'type': 'loss', 'content': 0.12563203275203705, 'timestamp': '2025-10-02 00:34:57.463499', 'step': 13580, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:57.517010', 'step': 13580, 'epoch': 2}
{'type': 'loss', 'content': 0.004963317420333624, 'timestamp': '2025-10-02 00:34:57.524891', 'step': 13581, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:34:57.579342', 'step': 13581, 'epoch': 2}
{'type': 'loss', 'content': 0.10378345847129822, 'timestamp': '2025-10-02 00:34:57.581724', 'step': 13582, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:57.635781', 'step': 13582, 'epoch': 2}
{'type': 'loss', 'content': 0.1661360114812851, 'timestamp': '2025-10-02 00:34:57.640536', 'step': 13583, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:57.695748', 'step': 13583, 'epoch': 2}
{'type': 'loss', 'content': 0.06810624152421951, 'timestamp': '2025-10-02 00:34:57.704240', 'step': 13584, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:34:57.758248', 'step': 13584, 'epoch': 2}
{'type': 'loss', 'content': 0.036670438945293427, 'timestamp': '2025-10-02 00:34:57.760725', 'step': 13585, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:57.814606', 'step': 13585, 'epoch': 2}
{'type': 'loss', 'content': 0.06714289635419846, 'timestamp': '2025-10-02 00:34:57.817337', 'step': 13586, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:34:57.875255', 'step': 13586, 'epoch': 2}
{'type': 'loss', 'content': 0.08581461012363434, 'timestamp': '2025-10-02 00:34:57.877441', 'step': 13587, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:57.931167', 'step': 13587, 'epoch': 2}
{'type': 'loss', 'content': 0.15781152248382568, 'timestamp': '2025-10-02 00:34:57.937069', 'step': 13588, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:57.991435', 'step': 13588, 'epoch': 2}
{'type': 'loss', 'content': 0.03510095179080963, 'timestamp': '2025-10-02 00:34:57.993802', 'step': 13589, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:58.048526', 'step': 13589, 'epoch': 2}
{'type': 'loss', 'content': 0.02957969903945923, 'timestamp': '2025-10-02 00:34:58.050827', 'step': 13590, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:34:58.106893', 'step': 13590, 'epoch': 2}
{'type': 'loss', 'content': 0.21165861189365387, 'timestamp': '2025-10-02 00:34:58.109231', 'step': 13591, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:58.163604', 'step': 13591, 'epoch': 2}
{'type': 'loss', 'content': 0.12381436675786972, 'timestamp': '2025-10-02 00:34:58.170047', 'step': 13592, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:58.223913', 'step': 13592, 'epoch': 2}
{'type': 'loss', 'content': 0.03744212165474892, 'timestamp': '2025-10-02 00:34:58.234115', 'step': 13593, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:58.289061', 'step': 13593, 'epoch': 2}
{'type': 'loss', 'content': 0.08323386311531067, 'timestamp': '2025-10-02 00:34:58.295058', 'step': 13594, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:58.348964', 'step': 13594, 'epoch': 2}
{'type': 'loss', 'content': 0.027223458513617516, 'timestamp': '2025-10-02 00:34:58.351471', 'step': 13595, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:34:58.414660', 'step': 13595, 'epoch': 2}
{'type': 'loss', 'content': 0.06833761930465698, 'timestamp': '2025-10-02 00:34:58.426025', 'step': 13596, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:58.481594', 'step': 13596, 'epoch': 2}
{'type': 'loss', 'content': 0.051981229335069656, 'timestamp': '2025-10-02 00:34:58.489168', 'step': 13597, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:58.544938', 'step': 13597, 'epoch': 2}
{'type': 'loss', 'content': 0.06302749365568161, 'timestamp': '2025-10-02 00:34:58.549834', 'step': 13598, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:34:58.606374', 'step': 13598, 'epoch': 2}
{'type': 'loss', 'content': 0.11852036416530609, 'timestamp': '2025-10-02 00:34:58.609812', 'step': 13599, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:58.667659', 'step': 13599, 'epoch': 2}
{'type': 'loss', 'content': 0.0423758402466774, 'timestamp': '2025-10-02 00:34:58.673549', 'step': 13600, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:58.731336', 'step': 13600, 'epoch': 2}
{'type': 'loss', 'content': 0.11811941862106323, 'timestamp': '2025-10-02 00:34:58.735131', 'step': 13601, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:58.793117', 'step': 13601, 'epoch': 2}
{'type': 'loss', 'content': 0.15136609971523285, 'timestamp': '2025-10-02 00:34:58.796584', 'step': 13602, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:34:58.854608', 'step': 13602, 'epoch': 2}
{'type': 'loss', 'content': 0.09166352450847626, 'timestamp': '2025-10-02 00:34:58.864100', 'step': 13603, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:34:58.920088', 'step': 13603, 'epoch': 2}
{'type': 'loss', 'content': 0.038388095796108246, 'timestamp': '2025-10-02 00:34:58.927048', 'step': 13604, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:34:58.987934', 'step': 13604, 'epoch': 2}
{'type': 'loss', 'content': 0.027080176398158073, 'timestamp': '2025-10-02 00:34:58.997861', 'step': 13605, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:59.063239', 'step': 13605, 'epoch': 2}
{'type': 'loss', 'content': 0.10461460798978806, 'timestamp': '2025-10-02 00:34:59.065995', 'step': 13606, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:34:59.121491', 'step': 13606, 'epoch': 2}
{'type': 'loss', 'content': 0.11642071604728699, 'timestamp': '2025-10-02 00:34:59.124597', 'step': 13607, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:34:59.181657', 'step': 13607, 'epoch': 2}
{'type': 'loss', 'content': 0.10009833425283432, 'timestamp': '2025-10-02 00:34:59.188184', 'step': 13608, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:59.243283', 'step': 13608, 'epoch': 2}
{'type': 'loss', 'content': 0.03737331181764603, 'timestamp': '2025-10-02 00:34:59.245934', 'step': 13609, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:59.301757', 'step': 13609, 'epoch': 2}
{'type': 'loss', 'content': 0.025541171431541443, 'timestamp': '2025-10-02 00:34:59.304910', 'step': 13610, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:34:59.361854', 'step': 13610, 'epoch': 2}
{'type': 'loss', 'content': 0.05804980546236038, 'timestamp': '2025-10-02 00:34:59.364475', 'step': 13611, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:59.421192', 'step': 13611, 'epoch': 2}
{'type': 'loss', 'content': 0.03288800269365311, 'timestamp': '2025-10-02 00:34:59.427774', 'step': 13612, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:34:59.486158', 'step': 13612, 'epoch': 2}
{'type': 'loss', 'content': 0.04958794265985489, 'timestamp': '2025-10-02 00:34:59.489110', 'step': 13613, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:34:59.544829', 'step': 13613, 'epoch': 2}
{'type': 'loss', 'content': 0.1761903464794159, 'timestamp': '2025-10-02 00:34:59.549339', 'step': 13614, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:34:59.624733', 'step': 13614, 'epoch': 2}
{'type': 'loss', 'content': 0.05955388769507408, 'timestamp': '2025-10-02 00:34:59.635255', 'step': 13615, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:34:59.703115', 'step': 13615, 'epoch': 2}
{'type': 'loss', 'content': 0.01866198517382145, 'timestamp': '2025-10-02 00:34:59.715855', 'step': 13616, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:34:59.771739', 'step': 13616, 'epoch': 2}
{'type': 'loss', 'content': 0.07580874115228653, 'timestamp': '2025-10-02 00:34:59.781682', 'step': 13617, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:34:59.837292', 'step': 13617, 'epoch': 2}
{'type': 'loss', 'content': 0.05217998847365379, 'timestamp': '2025-10-02 00:34:59.840440', 'step': 13618, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:34:59.897275', 'step': 13618, 'epoch': 2}
{'type': 'loss', 'content': 0.09487452358007431, 'timestamp': '2025-10-02 00:34:59.905041', 'step': 13619, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:34:59.960257', 'step': 13619, 'epoch': 2}
{'type': 'loss', 'content': 0.12313215434551239, 'timestamp': '2025-10-02 00:34:59.966136', 'step': 13620, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:00.019635', 'step': 13620, 'epoch': 2}
{'type': 'loss', 'content': 0.0783124566078186, 'timestamp': '2025-10-02 00:35:00.027490', 'step': 13621, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:00.083856', 'step': 13621, 'epoch': 2}
{'type': 'loss', 'content': 0.005155580583959818, 'timestamp': '2025-10-02 00:35:00.091760', 'step': 13622, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:35:00.146038', 'step': 13622, 'epoch': 2}
{'type': 'loss', 'content': 0.06409618258476257, 'timestamp': '2025-10-02 00:35:00.148310', 'step': 13623, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:00.204238', 'step': 13623, 'epoch': 2}
{'type': 'loss', 'content': 0.03963354974985123, 'timestamp': '2025-10-02 00:35:00.214387', 'step': 13624, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:00.268481', 'step': 13624, 'epoch': 2}
{'type': 'loss', 'content': 0.1856936812400818, 'timestamp': '2025-10-02 00:35:00.270878', 'step': 13625, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:00.325015', 'step': 13625, 'epoch': 2}
{'type': 'loss', 'content': 0.042156368494033813, 'timestamp': '2025-10-02 00:35:00.327652', 'step': 13626, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:35:00.388925', 'step': 13626, 'epoch': 2}
{'type': 'loss', 'content': 0.07176853716373444, 'timestamp': '2025-10-02 00:35:00.399595', 'step': 13627, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:00.454914', 'step': 13627, 'epoch': 2}
{'type': 'loss', 'content': 0.10758351534605026, 'timestamp': '2025-10-02 00:35:00.460952', 'step': 13628, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:00.514090', 'step': 13628, 'epoch': 2}
{'type': 'loss', 'content': 0.14715023338794708, 'timestamp': '2025-10-02 00:35:00.520155', 'step': 13629, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:00.574410', 'step': 13629, 'epoch': 2}
{'type': 'loss', 'content': 0.02465158700942993, 'timestamp': '2025-10-02 00:35:00.576856', 'step': 13630, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:00.630868', 'step': 13630, 'epoch': 2}
{'type': 'loss', 'content': 0.14240947365760803, 'timestamp': '2025-10-02 00:35:00.635053', 'step': 13631, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:00.690658', 'step': 13631, 'epoch': 2}
{'type': 'loss', 'content': 0.014255966991186142, 'timestamp': '2025-10-02 00:35:00.700746', 'step': 13632, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:00.754687', 'step': 13632, 'epoch': 2}
{'type': 'loss', 'content': 0.03738492354750633, 'timestamp': '2025-10-02 00:35:00.757073', 'step': 13633, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:35:00.814893', 'step': 13633, 'epoch': 2}
{'type': 'loss', 'content': 0.13067491352558136, 'timestamp': '2025-10-02 00:35:00.817129', 'step': 13634, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:35:00.879113', 'step': 13634, 'epoch': 2}
{'type': 'loss', 'content': 0.036630839109420776, 'timestamp': '2025-10-02 00:35:00.889622', 'step': 13635, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:00.944313', 'step': 13635, 'epoch': 2}
{'type': 'loss', 'content': 0.022792605683207512, 'timestamp': '2025-10-02 00:35:00.952961', 'step': 13636, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:01.006930', 'step': 13636, 'epoch': 2}
{'type': 'loss', 'content': 0.061849843710660934, 'timestamp': '2025-10-02 00:35:01.014847', 'step': 13637, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:01.068699', 'step': 13637, 'epoch': 2}
{'type': 'loss', 'content': 0.11493729799985886, 'timestamp': '2025-10-02 00:35:01.070769', 'step': 13638, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:35:01.125741', 'step': 13638, 'epoch': 2}
{'type': 'loss', 'content': 0.09662327170372009, 'timestamp': '2025-10-02 00:35:01.135246', 'step': 13639, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:01.189701', 'step': 13639, 'epoch': 2}
{'type': 'loss', 'content': 0.09212230145931244, 'timestamp': '2025-10-02 00:35:01.199745', 'step': 13640, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:01.253409', 'step': 13640, 'epoch': 2}
{'type': 'loss', 'content': 0.206145778298378, 'timestamp': '2025-10-02 00:35:01.255531', 'step': 13641, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:01.309446', 'step': 13641, 'epoch': 2}
{'type': 'loss', 'content': 0.17392367124557495, 'timestamp': '2025-10-02 00:35:01.311949', 'step': 13642, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:01.365789', 'step': 13642, 'epoch': 2}
{'type': 'loss', 'content': 0.05759825557470322, 'timestamp': '2025-10-02 00:35:01.368540', 'step': 13643, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:35:01.423225', 'step': 13643, 'epoch': 2}
{'type': 'loss', 'content': 0.09669850021600723, 'timestamp': '2025-10-02 00:35:01.429039', 'step': 13644, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:35:01.486143', 'step': 13644, 'epoch': 2}
{'type': 'loss', 'content': 0.027155887335538864, 'timestamp': '2025-10-02 00:35:01.497104', 'step': 13645, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:35:01.550603', 'step': 13645, 'epoch': 2}
{'type': 'loss', 'content': 0.09506428986787796, 'timestamp': '2025-10-02 00:35:01.553508', 'step': 13646, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:01.607180', 'step': 13646, 'epoch': 2}
{'type': 'loss', 'content': 0.0788557231426239, 'timestamp': '2025-10-02 00:35:01.609580', 'step': 13647, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:01.663587', 'step': 13647, 'epoch': 2}
{'type': 'loss', 'content': 0.20229898393154144, 'timestamp': '2025-10-02 00:35:01.669311', 'step': 13648, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:01.722979', 'step': 13648, 'epoch': 2}
{'type': 'loss', 'content': 0.12187133729457855, 'timestamp': '2025-10-02 00:35:01.725220', 'step': 13649, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:01.780070', 'step': 13649, 'epoch': 2}
{'type': 'loss', 'content': 0.07590820640325546, 'timestamp': '2025-10-02 00:35:01.789441', 'step': 13650, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:35:01.844061', 'step': 13650, 'epoch': 2}
{'type': 'loss', 'content': 0.054249249398708344, 'timestamp': '2025-10-02 00:35:01.846305', 'step': 13651, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:35:01.905898', 'step': 13651, 'epoch': 2}
{'type': 'loss', 'content': 0.009752829559147358, 'timestamp': '2025-10-02 00:35:01.916856', 'step': 13652, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:35:01.975106', 'step': 13652, 'epoch': 2}
{'type': 'loss', 'content': 0.057859718799591064, 'timestamp': '2025-10-02 00:35:01.986122', 'step': 13653, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:02.040397', 'step': 13653, 'epoch': 2}
{'type': 'loss', 'content': 0.053140971809625626, 'timestamp': '2025-10-02 00:35:02.046553', 'step': 13654, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:02.101075', 'step': 13654, 'epoch': 2}
{'type': 'loss', 'content': 0.05864962562918663, 'timestamp': '2025-10-02 00:35:02.103291', 'step': 13655, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:02.157746', 'step': 13655, 'epoch': 2}
{'type': 'loss', 'content': 0.07195814698934555, 'timestamp': '2025-10-02 00:35:02.163996', 'step': 13656, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:02.217440', 'step': 13656, 'epoch': 2}
{'type': 'loss', 'content': 0.12191324681043625, 'timestamp': '2025-10-02 00:35:02.219981', 'step': 13657, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:02.274303', 'step': 13657, 'epoch': 2}
{'type': 'loss', 'content': 0.020239628851413727, 'timestamp': '2025-10-02 00:35:02.276899', 'step': 13658, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:02.331773', 'step': 13658, 'epoch': 2}
{'type': 'loss', 'content': 0.08727125823497772, 'timestamp': '2025-10-02 00:35:02.334664', 'step': 13659, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:35:02.389695', 'step': 13659, 'epoch': 2}
{'type': 'loss', 'content': 0.11916764080524445, 'timestamp': '2025-10-02 00:35:02.395557', 'step': 13660, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:35:02.449073', 'step': 13660, 'epoch': 2}
{'type': 'loss', 'content': 0.11224588006734848, 'timestamp': '2025-10-02 00:35:02.451805', 'step': 13661, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:02.506377', 'step': 13661, 'epoch': 2}
{'type': 'loss', 'content': 0.09161082655191422, 'timestamp': '2025-10-02 00:35:02.508742', 'step': 13662, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:02.563613', 'step': 13662, 'epoch': 2}
{'type': 'loss', 'content': 0.050786200910806656, 'timestamp': '2025-10-02 00:35:02.566037', 'step': 13663, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:35:02.636005', 'step': 13663, 'epoch': 2}
{'type': 'loss', 'content': 0.030842261388897896, 'timestamp': '2025-10-02 00:35:02.648759', 'step': 13664, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:02.703019', 'step': 13664, 'epoch': 2}
{'type': 'loss', 'content': 0.0625406801700592, 'timestamp': '2025-10-02 00:35:02.705279', 'step': 13665, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:02.759478', 'step': 13665, 'epoch': 2}
{'type': 'loss', 'content': 0.08857138454914093, 'timestamp': '2025-10-02 00:35:02.762032', 'step': 13666, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:02.822105', 'step': 13666, 'epoch': 2}
{'type': 'loss', 'content': 0.0042609404772520065, 'timestamp': '2025-10-02 00:35:02.831421', 'step': 13667, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:02.885488', 'step': 13667, 'epoch': 2}
{'type': 'loss', 'content': 0.15012648701667786, 'timestamp': '2025-10-02 00:35:02.891519', 'step': 13668, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:02.947671', 'step': 13668, 'epoch': 2}
{'type': 'loss', 'content': 0.023089060559868813, 'timestamp': '2025-10-02 00:35:02.953924', 'step': 13669, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:03.009379', 'step': 13669, 'epoch': 2}
{'type': 'loss', 'content': 0.13408905267715454, 'timestamp': '2025-10-02 00:35:03.012216', 'step': 13670, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:03.071148', 'step': 13670, 'epoch': 2}
{'type': 'loss', 'content': 0.018827740103006363, 'timestamp': '2025-10-02 00:35:03.078796', 'step': 13671, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:35:03.154659', 'step': 13671, 'epoch': 2}
{'type': 'loss', 'content': 0.008637671358883381, 'timestamp': '2025-10-02 00:35:03.165970', 'step': 13672, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:35:03.219803', 'step': 13672, 'epoch': 2}
{'type': 'loss', 'content': 0.12227164208889008, 'timestamp': '2025-10-02 00:35:03.223074', 'step': 13673, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:35:03.278587', 'step': 13673, 'epoch': 2}
{'type': 'loss', 'content': 0.03707336634397507, 'timestamp': '2025-10-02 00:35:03.288164', 'step': 13674, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:03.342325', 'step': 13674, 'epoch': 2}
{'type': 'loss', 'content': 0.15977895259857178, 'timestamp': '2025-10-02 00:35:03.344640', 'step': 13675, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:35:03.403440', 'step': 13675, 'epoch': 2}
{'type': 'loss', 'content': 0.04662167280912399, 'timestamp': '2025-10-02 00:35:03.414416', 'step': 13676, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:03.473744', 'step': 13676, 'epoch': 2}
{'type': 'loss', 'content': 0.032266341149806976, 'timestamp': '2025-10-02 00:35:03.476265', 'step': 13677, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:03.529853', 'step': 13677, 'epoch': 2}
{'type': 'loss', 'content': 0.09669167548418045, 'timestamp': '2025-10-02 00:35:03.536073', 'step': 13678, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:35:03.592017', 'step': 13678, 'epoch': 2}
{'type': 'loss', 'content': 0.016978980973362923, 'timestamp': '2025-10-02 00:35:03.601593', 'step': 13679, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:03.658919', 'step': 13679, 'epoch': 2}
{'type': 'loss', 'content': 0.05477752536535263, 'timestamp': '2025-10-02 00:35:03.664585', 'step': 13680, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:35:03.718043', 'step': 13680, 'epoch': 2}
{'type': 'loss', 'content': 0.061656199395656586, 'timestamp': '2025-10-02 00:35:03.720464', 'step': 13681, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:35:03.775323', 'step': 13681, 'epoch': 2}
{'type': 'loss', 'content': 0.21720030903816223, 'timestamp': '2025-10-02 00:35:03.777698', 'step': 13682, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:03.835271', 'step': 13682, 'epoch': 2}
{'type': 'loss', 'content': 0.09289554506540298, 'timestamp': '2025-10-02 00:35:03.837871', 'step': 13683, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:03.893022', 'step': 13683, 'epoch': 2}
{'type': 'loss', 'content': 0.05785108730196953, 'timestamp': '2025-10-02 00:35:03.898664', 'step': 13684, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:03.956424', 'step': 13684, 'epoch': 2}
{'type': 'loss', 'content': 0.026565246284008026, 'timestamp': '2025-10-02 00:35:03.959328', 'step': 13685, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:04.013421', 'step': 13685, 'epoch': 2}
{'type': 'loss', 'content': 0.014456731267273426, 'timestamp': '2025-10-02 00:35:04.016216', 'step': 13686, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:35:04.083507', 'step': 13686, 'epoch': 2}
{'type': 'loss', 'content': 0.026329539716243744, 'timestamp': '2025-10-02 00:35:04.094327', 'step': 13687, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:04.148931', 'step': 13687, 'epoch': 2}
{'type': 'loss', 'content': 0.09486017376184464, 'timestamp': '2025-10-02 00:35:04.155125', 'step': 13688, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:35:04.208033', 'step': 13688, 'epoch': 2}
{'type': 'loss', 'content': 0.1557333767414093, 'timestamp': '2025-10-02 00:35:04.211020', 'step': 13689, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:35:04.265257', 'step': 13689, 'epoch': 2}
{'type': 'loss', 'content': 0.15022148191928864, 'timestamp': '2025-10-02 00:35:04.267486', 'step': 13690, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:04.321190', 'step': 13690, 'epoch': 2}
{'type': 'loss', 'content': 0.030153341591358185, 'timestamp': '2025-10-02 00:35:04.328911', 'step': 13691, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:35:04.382932', 'step': 13691, 'epoch': 2}
{'type': 'loss', 'content': 0.13763603568077087, 'timestamp': '2025-10-02 00:35:04.389070', 'step': 13692, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-10-02 00:35:04.469059', 'step': 13692, 'epoch': 2}
{'type': 'loss', 'content': 0.012187652289867401, 'timestamp': '2025-10-02 00:35:04.485275', 'step': 13693, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:04.540292', 'step': 13693, 'epoch': 2}
{'type': 'loss', 'content': 0.042306751012802124, 'timestamp': '2025-10-02 00:35:04.546524', 'step': 13694, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:04.600786', 'step': 13694, 'epoch': 2}
{'type': 'loss', 'content': 0.07191672921180725, 'timestamp': '2025-10-02 00:35:04.603281', 'step': 13695, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:04.657597', 'step': 13695, 'epoch': 2}
{'type': 'loss', 'content': 0.027513504028320312, 'timestamp': '2025-10-02 00:35:04.666078', 'step': 13696, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:04.720901', 'step': 13696, 'epoch': 2}
{'type': 'loss', 'content': 0.07519112527370453, 'timestamp': '2025-10-02 00:35:04.723455', 'step': 13697, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:04.777356', 'step': 13697, 'epoch': 2}
{'type': 'loss', 'content': 0.0958116352558136, 'timestamp': '2025-10-02 00:35:04.780072', 'step': 13698, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:04.834056', 'step': 13698, 'epoch': 2}
{'type': 'loss', 'content': 0.0726511999964714, 'timestamp': '2025-10-02 00:35:04.843444', 'step': 13699, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:04.897996', 'step': 13699, 'epoch': 2}
{'type': 'loss', 'content': 0.028462575748562813, 'timestamp': '2025-10-02 00:35:04.906673', 'step': 13700, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:04.960509', 'step': 13700, 'epoch': 2}
{'type': 'loss', 'content': 0.02038336731493473, 'timestamp': '2025-10-02 00:35:04.966792', 'step': 13701, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:05.022845', 'step': 13701, 'epoch': 2}
{'type': 'loss', 'content': 0.19794850051403046, 'timestamp': '2025-10-02 00:35:05.025349', 'step': 13702, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:05.079994', 'step': 13702, 'epoch': 2}
{'type': 'loss', 'content': 0.11724790930747986, 'timestamp': '2025-10-02 00:35:05.082125', 'step': 13703, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:05.136631', 'step': 13703, 'epoch': 2}
{'type': 'loss', 'content': 0.05215860530734062, 'timestamp': '2025-10-02 00:35:05.143362', 'step': 13704, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:05.197622', 'step': 13704, 'epoch': 2}
{'type': 'loss', 'content': 0.15229764580726624, 'timestamp': '2025-10-02 00:35:05.200017', 'step': 13705, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:05.253709', 'step': 13705, 'epoch': 2}
{'type': 'loss', 'content': 0.08590180426836014, 'timestamp': '2025-10-02 00:35:05.261588', 'step': 13706, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:05.316763', 'step': 13706, 'epoch': 2}
{'type': 'loss', 'content': 0.040131762623786926, 'timestamp': '2025-10-02 00:35:05.318989', 'step': 13707, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:05.373908', 'step': 13707, 'epoch': 2}
{'type': 'loss', 'content': 0.07594756782054901, 'timestamp': '2025-10-02 00:35:05.379539', 'step': 13708, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:35:05.434005', 'step': 13708, 'epoch': 2}
{'type': 'loss', 'content': 0.01486805360764265, 'timestamp': '2025-10-02 00:35:05.444260', 'step': 13709, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:05.498308', 'step': 13709, 'epoch': 2}
{'type': 'loss', 'content': 0.024889761582016945, 'timestamp': '2025-10-02 00:35:05.506126', 'step': 13710, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:05.560332', 'step': 13710, 'epoch': 2}
{'type': 'loss', 'content': 0.06020733341574669, 'timestamp': '2025-10-02 00:35:05.562629', 'step': 13711, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:05.617372', 'step': 13711, 'epoch': 2}
{'type': 'loss', 'content': 0.05072744935750961, 'timestamp': '2025-10-02 00:35:05.623322', 'step': 13712, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:35:05.689127', 'step': 13712, 'epoch': 2}
{'type': 'loss', 'content': 0.02350482903420925, 'timestamp': '2025-10-02 00:35:05.700673', 'step': 13713, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:35:05.754856', 'step': 13713, 'epoch': 2}
{'type': 'loss', 'content': 0.13474832475185394, 'timestamp': '2025-10-02 00:35:05.757429', 'step': 13714, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:05.811944', 'step': 13714, 'epoch': 2}
{'type': 'loss', 'content': 0.026350483298301697, 'timestamp': '2025-10-02 00:35:05.819596', 'step': 13715, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:05.873860', 'step': 13715, 'epoch': 2}
{'type': 'loss', 'content': 0.042638085782527924, 'timestamp': '2025-10-02 00:35:05.880892', 'step': 13716, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:35:05.934355', 'step': 13716, 'epoch': 2}
{'type': 'loss', 'content': 0.07260259240865707, 'timestamp': '2025-10-02 00:35:05.937225', 'step': 13717, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:05.991774', 'step': 13717, 'epoch': 2}
{'type': 'loss', 'content': 0.06789127737283707, 'timestamp': '2025-10-02 00:35:05.994472', 'step': 13718, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:06.049342', 'step': 13718, 'epoch': 2}
{'type': 'loss', 'content': 0.08037181943655014, 'timestamp': '2025-10-02 00:35:06.051576', 'step': 13719, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:35:06.110407', 'step': 13719, 'epoch': 2}
{'type': 'loss', 'content': 0.05930529534816742, 'timestamp': '2025-10-02 00:35:06.121358', 'step': 13720, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:35:06.179334', 'step': 13720, 'epoch': 2}
{'type': 'loss', 'content': 0.02282772585749626, 'timestamp': '2025-10-02 00:35:06.189572', 'step': 13721, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:35:06.244741', 'step': 13721, 'epoch': 2}
{'type': 'loss', 'content': 0.19258125126361847, 'timestamp': '2025-10-02 00:35:06.247074', 'step': 13722, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:35:06.300621', 'step': 13722, 'epoch': 2}
{'type': 'loss', 'content': 0.04143242537975311, 'timestamp': '2025-10-02 00:35:06.302889', 'step': 13723, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:06.356277', 'step': 13723, 'epoch': 2}
{'type': 'loss', 'content': 0.1478954404592514, 'timestamp': '2025-10-02 00:35:06.362138', 'step': 13724, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:06.415584', 'step': 13724, 'epoch': 2}
{'type': 'loss', 'content': 0.09035725891590118, 'timestamp': '2025-10-02 00:35:06.421682', 'step': 13725, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:06.476513', 'step': 13725, 'epoch': 2}
{'type': 'loss', 'content': 0.1609361469745636, 'timestamp': '2025-10-02 00:35:06.478718', 'step': 13726, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:06.532240', 'step': 13726, 'epoch': 2}
{'type': 'loss', 'content': 0.022728582844138145, 'timestamp': '2025-10-02 00:35:06.538481', 'step': 13727, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:35:06.591652', 'step': 13727, 'epoch': 2}
{'type': 'loss', 'content': 0.07580262422561646, 'timestamp': '2025-10-02 00:35:06.597318', 'step': 13728, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:06.651151', 'step': 13728, 'epoch': 2}
{'type': 'loss', 'content': 0.08434692025184631, 'timestamp': '2025-10-02 00:35:06.653392', 'step': 13729, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:06.707558', 'step': 13729, 'epoch': 2}
{'type': 'loss', 'content': 0.01955091580748558, 'timestamp': '2025-10-02 00:35:06.713734', 'step': 13730, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:06.767178', 'step': 13730, 'epoch': 2}
{'type': 'loss', 'content': 0.08445854485034943, 'timestamp': '2025-10-02 00:35:06.770017', 'step': 13731, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:06.824355', 'step': 13731, 'epoch': 2}
{'type': 'loss', 'content': 0.028163257986307144, 'timestamp': '2025-10-02 00:35:06.830351', 'step': 13732, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:35:06.883488', 'step': 13732, 'epoch': 2}
{'type': 'loss', 'content': 0.1271418035030365, 'timestamp': '2025-10-02 00:35:06.885637', 'step': 13733, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:06.939503', 'step': 13733, 'epoch': 2}
{'type': 'loss', 'content': 0.040480248630046844, 'timestamp': '2025-10-02 00:35:06.948844', 'step': 13734, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:35:07.003735', 'step': 13734, 'epoch': 2}
{'type': 'loss', 'content': 0.08550604432821274, 'timestamp': '2025-10-02 00:35:07.013226', 'step': 13735, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:07.066924', 'step': 13735, 'epoch': 2}
{'type': 'loss', 'content': 0.08393514156341553, 'timestamp': '2025-10-02 00:35:07.075353', 'step': 13736, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:07.129483', 'step': 13736, 'epoch': 2}
{'type': 'loss', 'content': 0.03952100872993469, 'timestamp': '2025-10-02 00:35:07.132189', 'step': 13737, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:07.189044', 'step': 13737, 'epoch': 2}
{'type': 'loss', 'content': 0.026102013885974884, 'timestamp': '2025-10-02 00:35:07.198454', 'step': 13738, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 560], 'flops': 11200068058304.0}, 'timestamp': '2025-10-02 00:35:07.282197', 'step': 13738, 'epoch': 2}
{'type': 'loss', 'content': 0.01834343746304512, 'timestamp': '2025-10-02 00:35:07.297301', 'step': 13739, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:07.351504', 'step': 13739, 'epoch': 2}
{'type': 'loss', 'content': 0.03481563925743103, 'timestamp': '2025-10-02 00:35:07.357190', 'step': 13740, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:35:07.415338', 'step': 13740, 'epoch': 2}
{'type': 'loss', 'content': 0.0826273113489151, 'timestamp': '2025-10-02 00:35:07.426341', 'step': 13741, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:35:07.481241', 'step': 13741, 'epoch': 2}
{'type': 'loss', 'content': 0.060346417129039764, 'timestamp': '2025-10-02 00:35:07.490784', 'step': 13742, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:35:07.546718', 'step': 13742, 'epoch': 2}
{'type': 'loss', 'content': 0.027175521478056908, 'timestamp': '2025-10-02 00:35:07.556279', 'step': 13743, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:35:07.610879', 'step': 13743, 'epoch': 2}
{'type': 'loss', 'content': 0.1283319741487503, 'timestamp': '2025-10-02 00:35:07.616564', 'step': 13744, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:07.669770', 'step': 13744, 'epoch': 2}
{'type': 'loss', 'content': 0.07409074902534485, 'timestamp': '2025-10-02 00:35:07.672980', 'step': 13745, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:07.729097', 'step': 13745, 'epoch': 2}
{'type': 'loss', 'content': 0.10959205776453018, 'timestamp': '2025-10-02 00:35:07.735390', 'step': 13746, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:07.790820', 'step': 13746, 'epoch': 2}
{'type': 'loss', 'content': 0.10937780141830444, 'timestamp': '2025-10-02 00:35:07.793920', 'step': 13747, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:07.848028', 'step': 13747, 'epoch': 2}
{'type': 'loss', 'content': 0.025118054822087288, 'timestamp': '2025-10-02 00:35:07.853977', 'step': 13748, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:07.907797', 'step': 13748, 'epoch': 2}
{'type': 'loss', 'content': 0.055931515991687775, 'timestamp': '2025-10-02 00:35:07.910809', 'step': 13749, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:35:07.966185', 'step': 13749, 'epoch': 2}
{'type': 'loss', 'content': 0.13389107584953308, 'timestamp': '2025-10-02 00:35:07.969411', 'step': 13750, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:08.024882', 'step': 13750, 'epoch': 2}
{'type': 'loss', 'content': 0.0411740317940712, 'timestamp': '2025-10-02 00:35:08.027455', 'step': 13751, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:08.085255', 'step': 13751, 'epoch': 2}
{'type': 'loss', 'content': 0.014005457982420921, 'timestamp': '2025-10-02 00:35:08.095406', 'step': 13752, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:08.149929', 'step': 13752, 'epoch': 2}
{'type': 'loss', 'content': 0.025372112169861794, 'timestamp': '2025-10-02 00:35:08.153363', 'step': 13753, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:08.207351', 'step': 13753, 'epoch': 2}
{'type': 'loss', 'content': 0.10907762497663498, 'timestamp': '2025-10-02 00:35:08.209955', 'step': 13754, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:08.263701', 'step': 13754, 'epoch': 2}
{'type': 'loss', 'content': 0.08700746297836304, 'timestamp': '2025-10-02 00:35:08.269896', 'step': 13755, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:35:08.332498', 'step': 13755, 'epoch': 2}
{'type': 'loss', 'content': 0.03407716751098633, 'timestamp': '2025-10-02 00:35:08.343689', 'step': 13756, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:08.401487', 'step': 13756, 'epoch': 2}
{'type': 'loss', 'content': 0.02131468616425991, 'timestamp': '2025-10-02 00:35:08.411259', 'step': 13757, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:08.465134', 'step': 13757, 'epoch': 2}
{'type': 'loss', 'content': 0.13511963188648224, 'timestamp': '2025-10-02 00:35:08.472885', 'step': 13758, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:08.527077', 'step': 13758, 'epoch': 2}
{'type': 'loss', 'content': 0.08649119734764099, 'timestamp': '2025-10-02 00:35:08.529184', 'step': 13759, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:08.583907', 'step': 13759, 'epoch': 2}
{'type': 'loss', 'content': 0.04409375041723251, 'timestamp': '2025-10-02 00:35:08.590796', 'step': 13760, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:35:08.645613', 'step': 13760, 'epoch': 2}
{'type': 'loss', 'content': 0.022066043689846992, 'timestamp': '2025-10-02 00:35:08.655894', 'step': 13761, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:08.714244', 'step': 13761, 'epoch': 2}
{'type': 'loss', 'content': 0.04946768656373024, 'timestamp': '2025-10-02 00:35:08.717369', 'step': 13762, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:35:08.773026', 'step': 13762, 'epoch': 2}
{'type': 'loss', 'content': 0.163010835647583, 'timestamp': '2025-10-02 00:35:08.775445', 'step': 13763, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:35:08.847493', 'step': 13763, 'epoch': 2}
{'type': 'loss', 'content': 0.01102274190634489, 'timestamp': '2025-10-02 00:35:08.860701', 'step': 13764, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:35:08.916306', 'step': 13764, 'epoch': 2}
{'type': 'loss', 'content': 0.08600033074617386, 'timestamp': '2025-10-02 00:35:08.926573', 'step': 13765, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:08.984923', 'step': 13765, 'epoch': 2}
{'type': 'loss', 'content': 0.02208688110113144, 'timestamp': '2025-10-02 00:35:08.987557', 'step': 13766, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:09.043045', 'step': 13766, 'epoch': 2}
{'type': 'loss', 'content': 0.029880909249186516, 'timestamp': '2025-10-02 00:35:09.049307', 'step': 13767, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:09.105648', 'step': 13767, 'epoch': 2}
{'type': 'loss', 'content': 0.05476688966155052, 'timestamp': '2025-10-02 00:35:09.112022', 'step': 13768, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:35:09.166165', 'step': 13768, 'epoch': 2}
{'type': 'loss', 'content': 0.1656350940465927, 'timestamp': '2025-10-02 00:35:09.169334', 'step': 13769, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:09.227452', 'step': 13769, 'epoch': 2}
{'type': 'loss', 'content': 0.038801997900009155, 'timestamp': '2025-10-02 00:35:09.233946', 'step': 13770, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:35:09.298210', 'step': 13770, 'epoch': 2}
{'type': 'loss', 'content': 0.11695127934217453, 'timestamp': '2025-10-02 00:35:09.301839', 'step': 13771, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:09.359266', 'step': 13771, 'epoch': 2}
{'type': 'loss', 'content': 0.0933893471956253, 'timestamp': '2025-10-02 00:35:09.367016', 'step': 13772, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:35:09.445705', 'step': 13772, 'epoch': 2}
{'type': 'loss', 'content': 0.005599907133728266, 'timestamp': '2025-10-02 00:35:09.458736', 'step': 13773, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:09.515883', 'step': 13773, 'epoch': 2}
{'type': 'loss', 'content': 0.041053656488657, 'timestamp': '2025-10-02 00:35:09.519138', 'step': 13774, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:09.576457', 'step': 13774, 'epoch': 2}
{'type': 'loss', 'content': 0.03258568048477173, 'timestamp': '2025-10-02 00:35:09.580104', 'step': 13775, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:35:09.636321', 'step': 13775, 'epoch': 2}
{'type': 'loss', 'content': 0.03177730366587639, 'timestamp': '2025-10-02 00:35:09.646670', 'step': 13776, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:35:09.703353', 'step': 13776, 'epoch': 2}
{'type': 'loss', 'content': 0.018518364056944847, 'timestamp': '2025-10-02 00:35:09.713646', 'step': 13777, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:09.770853', 'step': 13777, 'epoch': 2}
{'type': 'loss', 'content': 0.1278027445077896, 'timestamp': '2025-10-02 00:35:09.773393', 'step': 13778, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:09.832184', 'step': 13778, 'epoch': 2}
{'type': 'loss', 'content': 0.11877577006816864, 'timestamp': '2025-10-02 00:35:09.834800', 'step': 13779, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:09.889081', 'step': 13779, 'epoch': 2}
{'type': 'loss', 'content': 0.09575662761926651, 'timestamp': '2025-10-02 00:35:09.896796', 'step': 13780, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:09.952372', 'step': 13780, 'epoch': 2}
{'type': 'loss', 'content': 0.009574554860591888, 'timestamp': '2025-10-02 00:35:09.960219', 'step': 13781, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:10.016757', 'step': 13781, 'epoch': 2}
{'type': 'loss', 'content': 0.18287481367588043, 'timestamp': '2025-10-02 00:35:10.019264', 'step': 13782, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:10.074016', 'step': 13782, 'epoch': 2}
{'type': 'loss', 'content': 0.05025414377450943, 'timestamp': '2025-10-02 00:35:10.076800', 'step': 13783, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:10.133775', 'step': 13783, 'epoch': 2}
{'type': 'loss', 'content': 0.06751648336648941, 'timestamp': '2025-10-02 00:35:10.140140', 'step': 13784, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:10.199472', 'step': 13784, 'epoch': 2}
{'type': 'loss', 'content': 0.053948093205690384, 'timestamp': '2025-10-02 00:35:10.201793', 'step': 13785, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:10.255185', 'step': 13785, 'epoch': 2}
{'type': 'loss', 'content': 0.1629759818315506, 'timestamp': '2025-10-02 00:35:10.257390', 'step': 13786, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:35:10.310927', 'step': 13786, 'epoch': 2}
{'type': 'loss', 'content': 0.1047038808465004, 'timestamp': '2025-10-02 00:35:10.313606', 'step': 13787, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:35:10.384487', 'step': 13787, 'epoch': 2}
{'type': 'loss', 'content': 0.044852301478385925, 'timestamp': '2025-10-02 00:35:10.397722', 'step': 13788, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:10.452761', 'step': 13788, 'epoch': 2}
{'type': 'loss', 'content': 0.03516872972249985, 'timestamp': '2025-10-02 00:35:10.462576', 'step': 13789, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:10.524214', 'step': 13789, 'epoch': 2}
{'type': 'loss', 'content': 0.019465886056423187, 'timestamp': '2025-10-02 00:35:10.530345', 'step': 13790, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:10.598387', 'step': 13790, 'epoch': 2}
{'type': 'loss', 'content': 0.05131296068429947, 'timestamp': '2025-10-02 00:35:10.607026', 'step': 13791, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:35:10.694702', 'step': 13791, 'epoch': 2}
{'type': 'loss', 'content': 0.04614468663930893, 'timestamp': '2025-10-02 00:35:10.705676', 'step': 13792, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:10.764106', 'step': 13792, 'epoch': 2}
{'type': 'loss', 'content': 0.027273092418909073, 'timestamp': '2025-10-02 00:35:10.771893', 'step': 13793, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:10.826631', 'step': 13793, 'epoch': 2}
{'type': 'loss', 'content': 0.03697554022073746, 'timestamp': '2025-10-02 00:35:10.834445', 'step': 13794, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:10.889528', 'step': 13794, 'epoch': 2}
{'type': 'loss', 'content': 0.055985525250434875, 'timestamp': '2025-10-02 00:35:10.891774', 'step': 13795, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:10.946295', 'step': 13795, 'epoch': 2}
{'type': 'loss', 'content': 0.01079537533223629, 'timestamp': '2025-10-02 00:35:10.952088', 'step': 13796, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:11.007147', 'step': 13796, 'epoch': 2}
{'type': 'loss', 'content': 0.053598009049892426, 'timestamp': '2025-10-02 00:35:11.014876', 'step': 13797, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:35:11.068481', 'step': 13797, 'epoch': 2}
{'type': 'loss', 'content': 0.12676537036895752, 'timestamp': '2025-10-02 00:35:11.071241', 'step': 13798, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:11.125306', 'step': 13798, 'epoch': 2}
{'type': 'loss', 'content': 0.03430614620447159, 'timestamp': '2025-10-02 00:35:11.127545', 'step': 13799, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:11.181108', 'step': 13799, 'epoch': 2}
{'type': 'loss', 'content': 0.1304415911436081, 'timestamp': '2025-10-02 00:35:11.187257', 'step': 13800, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:35:11.245406', 'step': 13800, 'epoch': 2}
{'type': 'loss', 'content': 0.05957173556089401, 'timestamp': '2025-10-02 00:35:11.247820', 'step': 13801, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 592], 'flops': 11840071943488.0}, 'timestamp': '2025-10-02 00:35:11.336592', 'step': 13801, 'epoch': 2}
{'type': 'loss', 'content': 0.024052483960986137, 'timestamp': '2025-10-02 00:35:11.352985', 'step': 13802, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:11.411437', 'step': 13802, 'epoch': 2}
{'type': 'loss', 'content': 0.02934330701828003, 'timestamp': '2025-10-02 00:35:11.414132', 'step': 13803, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:35:11.475788', 'step': 13803, 'epoch': 2}
{'type': 'loss', 'content': 0.055865731090307236, 'timestamp': '2025-10-02 00:35:11.487428', 'step': 13804, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:11.541409', 'step': 13804, 'epoch': 2}
{'type': 'loss', 'content': 0.03375228866934776, 'timestamp': '2025-10-02 00:35:11.551137', 'step': 13805, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:11.604602', 'step': 13805, 'epoch': 2}
{'type': 'loss', 'content': 0.11158096790313721, 'timestamp': '2025-10-02 00:35:11.606844', 'step': 13806, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:35:11.661375', 'step': 13806, 'epoch': 2}
{'type': 'loss', 'content': 0.12129570543766022, 'timestamp': '2025-10-02 00:35:11.665746', 'step': 13807, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:11.728151', 'step': 13807, 'epoch': 2}
{'type': 'loss', 'content': 0.10467730462551117, 'timestamp': '2025-10-02 00:35:11.735021', 'step': 13808, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:35:11.816145', 'step': 13808, 'epoch': 2}
{'type': 'loss', 'content': 0.03885836899280548, 'timestamp': '2025-10-02 00:35:11.827138', 'step': 13809, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:11.895609', 'step': 13809, 'epoch': 2}
{'type': 'loss', 'content': 0.004467352759093046, 'timestamp': '2025-10-02 00:35:11.903346', 'step': 13810, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:11.957266', 'step': 13810, 'epoch': 2}
{'type': 'loss', 'content': 0.11487045139074326, 'timestamp': '2025-10-02 00:35:11.959670', 'step': 13811, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:12.012912', 'step': 13811, 'epoch': 2}
{'type': 'loss', 'content': 0.06283722072839737, 'timestamp': '2025-10-02 00:35:12.021504', 'step': 13812, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:12.074785', 'step': 13812, 'epoch': 2}
{'type': 'loss', 'content': 0.16826635599136353, 'timestamp': '2025-10-02 00:35:12.077707', 'step': 13813, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:35:12.131293', 'step': 13813, 'epoch': 2}
{'type': 'loss', 'content': 0.034642305225133896, 'timestamp': '2025-10-02 00:35:12.133571', 'step': 13814, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 64], 'flops': 1280007837952.0}, 'timestamp': '2025-10-02 00:35:12.186427', 'step': 13814, 'epoch': 2}
{'type': 'loss', 'content': 0.09699436277151108, 'timestamp': '2025-10-02 00:35:12.188946', 'step': 13815, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:12.242349', 'step': 13815, 'epoch': 2}
{'type': 'loss', 'content': 0.08475982397794724, 'timestamp': '2025-10-02 00:35:12.248108', 'step': 13816, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:12.301084', 'step': 13816, 'epoch': 2}
{'type': 'loss', 'content': 0.13557423651218414, 'timestamp': '2025-10-02 00:35:12.303462', 'step': 13817, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:35:12.364586', 'step': 13817, 'epoch': 2}
{'type': 'loss', 'content': 0.03224915638566017, 'timestamp': '2025-10-02 00:35:12.375078', 'step': 13818, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:12.429343', 'step': 13818, 'epoch': 2}
{'type': 'loss', 'content': 0.05897611752152443, 'timestamp': '2025-10-02 00:35:12.431609', 'step': 13819, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:12.484999', 'step': 13819, 'epoch': 2}
{'type': 'loss', 'content': 0.0855412557721138, 'timestamp': '2025-10-02 00:35:12.490551', 'step': 13820, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:12.543534', 'step': 13820, 'epoch': 2}
{'type': 'loss', 'content': 0.04897712916135788, 'timestamp': '2025-10-02 00:35:12.551339', 'step': 13821, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:12.604613', 'step': 13821, 'epoch': 2}
{'type': 'loss', 'content': 0.055877700448036194, 'timestamp': '2025-10-02 00:35:12.607296', 'step': 13822, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:12.661737', 'step': 13822, 'epoch': 2}
{'type': 'loss', 'content': 0.04485765099525452, 'timestamp': '2025-10-02 00:35:12.669679', 'step': 13823, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:35:12.724814', 'step': 13823, 'epoch': 2}
{'type': 'loss', 'content': 0.17177985608577728, 'timestamp': '2025-10-02 00:35:12.731095', 'step': 13824, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:35:12.791030', 'step': 13824, 'epoch': 2}
{'type': 'loss', 'content': 0.06742928922176361, 'timestamp': '2025-10-02 00:35:12.802397', 'step': 13825, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:12.858882', 'step': 13825, 'epoch': 2}
{'type': 'loss', 'content': 0.0189011562615633, 'timestamp': '2025-10-02 00:35:12.863984', 'step': 13826, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:12.926291', 'step': 13826, 'epoch': 2}
{'type': 'loss', 'content': 0.00669101532548666, 'timestamp': '2025-10-02 00:35:12.932461', 'step': 13827, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:13.011652', 'step': 13827, 'epoch': 2}
{'type': 'loss', 'content': 0.014179117977619171, 'timestamp': '2025-10-02 00:35:13.018572', 'step': 13828, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:13.087328', 'step': 13828, 'epoch': 2}
{'type': 'loss', 'content': 0.09432539343833923, 'timestamp': '2025-10-02 00:35:13.089688', 'step': 13829, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:13.143159', 'step': 13829, 'epoch': 2}
{'type': 'loss', 'content': 0.1607280969619751, 'timestamp': '2025-10-02 00:35:13.145564', 'step': 13830, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-10-02 00:35:13.221271', 'step': 13830, 'epoch': 2}
{'type': 'loss', 'content': 0.01602150872349739, 'timestamp': '2025-10-02 00:35:13.234939', 'step': 13831, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:13.289014', 'step': 13831, 'epoch': 2}
{'type': 'loss', 'content': 0.03965897858142853, 'timestamp': '2025-10-02 00:35:13.295066', 'step': 13832, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:13.348782', 'step': 13832, 'epoch': 2}
{'type': 'loss', 'content': 0.07510898262262344, 'timestamp': '2025-10-02 00:35:13.351048', 'step': 13833, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:13.411369', 'step': 13833, 'epoch': 2}
{'type': 'loss', 'content': 0.13004443049430847, 'timestamp': '2025-10-02 00:35:13.413589', 'step': 13834, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:13.467322', 'step': 13834, 'epoch': 2}
{'type': 'loss', 'content': 0.08009487390518188, 'timestamp': '2025-10-02 00:35:13.469591', 'step': 13835, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:35:13.522929', 'step': 13835, 'epoch': 2}
{'type': 'loss', 'content': 0.17154964804649353, 'timestamp': '2025-10-02 00:35:13.528732', 'step': 13836, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:13.583141', 'step': 13836, 'epoch': 2}
{'type': 'loss', 'content': 0.08140573650598526, 'timestamp': '2025-10-02 00:35:13.585380', 'step': 13837, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:13.639278', 'step': 13837, 'epoch': 2}
{'type': 'loss', 'content': 0.06511752307415009, 'timestamp': '2025-10-02 00:35:13.641549', 'step': 13838, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:13.694500', 'step': 13838, 'epoch': 2}
{'type': 'loss', 'content': 0.10550899803638458, 'timestamp': '2025-10-02 00:35:13.696614', 'step': 13839, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:13.749755', 'step': 13839, 'epoch': 2}
{'type': 'loss', 'content': 0.08332543820142746, 'timestamp': '2025-10-02 00:35:13.755526', 'step': 13840, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:35:13.813300', 'step': 13840, 'epoch': 2}
{'type': 'loss', 'content': 0.020454583689570427, 'timestamp': '2025-10-02 00:35:13.824270', 'step': 13841, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:13.878230', 'step': 13841, 'epoch': 2}
{'type': 'loss', 'content': 0.13518348336219788, 'timestamp': '2025-10-02 00:35:13.880599', 'step': 13842, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:35:13.934530', 'step': 13842, 'epoch': 2}
{'type': 'loss', 'content': 0.060790054500103, 'timestamp': '2025-10-02 00:35:13.937054', 'step': 13843, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:35:14.005404', 'step': 13843, 'epoch': 2}
{'type': 'loss', 'content': 0.040105730295181274, 'timestamp': '2025-10-02 00:35:14.018534', 'step': 13844, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:14.071323', 'step': 13844, 'epoch': 2}
{'type': 'loss', 'content': 0.031057992950081825, 'timestamp': '2025-10-02 00:35:14.077349', 'step': 13845, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:35:14.133172', 'step': 13845, 'epoch': 2}
{'type': 'loss', 'content': 0.13244377076625824, 'timestamp': '2025-10-02 00:35:14.145452', 'step': 13846, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:14.213661', 'step': 13846, 'epoch': 2}
{'type': 'loss', 'content': 0.07719314098358154, 'timestamp': '2025-10-02 00:35:14.216596', 'step': 13847, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:35:14.280912', 'step': 13847, 'epoch': 2}
{'type': 'loss', 'content': 0.053507354110479355, 'timestamp': '2025-10-02 00:35:14.292220', 'step': 13848, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:14.345349', 'step': 13848, 'epoch': 2}
{'type': 'loss', 'content': 0.08355936408042908, 'timestamp': '2025-10-02 00:35:14.347952', 'step': 13849, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:14.403037', 'step': 13849, 'epoch': 2}
{'type': 'loss', 'content': 0.08644820004701614, 'timestamp': '2025-10-02 00:35:14.412412', 'step': 13850, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:35:14.466289', 'step': 13850, 'epoch': 2}
{'type': 'loss', 'content': 0.01433480717241764, 'timestamp': '2025-10-02 00:35:14.468697', 'step': 13851, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:14.521920', 'step': 13851, 'epoch': 2}
{'type': 'loss', 'content': 0.16700510680675507, 'timestamp': '2025-10-02 00:35:14.527680', 'step': 13852, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:14.581783', 'step': 13852, 'epoch': 2}
{'type': 'loss', 'content': 0.08434159308671951, 'timestamp': '2025-10-02 00:35:14.584342', 'step': 13853, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:14.638823', 'step': 13853, 'epoch': 2}
{'type': 'loss', 'content': 0.036710143089294434, 'timestamp': '2025-10-02 00:35:14.646627', 'step': 13854, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:35:14.714340', 'step': 13854, 'epoch': 2}
{'type': 'loss', 'content': 0.06838735193014145, 'timestamp': '2025-10-02 00:35:14.726324', 'step': 13855, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:35:14.789054', 'step': 13855, 'epoch': 2}
{'type': 'loss', 'content': 0.032237499952316284, 'timestamp': '2025-10-02 00:35:14.800470', 'step': 13856, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:14.853584', 'step': 13856, 'epoch': 2}
{'type': 'loss', 'content': 0.10028999298810959, 'timestamp': '2025-10-02 00:35:14.859632', 'step': 13857, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:35:14.912985', 'step': 13857, 'epoch': 2}
{'type': 'loss', 'content': 0.044689275324344635, 'timestamp': '2025-10-02 00:35:14.915128', 'step': 13858, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:14.970717', 'step': 13858, 'epoch': 2}
{'type': 'loss', 'content': 0.07435523718595505, 'timestamp': '2025-10-02 00:35:14.978651', 'step': 13859, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:15.032275', 'step': 13859, 'epoch': 2}
{'type': 'loss', 'content': 0.07954595237970352, 'timestamp': '2025-10-02 00:35:15.038132', 'step': 13860, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:15.091592', 'step': 13860, 'epoch': 2}
{'type': 'loss', 'content': 0.09946107119321823, 'timestamp': '2025-10-02 00:35:15.097979', 'step': 13861, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:35:15.151479', 'step': 13861, 'epoch': 2}
{'type': 'loss', 'content': 0.14431406557559967, 'timestamp': '2025-10-02 00:35:15.153601', 'step': 13862, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:15.210656', 'step': 13862, 'epoch': 2}
{'type': 'loss', 'content': 0.05910729616880417, 'timestamp': '2025-10-02 00:35:15.218304', 'step': 13863, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:35:15.304806', 'step': 13863, 'epoch': 2}
{'type': 'loss', 'content': 0.020578749477863312, 'timestamp': '2025-10-02 00:35:15.325203', 'step': 13864, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:15.397543', 'step': 13864, 'epoch': 2}
{'type': 'loss', 'content': 0.05461925268173218, 'timestamp': '2025-10-02 00:35:15.405345', 'step': 13865, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:35:15.490245', 'step': 13865, 'epoch': 2}
{'type': 'loss', 'content': 0.04887517914175987, 'timestamp': '2025-10-02 00:35:15.494217', 'step': 13866, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:15.566793', 'step': 13866, 'epoch': 2}
{'type': 'loss', 'content': 0.0402383953332901, 'timestamp': '2025-10-02 00:35:15.570372', 'step': 13867, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:15.663628', 'step': 13867, 'epoch': 2}
{'type': 'loss', 'content': 0.19739286601543427, 'timestamp': '2025-10-02 00:35:15.670597', 'step': 13868, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:15.765643', 'step': 13868, 'epoch': 2}
{'type': 'loss', 'content': 0.08990686386823654, 'timestamp': '2025-10-02 00:35:15.773520', 'step': 13869, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:35:15.844142', 'step': 13869, 'epoch': 2}
{'type': 'loss', 'content': 0.12822061777114868, 'timestamp': '2025-10-02 00:35:15.850877', 'step': 13870, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:35:15.917097', 'step': 13870, 'epoch': 2}
{'type': 'loss', 'content': 0.020030204206705093, 'timestamp': '2025-10-02 00:35:15.928169', 'step': 13871, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:16.005661', 'step': 13871, 'epoch': 2}
{'type': 'loss', 'content': 0.045303959399461746, 'timestamp': '2025-10-02 00:35:16.012647', 'step': 13872, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:35:16.071598', 'step': 13872, 'epoch': 2}
{'type': 'loss', 'content': 0.0861213356256485, 'timestamp': '2025-10-02 00:35:16.074904', 'step': 13873, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:16.132172', 'step': 13873, 'epoch': 2}
{'type': 'loss', 'content': 0.11935287714004517, 'timestamp': '2025-10-02 00:35:16.134571', 'step': 13874, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:16.215487', 'step': 13874, 'epoch': 2}
{'type': 'loss', 'content': 0.039471518248319626, 'timestamp': '2025-10-02 00:35:16.218987', 'step': 13875, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:16.281348', 'step': 13875, 'epoch': 2}
{'type': 'loss', 'content': 0.05353386327624321, 'timestamp': '2025-10-02 00:35:16.291902', 'step': 13876, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:16.357317', 'step': 13876, 'epoch': 2}
{'type': 'loss', 'content': 0.11967337876558304, 'timestamp': '2025-10-02 00:35:16.360283', 'step': 13877, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:16.415332', 'step': 13877, 'epoch': 2}
{'type': 'loss', 'content': 0.18254050612449646, 'timestamp': '2025-10-02 00:35:16.421455', 'step': 13878, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:16.487610', 'step': 13878, 'epoch': 2}
{'type': 'loss', 'content': 0.1619480550289154, 'timestamp': '2025-10-02 00:35:16.491440', 'step': 13879, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:16.553030', 'step': 13879, 'epoch': 2}
{'type': 'loss', 'content': 0.030171865597367287, 'timestamp': '2025-10-02 00:35:16.559879', 'step': 13880, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:16.621660', 'step': 13880, 'epoch': 2}
{'type': 'loss', 'content': 0.024360064417123795, 'timestamp': '2025-10-02 00:35:16.629490', 'step': 13881, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:16.697483', 'step': 13881, 'epoch': 2}
{'type': 'loss', 'content': 0.12403982877731323, 'timestamp': '2025-10-02 00:35:16.700115', 'step': 13882, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:16.770580', 'step': 13882, 'epoch': 2}
{'type': 'loss', 'content': 0.06628328561782837, 'timestamp': '2025-10-02 00:35:16.774734', 'step': 13883, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:16.836201', 'step': 13883, 'epoch': 2}
{'type': 'loss', 'content': 0.04407195746898651, 'timestamp': '2025-10-02 00:35:16.846298', 'step': 13884, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:16.907683', 'step': 13884, 'epoch': 2}
{'type': 'loss', 'content': 0.1566542387008667, 'timestamp': '2025-10-02 00:35:16.915768', 'step': 13885, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:16.991812', 'step': 13885, 'epoch': 2}
{'type': 'loss', 'content': 0.11565329879522324, 'timestamp': '2025-10-02 00:35:16.994888', 'step': 13886, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:35:17.061353', 'step': 13886, 'epoch': 2}
{'type': 'loss', 'content': 0.06855107098817825, 'timestamp': '2025-10-02 00:35:17.072353', 'step': 13887, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:35:17.155052', 'step': 13887, 'epoch': 2}
{'type': 'loss', 'content': 0.010616778396070004, 'timestamp': '2025-10-02 00:35:17.170340', 'step': 13888, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:17.244571', 'step': 13888, 'epoch': 2}
{'type': 'loss', 'content': 0.030591491609811783, 'timestamp': '2025-10-02 00:35:17.258455', 'step': 13889, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:17.336029', 'step': 13889, 'epoch': 2}
{'type': 'loss', 'content': 0.14145055413246155, 'timestamp': '2025-10-02 00:35:17.341287', 'step': 13890, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:17.403952', 'step': 13890, 'epoch': 2}
{'type': 'loss', 'content': 0.027201954275369644, 'timestamp': '2025-10-02 00:35:17.415939', 'step': 13891, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:17.487949', 'step': 13891, 'epoch': 2}
{'type': 'loss', 'content': 0.013471839018166065, 'timestamp': '2025-10-02 00:35:17.498095', 'step': 13892, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:17.565900', 'step': 13892, 'epoch': 2}
{'type': 'loss', 'content': 0.05126641318202019, 'timestamp': '2025-10-02 00:35:17.570380', 'step': 13893, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:17.626305', 'step': 13893, 'epoch': 2}
{'type': 'loss', 'content': 0.15262891352176666, 'timestamp': '2025-10-02 00:35:17.634677', 'step': 13894, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:17.707503', 'step': 13894, 'epoch': 2}
{'type': 'loss', 'content': 0.08689158409833908, 'timestamp': '2025-10-02 00:35:17.716507', 'step': 13895, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:17.781485', 'step': 13895, 'epoch': 2}
{'type': 'loss', 'content': 0.08011291176080704, 'timestamp': '2025-10-02 00:35:17.793915', 'step': 13896, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:17.849597', 'step': 13896, 'epoch': 2}
{'type': 'loss', 'content': 0.00848450232297182, 'timestamp': '2025-10-02 00:35:17.857460', 'step': 13897, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:17.934174', 'step': 13897, 'epoch': 2}
{'type': 'loss', 'content': 0.12446651607751846, 'timestamp': '2025-10-02 00:35:17.941043', 'step': 13898, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:18.008833', 'step': 13898, 'epoch': 2}
{'type': 'loss', 'content': 0.06868113577365875, 'timestamp': '2025-10-02 00:35:18.017234', 'step': 13899, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:18.088606', 'step': 13899, 'epoch': 2}
{'type': 'loss', 'content': 0.01959419809281826, 'timestamp': '2025-10-02 00:35:18.095068', 'step': 13900, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:18.150686', 'step': 13900, 'epoch': 2}
{'type': 'loss', 'content': 0.08382593095302582, 'timestamp': '2025-10-02 00:35:18.156980', 'step': 13901, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:35:18.221338', 'step': 13901, 'epoch': 2}
{'type': 'loss', 'content': 0.013148908503353596, 'timestamp': '2025-10-02 00:35:18.231993', 'step': 13902, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:35:18.295476', 'step': 13902, 'epoch': 2}
{'type': 'loss', 'content': 0.05009181424975395, 'timestamp': '2025-10-02 00:35:18.305894', 'step': 13903, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:35:18.368358', 'step': 13903, 'epoch': 2}
{'type': 'loss', 'content': 0.028871644288301468, 'timestamp': '2025-10-02 00:35:18.378670', 'step': 13904, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:35:18.444522', 'step': 13904, 'epoch': 2}
{'type': 'loss', 'content': 0.05692649260163307, 'timestamp': '2025-10-02 00:35:18.455981', 'step': 13905, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:18.530942', 'step': 13905, 'epoch': 2}
{'type': 'loss', 'content': 0.04111049696803093, 'timestamp': '2025-10-02 00:35:18.533385', 'step': 13906, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:35:18.605510', 'step': 13906, 'epoch': 2}
{'type': 'loss', 'content': 0.1476162225008011, 'timestamp': '2025-10-02 00:35:18.608910', 'step': 13907, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:18.675578', 'step': 13907, 'epoch': 2}
{'type': 'loss', 'content': 0.08031861484050751, 'timestamp': '2025-10-02 00:35:18.683959', 'step': 13908, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:18.775474', 'step': 13908, 'epoch': 2}
{'type': 'loss', 'content': 0.08565595000982285, 'timestamp': '2025-10-02 00:35:18.781521', 'step': 13909, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:35:18.850180', 'step': 13909, 'epoch': 2}
{'type': 'loss', 'content': 0.0809161365032196, 'timestamp': '2025-10-02 00:35:18.859743', 'step': 13910, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:18.923995', 'step': 13910, 'epoch': 2}
{'type': 'loss', 'content': 0.041836224496364594, 'timestamp': '2025-10-02 00:35:18.935481', 'step': 13911, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:19.001703', 'step': 13911, 'epoch': 2}
{'type': 'loss', 'content': 0.03867444023489952, 'timestamp': '2025-10-02 00:35:19.008738', 'step': 13912, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:19.071642', 'step': 13912, 'epoch': 2}
{'type': 'loss', 'content': 0.035674259066581726, 'timestamp': '2025-10-02 00:35:19.079216', 'step': 13913, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:19.139315', 'step': 13913, 'epoch': 2}
{'type': 'loss', 'content': 0.042234957218170166, 'timestamp': '2025-10-02 00:35:19.146722', 'step': 13914, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:19.218402', 'step': 13914, 'epoch': 2}
{'type': 'loss', 'content': 0.038691196590662, 'timestamp': '2025-10-02 00:35:19.222949', 'step': 13915, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:19.297736', 'step': 13915, 'epoch': 2}
{'type': 'loss', 'content': 0.05783655494451523, 'timestamp': '2025-10-02 00:35:19.304803', 'step': 13916, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:19.367226', 'step': 13916, 'epoch': 2}
{'type': 'loss', 'content': 0.09461700171232224, 'timestamp': '2025-10-02 00:35:19.375047', 'step': 13917, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 544], 'flops': 10880066115712.0}, 'timestamp': '2025-10-02 00:35:19.468188', 'step': 13917, 'epoch': 2}
{'type': 'loss', 'content': 0.003924443386495113, 'timestamp': '2025-10-02 00:35:19.483106', 'step': 13918, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:35:19.564475', 'step': 13918, 'epoch': 2}
{'type': 'loss', 'content': 0.007542183622717857, 'timestamp': '2025-10-02 00:35:19.574016', 'step': 13919, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:35:19.637203', 'step': 13919, 'epoch': 2}
{'type': 'loss', 'content': 0.08759720623493195, 'timestamp': '2025-10-02 00:35:19.643578', 'step': 13920, 'epoch': 2}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:35:47.672375', 'step': 13920, 'epoch': 2}
{'type': 'pplx', 'content': 106.70746474968041, 'timestamp': '2025-10-02 00:35:47.675891', 'step': 13920, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:47.732086', 'step': 13920, 'epoch': 2}
{'type': 'loss', 'content': 0.05929220840334892, 'timestamp': '2025-10-02 00:35:47.734622', 'step': 13921, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:47.790171', 'step': 13921, 'epoch': 2}
{'type': 'loss', 'content': 0.031371068209409714, 'timestamp': '2025-10-02 00:35:47.795249', 'step': 13922, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:47.852395', 'step': 13922, 'epoch': 2}
{'type': 'loss', 'content': 0.10220324248075485, 'timestamp': '2025-10-02 00:35:47.861466', 'step': 13923, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:47.916881', 'step': 13923, 'epoch': 2}
{'type': 'loss', 'content': 0.03535737097263336, 'timestamp': '2025-10-02 00:35:47.923286', 'step': 13924, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:47.977485', 'step': 13924, 'epoch': 2}
{'type': 'loss', 'content': 0.06398814916610718, 'timestamp': '2025-10-02 00:35:47.980922', 'step': 13925, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:48.035227', 'step': 13925, 'epoch': 2}
{'type': 'loss', 'content': 0.1676449477672577, 'timestamp': '2025-10-02 00:35:48.037836', 'step': 13926, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:35:48.107815', 'step': 13926, 'epoch': 2}
{'type': 'loss', 'content': 0.012471782974898815, 'timestamp': '2025-10-02 00:35:48.120148', 'step': 13927, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:35:48.174040', 'step': 13927, 'epoch': 2}
{'type': 'loss', 'content': 0.08242825418710709, 'timestamp': '2025-10-02 00:35:48.184353', 'step': 13928, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:48.249447', 'step': 13928, 'epoch': 2}
{'type': 'loss', 'content': 0.025237513706088066, 'timestamp': '2025-10-02 00:35:48.255585', 'step': 13929, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:48.310140', 'step': 13929, 'epoch': 2}
{'type': 'loss', 'content': 0.018914181739091873, 'timestamp': '2025-10-02 00:35:48.316067', 'step': 13930, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:48.371042', 'step': 13930, 'epoch': 2}
{'type': 'loss', 'content': 0.08560136705636978, 'timestamp': '2025-10-02 00:35:48.373455', 'step': 13931, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:48.428777', 'step': 13931, 'epoch': 2}
{'type': 'loss', 'content': 0.11553602665662766, 'timestamp': '2025-10-02 00:35:48.435767', 'step': 13932, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:48.490579', 'step': 13932, 'epoch': 2}
{'type': 'loss', 'content': 0.055316273123025894, 'timestamp': '2025-10-02 00:35:48.498481', 'step': 13933, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:48.553487', 'step': 13933, 'epoch': 2}
{'type': 'loss', 'content': 0.023193854838609695, 'timestamp': '2025-10-02 00:35:48.561266', 'step': 13934, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:35:48.618104', 'step': 13934, 'epoch': 2}
{'type': 'loss', 'content': 0.06254371255636215, 'timestamp': '2025-10-02 00:35:48.620495', 'step': 13935, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:48.679478', 'step': 13935, 'epoch': 2}
{'type': 'loss', 'content': 0.013359872624278069, 'timestamp': '2025-10-02 00:35:48.686435', 'step': 13936, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:35:48.746365', 'step': 13936, 'epoch': 2}
{'type': 'loss', 'content': 0.053705114871263504, 'timestamp': '2025-10-02 00:35:48.757336', 'step': 13937, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:48.815181', 'step': 13937, 'epoch': 2}
{'type': 'loss', 'content': 0.007534342352300882, 'timestamp': '2025-10-02 00:35:48.822894', 'step': 13938, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:35:48.882421', 'step': 13938, 'epoch': 2}
{'type': 'loss', 'content': 0.19103413820266724, 'timestamp': '2025-10-02 00:35:48.885059', 'step': 13939, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:35:48.947297', 'step': 13939, 'epoch': 2}
{'type': 'loss', 'content': 0.14445850253105164, 'timestamp': '2025-10-02 00:35:48.954583', 'step': 13940, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:35:49.024222', 'step': 13940, 'epoch': 2}
{'type': 'loss', 'content': 0.054286934435367584, 'timestamp': '2025-10-02 00:35:49.038012', 'step': 13941, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:49.094881', 'step': 13941, 'epoch': 2}
{'type': 'loss', 'content': 0.04975882172584534, 'timestamp': '2025-10-02 00:35:49.100344', 'step': 13942, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:35:49.156787', 'step': 13942, 'epoch': 2}
{'type': 'loss', 'content': 0.12698960304260254, 'timestamp': '2025-10-02 00:35:49.160010', 'step': 13943, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:49.219595', 'step': 13943, 'epoch': 2}
{'type': 'loss', 'content': 0.0970357358455658, 'timestamp': '2025-10-02 00:35:49.226474', 'step': 13944, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:35:49.288812', 'step': 13944, 'epoch': 2}
{'type': 'loss', 'content': 0.036711543798446655, 'timestamp': '2025-10-02 00:35:49.300381', 'step': 13945, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:35:49.356913', 'step': 13945, 'epoch': 2}
{'type': 'loss', 'content': 0.045539844781160355, 'timestamp': '2025-10-02 00:35:49.360025', 'step': 13946, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:49.415966', 'step': 13946, 'epoch': 2}
{'type': 'loss', 'content': 0.019216381013393402, 'timestamp': '2025-10-02 00:35:49.422587', 'step': 13947, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:49.488860', 'step': 13947, 'epoch': 2}
{'type': 'loss', 'content': 0.11181750148534775, 'timestamp': '2025-10-02 00:35:49.495908', 'step': 13948, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:35:49.553933', 'step': 13948, 'epoch': 2}
{'type': 'loss', 'content': 0.15621086955070496, 'timestamp': '2025-10-02 00:35:49.557674', 'step': 13949, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:49.613279', 'step': 13949, 'epoch': 2}
{'type': 'loss', 'content': 0.0487547367811203, 'timestamp': '2025-10-02 00:35:49.616107', 'step': 13950, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:49.672719', 'step': 13950, 'epoch': 2}
{'type': 'loss', 'content': 0.04165472462773323, 'timestamp': '2025-10-02 00:35:49.676412', 'step': 13951, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:49.732422', 'step': 13951, 'epoch': 2}
{'type': 'loss', 'content': 0.03312946483492851, 'timestamp': '2025-10-02 00:35:49.739438', 'step': 13952, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:35:49.797042', 'step': 13952, 'epoch': 2}
{'type': 'loss', 'content': 0.01214586477726698, 'timestamp': '2025-10-02 00:35:49.807293', 'step': 13953, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:49.862368', 'step': 13953, 'epoch': 2}
{'type': 'loss', 'content': 0.07384121417999268, 'timestamp': '2025-10-02 00:35:49.870222', 'step': 13954, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:49.925863', 'step': 13954, 'epoch': 2}
{'type': 'loss', 'content': 0.06772106885910034, 'timestamp': '2025-10-02 00:35:49.932139', 'step': 13955, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:49.987340', 'step': 13955, 'epoch': 2}
{'type': 'loss', 'content': 0.16785122454166412, 'timestamp': '2025-10-02 00:35:49.994561', 'step': 13956, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:50.051478', 'step': 13956, 'epoch': 2}
{'type': 'loss', 'content': 0.0970480889081955, 'timestamp': '2025-10-02 00:35:50.054847', 'step': 13957, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:50.112011', 'step': 13957, 'epoch': 2}
{'type': 'loss', 'content': 0.09978342056274414, 'timestamp': '2025-10-02 00:35:50.115420', 'step': 13958, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:35:50.180953', 'step': 13958, 'epoch': 2}
{'type': 'loss', 'content': 0.026504794135689735, 'timestamp': '2025-10-02 00:35:50.191432', 'step': 13959, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:50.249240', 'step': 13959, 'epoch': 2}
{'type': 'loss', 'content': 0.0670958012342453, 'timestamp': '2025-10-02 00:35:50.256279', 'step': 13960, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:50.314634', 'step': 13960, 'epoch': 2}
{'type': 'loss', 'content': 0.10215216130018234, 'timestamp': '2025-10-02 00:35:50.316951', 'step': 13961, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:35:50.375121', 'step': 13961, 'epoch': 2}
{'type': 'loss', 'content': 0.08486930280923843, 'timestamp': '2025-10-02 00:35:50.377364', 'step': 13962, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:50.434811', 'step': 13962, 'epoch': 2}
{'type': 'loss', 'content': 0.06297407299280167, 'timestamp': '2025-10-02 00:35:50.438052', 'step': 13963, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:50.494888', 'step': 13963, 'epoch': 2}
{'type': 'loss', 'content': 0.0949711725115776, 'timestamp': '2025-10-02 00:35:50.501389', 'step': 13964, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:50.558719', 'step': 13964, 'epoch': 2}
{'type': 'loss', 'content': 0.07202178239822388, 'timestamp': '2025-10-02 00:35:50.562992', 'step': 13965, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:50.616929', 'step': 13965, 'epoch': 2}
{'type': 'loss', 'content': 0.03051184117794037, 'timestamp': '2025-10-02 00:35:50.620490', 'step': 13966, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:50.677624', 'step': 13966, 'epoch': 2}
{'type': 'loss', 'content': 0.055617161095142365, 'timestamp': '2025-10-02 00:35:50.683847', 'step': 13967, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:50.744006', 'step': 13967, 'epoch': 2}
{'type': 'loss', 'content': 0.08498158305883408, 'timestamp': '2025-10-02 00:35:50.750944', 'step': 13968, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:35:50.810789', 'step': 13968, 'epoch': 2}
{'type': 'loss', 'content': 0.047068268060684204, 'timestamp': '2025-10-02 00:35:50.821721', 'step': 13969, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:50.877272', 'step': 13969, 'epoch': 2}
{'type': 'loss', 'content': 0.03882906958460808, 'timestamp': '2025-10-02 00:35:50.879584', 'step': 13970, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:35:50.938594', 'step': 13970, 'epoch': 2}
{'type': 'loss', 'content': 0.05678832530975342, 'timestamp': '2025-10-02 00:35:50.948769', 'step': 13971, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:35:51.022926', 'step': 13971, 'epoch': 2}
{'type': 'loss', 'content': 0.012316199019551277, 'timestamp': '2025-10-02 00:35:51.036961', 'step': 13972, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:51.090827', 'step': 13972, 'epoch': 2}
{'type': 'loss', 'content': 0.05949721485376358, 'timestamp': '2025-10-02 00:35:51.093228', 'step': 13973, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:51.146949', 'step': 13973, 'epoch': 2}
{'type': 'loss', 'content': 0.07221496105194092, 'timestamp': '2025-10-02 00:35:51.149743', 'step': 13974, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:51.203898', 'step': 13974, 'epoch': 2}
{'type': 'loss', 'content': 0.0744662880897522, 'timestamp': '2025-10-02 00:35:51.213231', 'step': 13975, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:51.267957', 'step': 13975, 'epoch': 2}
{'type': 'loss', 'content': 0.06505095213651657, 'timestamp': '2025-10-02 00:35:51.278113', 'step': 13976, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:51.331918', 'step': 13976, 'epoch': 2}
{'type': 'loss', 'content': 0.0531567819416523, 'timestamp': '2025-10-02 00:35:51.341829', 'step': 13977, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:51.396744', 'step': 13977, 'epoch': 2}
{'type': 'loss', 'content': 0.1473482847213745, 'timestamp': '2025-10-02 00:35:51.399096', 'step': 13978, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:51.456603', 'step': 13978, 'epoch': 2}
{'type': 'loss', 'content': 0.04584909975528717, 'timestamp': '2025-10-02 00:35:51.459256', 'step': 13979, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:51.512927', 'step': 13979, 'epoch': 2}
{'type': 'loss', 'content': 0.06197517737746239, 'timestamp': '2025-10-02 00:35:51.520744', 'step': 13980, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:51.573838', 'step': 13980, 'epoch': 2}
{'type': 'loss', 'content': 0.1064743921160698, 'timestamp': '2025-10-02 00:35:51.576337', 'step': 13981, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:51.629745', 'step': 13981, 'epoch': 2}
{'type': 'loss', 'content': 0.07487723976373672, 'timestamp': '2025-10-02 00:35:51.637588', 'step': 13982, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:51.691748', 'step': 13982, 'epoch': 2}
{'type': 'loss', 'content': 0.0918743908405304, 'timestamp': '2025-10-02 00:35:51.693899', 'step': 13983, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:51.748029', 'step': 13983, 'epoch': 2}
{'type': 'loss', 'content': 0.12790295481681824, 'timestamp': '2025-10-02 00:35:51.753653', 'step': 13984, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:35:51.807173', 'step': 13984, 'epoch': 2}
{'type': 'loss', 'content': 0.11519225686788559, 'timestamp': '2025-10-02 00:35:51.809552', 'step': 13985, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:51.863311', 'step': 13985, 'epoch': 2}
{'type': 'loss', 'content': 0.05866437032818794, 'timestamp': '2025-10-02 00:35:51.866071', 'step': 13986, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:35:51.919875', 'step': 13986, 'epoch': 2}
{'type': 'loss', 'content': 0.11085118353366852, 'timestamp': '2025-10-02 00:35:51.925036', 'step': 13987, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:51.979148', 'step': 13987, 'epoch': 2}
{'type': 'loss', 'content': 0.030697064474225044, 'timestamp': '2025-10-02 00:35:51.985664', 'step': 13988, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:35:52.038778', 'step': 13988, 'epoch': 2}
{'type': 'loss', 'content': 0.05934785678982735, 'timestamp': '2025-10-02 00:35:52.041162', 'step': 13989, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:52.094713', 'step': 13989, 'epoch': 2}
{'type': 'loss', 'content': 0.10401485115289688, 'timestamp': '2025-10-02 00:35:52.096917', 'step': 13990, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:52.151504', 'step': 13990, 'epoch': 2}
{'type': 'loss', 'content': 0.0693698450922966, 'timestamp': '2025-10-02 00:35:52.154223', 'step': 13991, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:52.209086', 'step': 13991, 'epoch': 2}
{'type': 'loss', 'content': 0.05127835273742676, 'timestamp': '2025-10-02 00:35:52.214886', 'step': 13992, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:52.268362', 'step': 13992, 'epoch': 2}
{'type': 'loss', 'content': 0.03793695196509361, 'timestamp': '2025-10-02 00:35:52.270724', 'step': 13993, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:52.325536', 'step': 13993, 'epoch': 2}
{'type': 'loss', 'content': 0.036348607391119, 'timestamp': '2025-10-02 00:35:52.328038', 'step': 13994, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:52.381887', 'step': 13994, 'epoch': 2}
{'type': 'loss', 'content': 0.06020723655819893, 'timestamp': '2025-10-02 00:35:52.388082', 'step': 13995, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:52.442779', 'step': 13995, 'epoch': 2}
{'type': 'loss', 'content': 0.04859791323542595, 'timestamp': '2025-10-02 00:35:52.448606', 'step': 13996, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:35:52.509056', 'step': 13996, 'epoch': 2}
{'type': 'loss', 'content': 0.016020698472857475, 'timestamp': '2025-10-02 00:35:52.520449', 'step': 13997, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:52.574484', 'step': 13997, 'epoch': 2}
{'type': 'loss', 'content': 0.14054502546787262, 'timestamp': '2025-10-02 00:35:52.576860', 'step': 13998, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:35:52.645283', 'step': 13998, 'epoch': 2}
{'type': 'loss', 'content': 0.032873235642910004, 'timestamp': '2025-10-02 00:35:52.657277', 'step': 13999, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:52.712138', 'step': 13999, 'epoch': 2}
{'type': 'loss', 'content': 0.09159956872463226, 'timestamp': '2025-10-02 00:35:52.722275', 'step': 14000, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 14000', 'timestamp': '2025-10-02 00:35:53.161204', 'step': 14000, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:35:53.216653', 'step': 14000, 'epoch': 2}
{'type': 'loss', 'content': 0.07615642249584198, 'timestamp': '2025-10-02 00:35:53.219023', 'step': 14001, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:53.272780', 'step': 14001, 'epoch': 2}
{'type': 'loss', 'content': 0.0455506406724453, 'timestamp': '2025-10-02 00:35:53.278694', 'step': 14002, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:53.333226', 'step': 14002, 'epoch': 2}
{'type': 'loss', 'content': 0.010332499630749226, 'timestamp': '2025-10-02 00:35:53.342587', 'step': 14003, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:53.397370', 'step': 14003, 'epoch': 2}
{'type': 'loss', 'content': 0.05482641980051994, 'timestamp': '2025-10-02 00:35:53.403360', 'step': 14004, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:53.456618', 'step': 14004, 'epoch': 2}
{'type': 'loss', 'content': 0.1809164136648178, 'timestamp': '2025-10-02 00:35:53.458742', 'step': 14005, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:53.514610', 'step': 14005, 'epoch': 2}
{'type': 'loss', 'content': 0.014021163806319237, 'timestamp': '2025-10-02 00:35:53.516799', 'step': 14006, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:53.571461', 'step': 14006, 'epoch': 2}
{'type': 'loss', 'content': 0.037348754703998566, 'timestamp': '2025-10-02 00:35:53.579167', 'step': 14007, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:53.632593', 'step': 14007, 'epoch': 2}
{'type': 'loss', 'content': 0.0867338702082634, 'timestamp': '2025-10-02 00:35:53.638523', 'step': 14008, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:53.691963', 'step': 14008, 'epoch': 2}
{'type': 'loss', 'content': 0.052998967468738556, 'timestamp': '2025-10-02 00:35:53.694527', 'step': 14009, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:53.748720', 'step': 14009, 'epoch': 2}
{'type': 'loss', 'content': 0.03365498036146164, 'timestamp': '2025-10-02 00:35:53.751224', 'step': 14010, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:53.805934', 'step': 14010, 'epoch': 2}
{'type': 'loss', 'content': 0.035685718059539795, 'timestamp': '2025-10-02 00:35:53.808702', 'step': 14011, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:53.862293', 'step': 14011, 'epoch': 2}
{'type': 'loss', 'content': 0.11214718222618103, 'timestamp': '2025-10-02 00:35:53.868134', 'step': 14012, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:53.921580', 'step': 14012, 'epoch': 2}
{'type': 'loss', 'content': 0.037251222878694534, 'timestamp': '2025-10-02 00:35:53.931405', 'step': 14013, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:35:53.994509', 'step': 14013, 'epoch': 2}
{'type': 'loss', 'content': 0.04530032351613045, 'timestamp': '2025-10-02 00:35:54.005367', 'step': 14014, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:54.060942', 'step': 14014, 'epoch': 2}
{'type': 'loss', 'content': 0.02384629100561142, 'timestamp': '2025-10-02 00:35:54.070272', 'step': 14015, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:54.124159', 'step': 14015, 'epoch': 2}
{'type': 'loss', 'content': 0.12115606665611267, 'timestamp': '2025-10-02 00:35:54.130307', 'step': 14016, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:35:54.184888', 'step': 14016, 'epoch': 2}
{'type': 'loss', 'content': 0.14402233064174652, 'timestamp': '2025-10-02 00:35:54.187363', 'step': 14017, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:54.240917', 'step': 14017, 'epoch': 2}
{'type': 'loss', 'content': 0.08272937685251236, 'timestamp': '2025-10-02 00:35:54.247280', 'step': 14018, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:54.301944', 'step': 14018, 'epoch': 2}
{'type': 'loss', 'content': 0.028266040608286858, 'timestamp': '2025-10-02 00:35:54.304706', 'step': 14019, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:54.358646', 'step': 14019, 'epoch': 2}
{'type': 'loss', 'content': 0.0723891481757164, 'timestamp': '2025-10-02 00:35:54.364471', 'step': 14020, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:35:54.417130', 'step': 14020, 'epoch': 2}
{'type': 'loss', 'content': 0.06465406715869904, 'timestamp': '2025-10-02 00:35:54.419466', 'step': 14021, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:54.473949', 'step': 14021, 'epoch': 2}
{'type': 'loss', 'content': 0.032252613455057144, 'timestamp': '2025-10-02 00:35:54.476160', 'step': 14022, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:54.530411', 'step': 14022, 'epoch': 2}
{'type': 'loss', 'content': 0.043405063450336456, 'timestamp': '2025-10-02 00:35:54.532563', 'step': 14023, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:35:54.590885', 'step': 14023, 'epoch': 2}
{'type': 'loss', 'content': 0.039877377450466156, 'timestamp': '2025-10-02 00:35:54.601826', 'step': 14024, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:54.656271', 'step': 14024, 'epoch': 2}
{'type': 'loss', 'content': 0.08163117617368698, 'timestamp': '2025-10-02 00:35:54.658567', 'step': 14025, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:35:54.712092', 'step': 14025, 'epoch': 2}
{'type': 'loss', 'content': 0.1172134056687355, 'timestamp': '2025-10-02 00:35:54.714262', 'step': 14026, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:54.768653', 'step': 14026, 'epoch': 2}
{'type': 'loss', 'content': 0.03574514016509056, 'timestamp': '2025-10-02 00:35:54.777990', 'step': 14027, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:54.832769', 'step': 14027, 'epoch': 2}
{'type': 'loss', 'content': 0.04292581230401993, 'timestamp': '2025-10-02 00:35:54.838352', 'step': 14028, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:54.892101', 'step': 14028, 'epoch': 2}
{'type': 'loss', 'content': 0.012110210955142975, 'timestamp': '2025-10-02 00:35:54.898408', 'step': 14029, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:54.953844', 'step': 14029, 'epoch': 2}
{'type': 'loss', 'content': 0.004388588014990091, 'timestamp': '2025-10-02 00:35:54.960171', 'step': 14030, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:55.014498', 'step': 14030, 'epoch': 2}
{'type': 'loss', 'content': 0.03303937241435051, 'timestamp': '2025-10-02 00:35:55.020630', 'step': 14031, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:55.075449', 'step': 14031, 'epoch': 2}
{'type': 'loss', 'content': 0.03653667867183685, 'timestamp': '2025-10-02 00:35:55.081416', 'step': 14032, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:55.134520', 'step': 14032, 'epoch': 2}
{'type': 'loss', 'content': 0.05113661661744118, 'timestamp': '2025-10-02 00:35:55.136847', 'step': 14033, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:55.190631', 'step': 14033, 'epoch': 2}
{'type': 'loss', 'content': 0.0697050616145134, 'timestamp': '2025-10-02 00:35:55.193369', 'step': 14034, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:55.247694', 'step': 14034, 'epoch': 2}
{'type': 'loss', 'content': 0.028349244967103004, 'timestamp': '2025-10-02 00:35:55.250525', 'step': 14035, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:55.304136', 'step': 14035, 'epoch': 2}
{'type': 'loss', 'content': 0.056608881801366806, 'timestamp': '2025-10-02 00:35:55.310090', 'step': 14036, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:55.363848', 'step': 14036, 'epoch': 2}
{'type': 'loss', 'content': 0.0274018757045269, 'timestamp': '2025-10-02 00:35:55.370172', 'step': 14037, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:55.424149', 'step': 14037, 'epoch': 2}
{'type': 'loss', 'content': 0.07559661567211151, 'timestamp': '2025-10-02 00:35:55.426613', 'step': 14038, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:35:55.480321', 'step': 14038, 'epoch': 2}
{'type': 'loss', 'content': 0.15139205753803253, 'timestamp': '2025-10-02 00:35:55.482566', 'step': 14039, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:55.538007', 'step': 14039, 'epoch': 2}
{'type': 'loss', 'content': 0.026432804763317108, 'timestamp': '2025-10-02 00:35:55.548153', 'step': 14040, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:35:55.601443', 'step': 14040, 'epoch': 2}
{'type': 'loss', 'content': 0.09226185083389282, 'timestamp': '2025-10-02 00:35:55.603777', 'step': 14041, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:55.657961', 'step': 14041, 'epoch': 2}
{'type': 'loss', 'content': 0.06812889128923416, 'timestamp': '2025-10-02 00:35:55.660157', 'step': 14042, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:55.713634', 'step': 14042, 'epoch': 2}
{'type': 'loss', 'content': 0.024820854887366295, 'timestamp': '2025-10-02 00:35:55.716317', 'step': 14043, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:55.769638', 'step': 14043, 'epoch': 2}
{'type': 'loss', 'content': 0.047066688537597656, 'timestamp': '2025-10-02 00:35:55.775844', 'step': 14044, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:35:55.829284', 'step': 14044, 'epoch': 2}
{'type': 'loss', 'content': 0.038506608456373215, 'timestamp': '2025-10-02 00:35:55.839137', 'step': 14045, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:55.893359', 'step': 14045, 'epoch': 2}
{'type': 'loss', 'content': 0.00984333548694849, 'timestamp': '2025-10-02 00:35:55.896028', 'step': 14046, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:55.949607', 'step': 14046, 'epoch': 2}
{'type': 'loss', 'content': 0.023550143465399742, 'timestamp': '2025-10-02 00:35:55.952292', 'step': 14047, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:35:56.011130', 'step': 14047, 'epoch': 2}
{'type': 'loss', 'content': 0.04850347712635994, 'timestamp': '2025-10-02 00:35:56.022115', 'step': 14048, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:56.075769', 'step': 14048, 'epoch': 2}
{'type': 'loss', 'content': 0.028732625767588615, 'timestamp': '2025-10-02 00:35:56.078045', 'step': 14049, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:56.132226', 'step': 14049, 'epoch': 2}
{'type': 'loss', 'content': 0.08743561059236526, 'timestamp': '2025-10-02 00:35:56.134408', 'step': 14050, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:35:56.203274', 'step': 14050, 'epoch': 2}
{'type': 'loss', 'content': 0.059391267597675323, 'timestamp': '2025-10-02 00:35:56.215616', 'step': 14051, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:56.270337', 'step': 14051, 'epoch': 2}
{'type': 'loss', 'content': 0.024136772379279137, 'timestamp': '2025-10-02 00:35:56.275995', 'step': 14052, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:56.330602', 'step': 14052, 'epoch': 2}
{'type': 'loss', 'content': 0.04290848970413208, 'timestamp': '2025-10-02 00:35:56.338464', 'step': 14053, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:56.393327', 'step': 14053, 'epoch': 2}
{'type': 'loss', 'content': 0.04116695001721382, 'timestamp': '2025-10-02 00:35:56.395428', 'step': 14054, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:56.449494', 'step': 14054, 'epoch': 2}
{'type': 'loss', 'content': 0.13086163997650146, 'timestamp': '2025-10-02 00:35:56.451777', 'step': 14055, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:56.506600', 'step': 14055, 'epoch': 2}
{'type': 'loss', 'content': 0.05503690242767334, 'timestamp': '2025-10-02 00:35:56.514945', 'step': 14056, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:56.568653', 'step': 14056, 'epoch': 2}
{'type': 'loss', 'content': 0.03006506897509098, 'timestamp': '2025-10-02 00:35:56.571358', 'step': 14057, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:56.625001', 'step': 14057, 'epoch': 2}
{'type': 'loss', 'content': 0.0914410799741745, 'timestamp': '2025-10-02 00:35:56.627287', 'step': 14058, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:56.681646', 'step': 14058, 'epoch': 2}
{'type': 'loss', 'content': 0.047205694019794464, 'timestamp': '2025-10-02 00:35:56.685631', 'step': 14059, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:56.742652', 'step': 14059, 'epoch': 2}
{'type': 'loss', 'content': 0.02470363676548004, 'timestamp': '2025-10-02 00:35:56.751213', 'step': 14060, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:35:56.804471', 'step': 14060, 'epoch': 2}
{'type': 'loss', 'content': 0.1356336921453476, 'timestamp': '2025-10-02 00:35:56.806799', 'step': 14061, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:56.861834', 'step': 14061, 'epoch': 2}
{'type': 'loss', 'content': 0.07895827293395996, 'timestamp': '2025-10-02 00:35:56.865419', 'step': 14062, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:56.919332', 'step': 14062, 'epoch': 2}
{'type': 'loss', 'content': 0.04954775795340538, 'timestamp': '2025-10-02 00:35:56.927128', 'step': 14063, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:56.981252', 'step': 14063, 'epoch': 2}
{'type': 'loss', 'content': 0.08424020558595657, 'timestamp': '2025-10-02 00:35:56.986875', 'step': 14064, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:57.039718', 'step': 14064, 'epoch': 2}
{'type': 'loss', 'content': 0.18000100553035736, 'timestamp': '2025-10-02 00:35:57.042516', 'step': 14065, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:57.098553', 'step': 14065, 'epoch': 2}
{'type': 'loss', 'content': 0.1189722865819931, 'timestamp': '2025-10-02 00:35:57.100889', 'step': 14066, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:57.155330', 'step': 14066, 'epoch': 2}
{'type': 'loss', 'content': 0.0621635764837265, 'timestamp': '2025-10-02 00:35:57.162858', 'step': 14067, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:35:57.216479', 'step': 14067, 'epoch': 2}
{'type': 'loss', 'content': 0.07238214462995529, 'timestamp': '2025-10-02 00:35:57.224369', 'step': 14068, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:35:57.289274', 'step': 14068, 'epoch': 2}
{'type': 'loss', 'content': 0.13111449778079987, 'timestamp': '2025-10-02 00:35:57.300649', 'step': 14069, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:57.355916', 'step': 14069, 'epoch': 2}
{'type': 'loss', 'content': 0.06528730690479279, 'timestamp': '2025-10-02 00:35:57.361879', 'step': 14070, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:57.416621', 'step': 14070, 'epoch': 2}
{'type': 'loss', 'content': 0.014721662737429142, 'timestamp': '2025-10-02 00:35:57.419120', 'step': 14071, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:35:57.478320', 'step': 14071, 'epoch': 2}
{'type': 'loss', 'content': 0.08320094645023346, 'timestamp': '2025-10-02 00:35:57.484099', 'step': 14072, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:57.539878', 'step': 14072, 'epoch': 2}
{'type': 'loss', 'content': 0.04812711104750633, 'timestamp': '2025-10-02 00:35:57.545002', 'step': 14073, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:35:57.601674', 'step': 14073, 'epoch': 2}
{'type': 'loss', 'content': 0.06273183971643448, 'timestamp': '2025-10-02 00:35:57.611217', 'step': 14074, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:35:57.671771', 'step': 14074, 'epoch': 2}
{'type': 'loss', 'content': 0.06997945159673691, 'timestamp': '2025-10-02 00:35:57.681945', 'step': 14075, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:57.743670', 'step': 14075, 'epoch': 2}
{'type': 'loss', 'content': 0.07682064175605774, 'timestamp': '2025-10-02 00:35:57.749388', 'step': 14076, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:35:57.802598', 'step': 14076, 'epoch': 2}
{'type': 'loss', 'content': 0.07235701382160187, 'timestamp': '2025-10-02 00:35:57.804830', 'step': 14077, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:57.859661', 'step': 14077, 'epoch': 2}
{'type': 'loss', 'content': 0.02398017793893814, 'timestamp': '2025-10-02 00:35:57.867419', 'step': 14078, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:57.924089', 'step': 14078, 'epoch': 2}
{'type': 'loss', 'content': 0.16120749711990356, 'timestamp': '2025-10-02 00:35:57.927351', 'step': 14079, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:35:57.988181', 'step': 14079, 'epoch': 2}
{'type': 'loss', 'content': 0.03681284561753273, 'timestamp': '2025-10-02 00:35:57.999194', 'step': 14080, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:35:58.055686', 'step': 14080, 'epoch': 2}
{'type': 'loss', 'content': 0.06378094851970673, 'timestamp': '2025-10-02 00:35:58.063437', 'step': 14081, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:58.117906', 'step': 14081, 'epoch': 2}
{'type': 'loss', 'content': 0.05147324129939079, 'timestamp': '2025-10-02 00:35:58.124172', 'step': 14082, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:35:58.190041', 'step': 14082, 'epoch': 2}
{'type': 'loss', 'content': 0.016093861311674118, 'timestamp': '2025-10-02 00:35:58.200909', 'step': 14083, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:58.256483', 'step': 14083, 'epoch': 2}
{'type': 'loss', 'content': 0.12113304436206818, 'timestamp': '2025-10-02 00:35:58.263134', 'step': 14084, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:58.317062', 'step': 14084, 'epoch': 2}
{'type': 'loss', 'content': 0.14472615718841553, 'timestamp': '2025-10-02 00:35:58.320052', 'step': 14085, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:35:58.382760', 'step': 14085, 'epoch': 2}
{'type': 'loss', 'content': 0.03674265742301941, 'timestamp': '2025-10-02 00:35:58.393212', 'step': 14086, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:58.449094', 'step': 14086, 'epoch': 2}
{'type': 'loss', 'content': 0.03730082884430885, 'timestamp': '2025-10-02 00:35:58.452003', 'step': 14087, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:58.508213', 'step': 14087, 'epoch': 2}
{'type': 'loss', 'content': 0.0891282856464386, 'timestamp': '2025-10-02 00:35:58.514855', 'step': 14088, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:58.570839', 'step': 14088, 'epoch': 2}
{'type': 'loss', 'content': 0.05220198258757591, 'timestamp': '2025-10-02 00:35:58.573604', 'step': 14089, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:35:58.630283', 'step': 14089, 'epoch': 2}
{'type': 'loss', 'content': 0.05738412216305733, 'timestamp': '2025-10-02 00:35:58.636127', 'step': 14090, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:35:58.695166', 'step': 14090, 'epoch': 2}
{'type': 'loss', 'content': 0.019804302603006363, 'timestamp': '2025-10-02 00:35:58.704720', 'step': 14091, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:58.761620', 'step': 14091, 'epoch': 2}
{'type': 'loss', 'content': 0.11490695178508759, 'timestamp': '2025-10-02 00:35:58.767406', 'step': 14092, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:35:58.821488', 'step': 14092, 'epoch': 2}
{'type': 'loss', 'content': 0.14778903126716614, 'timestamp': '2025-10-02 00:35:58.824635', 'step': 14093, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:35:58.880126', 'step': 14093, 'epoch': 2}
{'type': 'loss', 'content': 0.04432686045765877, 'timestamp': '2025-10-02 00:35:58.883037', 'step': 14094, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:35:58.940486', 'step': 14094, 'epoch': 2}
{'type': 'loss', 'content': 0.06979257613420486, 'timestamp': '2025-10-02 00:35:58.942944', 'step': 14095, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:35:58.999079', 'step': 14095, 'epoch': 2}
{'type': 'loss', 'content': 0.06555971503257751, 'timestamp': '2025-10-02 00:35:59.006051', 'step': 14096, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:35:59.078258', 'step': 14096, 'epoch': 2}
{'type': 'loss', 'content': 0.052126094698905945, 'timestamp': '2025-10-02 00:35:59.091872', 'step': 14097, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:35:59.148772', 'step': 14097, 'epoch': 2}
{'type': 'loss', 'content': 0.018750296905636787, 'timestamp': '2025-10-02 00:35:59.152041', 'step': 14098, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:35:59.207938', 'step': 14098, 'epoch': 2}
{'type': 'loss', 'content': 0.08203894644975662, 'timestamp': '2025-10-02 00:35:59.210352', 'step': 14099, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:35:59.267057', 'step': 14099, 'epoch': 2}
{'type': 'loss', 'content': 0.13272647559642792, 'timestamp': '2025-10-02 00:35:59.273230', 'step': 14100, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:35:59.328633', 'step': 14100, 'epoch': 2}
{'type': 'loss', 'content': 0.012365217320621014, 'timestamp': '2025-10-02 00:35:59.330714', 'step': 14101, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:35:59.384457', 'step': 14101, 'epoch': 2}
{'type': 'loss', 'content': 0.09900902956724167, 'timestamp': '2025-10-02 00:35:59.386943', 'step': 14102, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:35:59.441214', 'step': 14102, 'epoch': 2}
{'type': 'loss', 'content': 0.05429529398679733, 'timestamp': '2025-10-02 00:35:59.443727', 'step': 14103, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:35:59.496992', 'step': 14103, 'epoch': 2}
{'type': 'loss', 'content': 0.09956240653991699, 'timestamp': '2025-10-02 00:35:59.502862', 'step': 14104, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:35:59.556771', 'step': 14104, 'epoch': 2}
{'type': 'loss', 'content': 0.08212079852819443, 'timestamp': '2025-10-02 00:35:59.559278', 'step': 14105, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:35:59.623221', 'step': 14105, 'epoch': 2}
{'type': 'loss', 'content': 0.021153641864657402, 'timestamp': '2025-10-02 00:35:59.633994', 'step': 14106, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:35:59.689367', 'step': 14106, 'epoch': 2}
{'type': 'loss', 'content': 0.05994157865643501, 'timestamp': '2025-10-02 00:35:59.698832', 'step': 14107, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:35:59.757540', 'step': 14107, 'epoch': 2}
{'type': 'loss', 'content': 0.030502041801810265, 'timestamp': '2025-10-02 00:35:59.768513', 'step': 14108, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:35:59.821649', 'step': 14108, 'epoch': 2}
{'type': 'loss', 'content': 0.07092379033565521, 'timestamp': '2025-10-02 00:35:59.825003', 'step': 14109, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:35:59.879068', 'step': 14109, 'epoch': 2}
{'type': 'loss', 'content': 0.16784676909446716, 'timestamp': '2025-10-02 00:35:59.881389', 'step': 14110, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:35:59.936101', 'step': 14110, 'epoch': 2}
{'type': 'loss', 'content': 0.171432763338089, 'timestamp': '2025-10-02 00:35:59.938388', 'step': 14111, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:35:59.998238', 'step': 14111, 'epoch': 2}
{'type': 'loss', 'content': 0.12475601583719254, 'timestamp': '2025-10-02 00:36:00.004124', 'step': 14112, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:00.058206', 'step': 14112, 'epoch': 2}
{'type': 'loss', 'content': 0.026032933965325356, 'timestamp': '2025-10-02 00:36:00.064540', 'step': 14113, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:00.118089', 'step': 14113, 'epoch': 2}
{'type': 'loss', 'content': 0.2104906290769577, 'timestamp': '2025-10-02 00:36:00.120352', 'step': 14114, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:00.175499', 'step': 14114, 'epoch': 2}
{'type': 'loss', 'content': 0.027879005298018456, 'timestamp': '2025-10-02 00:36:00.178315', 'step': 14115, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:00.232880', 'step': 14115, 'epoch': 2}
{'type': 'loss', 'content': 0.19467753171920776, 'timestamp': '2025-10-02 00:36:00.239220', 'step': 14116, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:00.292414', 'step': 14116, 'epoch': 2}
{'type': 'loss', 'content': 0.06851892173290253, 'timestamp': '2025-10-02 00:36:00.294973', 'step': 14117, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:00.348713', 'step': 14117, 'epoch': 2}
{'type': 'loss', 'content': 0.07466359436511993, 'timestamp': '2025-10-02 00:36:00.351163', 'step': 14118, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:00.404673', 'step': 14118, 'epoch': 2}
{'type': 'loss', 'content': 0.17264454066753387, 'timestamp': '2025-10-02 00:36:00.407094', 'step': 14119, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:00.460918', 'step': 14119, 'epoch': 2}
{'type': 'loss', 'content': 0.08874041587114334, 'timestamp': '2025-10-02 00:36:00.467079', 'step': 14120, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:00.520689', 'step': 14120, 'epoch': 2}
{'type': 'loss', 'content': 0.01320219598710537, 'timestamp': '2025-10-02 00:36:00.528572', 'step': 14121, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:36:00.584676', 'step': 14121, 'epoch': 2}
{'type': 'loss', 'content': 0.11499092727899551, 'timestamp': '2025-10-02 00:36:00.586780', 'step': 14122, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:00.641222', 'step': 14122, 'epoch': 2}
{'type': 'loss', 'content': 0.04466164484620094, 'timestamp': '2025-10-02 00:36:00.643934', 'step': 14123, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:00.698167', 'step': 14123, 'epoch': 2}
{'type': 'loss', 'content': 0.06309791654348373, 'timestamp': '2025-10-02 00:36:00.703866', 'step': 14124, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:36:00.763360', 'step': 14124, 'epoch': 2}
{'type': 'loss', 'content': 0.043405067175626755, 'timestamp': '2025-10-02 00:36:00.774672', 'step': 14125, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:36:00.828097', 'step': 14125, 'epoch': 2}
{'type': 'loss', 'content': 0.08210835605859756, 'timestamp': '2025-10-02 00:36:00.830349', 'step': 14126, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:36:00.890603', 'step': 14126, 'epoch': 2}
{'type': 'loss', 'content': 0.02935992181301117, 'timestamp': '2025-10-02 00:36:00.900762', 'step': 14127, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:00.955714', 'step': 14127, 'epoch': 2}
{'type': 'loss', 'content': 0.06935780495405197, 'timestamp': '2025-10-02 00:36:00.964228', 'step': 14128, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:01.017608', 'step': 14128, 'epoch': 2}
{'type': 'loss', 'content': 0.056757841259241104, 'timestamp': '2025-10-02 00:36:01.025452', 'step': 14129, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:01.079647', 'step': 14129, 'epoch': 2}
{'type': 'loss', 'content': 0.013128705322742462, 'timestamp': '2025-10-02 00:36:01.087552', 'step': 14130, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:01.141741', 'step': 14130, 'epoch': 2}
{'type': 'loss', 'content': 0.10287099331617355, 'timestamp': '2025-10-02 00:36:01.144285', 'step': 14131, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:36:01.203573', 'step': 14131, 'epoch': 2}
{'type': 'loss', 'content': 0.026967540383338928, 'timestamp': '2025-10-02 00:36:01.214539', 'step': 14132, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:01.269651', 'step': 14132, 'epoch': 2}
{'type': 'loss', 'content': 0.05201820656657219, 'timestamp': '2025-10-02 00:36:01.279973', 'step': 14133, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:01.335801', 'step': 14133, 'epoch': 2}
{'type': 'loss', 'content': 0.019043663516640663, 'timestamp': '2025-10-02 00:36:01.345213', 'step': 14134, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:01.399944', 'step': 14134, 'epoch': 2}
{'type': 'loss', 'content': 0.06546755880117416, 'timestamp': '2025-10-02 00:36:01.406260', 'step': 14135, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:01.460352', 'step': 14135, 'epoch': 2}
{'type': 'loss', 'content': 0.0474032424390316, 'timestamp': '2025-10-02 00:36:01.467477', 'step': 14136, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:01.521064', 'step': 14136, 'epoch': 2}
{'type': 'loss', 'content': 0.02574053220450878, 'timestamp': '2025-10-02 00:36:01.527754', 'step': 14137, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:01.596461', 'step': 14137, 'epoch': 2}
{'type': 'loss', 'content': 0.04898156225681305, 'timestamp': '2025-10-02 00:36:01.606029', 'step': 14138, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:01.664241', 'step': 14138, 'epoch': 2}
{'type': 'loss', 'content': 0.022496182471513748, 'timestamp': '2025-10-02 00:36:01.671489', 'step': 14139, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:01.725860', 'step': 14139, 'epoch': 2}
{'type': 'loss', 'content': 0.05361228063702583, 'timestamp': '2025-10-02 00:36:01.731666', 'step': 14140, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:36:01.799782', 'step': 14140, 'epoch': 2}
{'type': 'loss', 'content': 0.04140182584524155, 'timestamp': '2025-10-02 00:36:01.813289', 'step': 14141, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:36:01.882931', 'step': 14141, 'epoch': 2}
{'type': 'loss', 'content': 0.01308166328817606, 'timestamp': '2025-10-02 00:36:01.893597', 'step': 14142, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:01.949924', 'step': 14142, 'epoch': 2}
{'type': 'loss', 'content': 0.04042263329029083, 'timestamp': '2025-10-02 00:36:01.953037', 'step': 14143, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:36:02.014899', 'step': 14143, 'epoch': 2}
{'type': 'loss', 'content': 0.027726251631975174, 'timestamp': '2025-10-02 00:36:02.026362', 'step': 14144, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:02.081918', 'step': 14144, 'epoch': 2}
{'type': 'loss', 'content': 0.16921569406986237, 'timestamp': '2025-10-02 00:36:02.084413', 'step': 14145, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:02.143884', 'step': 14145, 'epoch': 2}
{'type': 'loss', 'content': 0.2042255997657776, 'timestamp': '2025-10-02 00:36:02.146355', 'step': 14146, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:02.204236', 'step': 14146, 'epoch': 2}
{'type': 'loss', 'content': 0.08274702727794647, 'timestamp': '2025-10-02 00:36:02.210448', 'step': 14147, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:02.264286', 'step': 14147, 'epoch': 2}
{'type': 'loss', 'content': 0.036369726061820984, 'timestamp': '2025-10-02 00:36:02.270038', 'step': 14148, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:02.324978', 'step': 14148, 'epoch': 2}
{'type': 'loss', 'content': 0.10185293108224869, 'timestamp': '2025-10-02 00:36:02.328683', 'step': 14149, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:02.389154', 'step': 14149, 'epoch': 2}
{'type': 'loss', 'content': 0.07530851662158966, 'timestamp': '2025-10-02 00:36:02.394384', 'step': 14150, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:36:02.461234', 'step': 14150, 'epoch': 2}
{'type': 'loss', 'content': 0.04189351201057434, 'timestamp': '2025-10-02 00:36:02.472076', 'step': 14151, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:36:02.540913', 'step': 14151, 'epoch': 2}
{'type': 'loss', 'content': 0.09450951218605042, 'timestamp': '2025-10-02 00:36:02.552331', 'step': 14152, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:02.607529', 'step': 14152, 'epoch': 2}
{'type': 'loss', 'content': 0.06609004735946655, 'timestamp': '2025-10-02 00:36:02.609832', 'step': 14153, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:02.663988', 'step': 14153, 'epoch': 2}
{'type': 'loss', 'content': 0.03985552489757538, 'timestamp': '2025-10-02 00:36:02.666683', 'step': 14154, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:36:02.721981', 'step': 14154, 'epoch': 2}
{'type': 'loss', 'content': 0.05216965451836586, 'timestamp': '2025-10-02 00:36:02.724440', 'step': 14155, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:02.778499', 'step': 14155, 'epoch': 2}
{'type': 'loss', 'content': 0.04543296620249748, 'timestamp': '2025-10-02 00:36:02.784475', 'step': 14156, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:36:02.845896', 'step': 14156, 'epoch': 2}
{'type': 'loss', 'content': 0.041421908885240555, 'timestamp': '2025-10-02 00:36:02.857693', 'step': 14157, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:36:02.913184', 'step': 14157, 'epoch': 2}
{'type': 'loss', 'content': 0.10514634847640991, 'timestamp': '2025-10-02 00:36:02.915411', 'step': 14158, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:36:02.972147', 'step': 14158, 'epoch': 2}
{'type': 'loss', 'content': 0.06626725941896439, 'timestamp': '2025-10-02 00:36:02.974468', 'step': 14159, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:03.029212', 'step': 14159, 'epoch': 2}
{'type': 'loss', 'content': 0.06353759765625, 'timestamp': '2025-10-02 00:36:03.039379', 'step': 14160, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:03.092828', 'step': 14160, 'epoch': 2}
{'type': 'loss', 'content': 0.1028546690940857, 'timestamp': '2025-10-02 00:36:03.095353', 'step': 14161, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:03.149018', 'step': 14161, 'epoch': 2}
{'type': 'loss', 'content': 0.08288555592298508, 'timestamp': '2025-10-02 00:36:03.151225', 'step': 14162, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:36:03.210715', 'step': 14162, 'epoch': 2}
{'type': 'loss', 'content': 0.018358035013079643, 'timestamp': '2025-10-02 00:36:03.220951', 'step': 14163, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:03.275304', 'step': 14163, 'epoch': 2}
{'type': 'loss', 'content': 0.042528219521045685, 'timestamp': '2025-10-02 00:36:03.283998', 'step': 14164, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:03.339177', 'step': 14164, 'epoch': 2}
{'type': 'loss', 'content': 0.12692058086395264, 'timestamp': '2025-10-02 00:36:03.341981', 'step': 14165, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:03.402921', 'step': 14165, 'epoch': 2}
{'type': 'loss', 'content': 0.03900500014424324, 'timestamp': '2025-10-02 00:36:03.405394', 'step': 14166, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:03.460889', 'step': 14166, 'epoch': 2}
{'type': 'loss', 'content': 0.08055749535560608, 'timestamp': '2025-10-02 00:36:03.463320', 'step': 14167, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:03.518370', 'step': 14167, 'epoch': 2}
{'type': 'loss', 'content': 0.13845013082027435, 'timestamp': '2025-10-02 00:36:03.524065', 'step': 14168, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:03.578701', 'step': 14168, 'epoch': 2}
{'type': 'loss', 'content': 0.01653420552611351, 'timestamp': '2025-10-02 00:36:03.588952', 'step': 14169, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:36:03.650770', 'step': 14169, 'epoch': 2}
{'type': 'loss', 'content': 0.05753384158015251, 'timestamp': '2025-10-02 00:36:03.661469', 'step': 14170, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:03.716477', 'step': 14170, 'epoch': 2}
{'type': 'loss', 'content': 0.06470662355422974, 'timestamp': '2025-10-02 00:36:03.718725', 'step': 14171, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:03.773982', 'step': 14171, 'epoch': 2}
{'type': 'loss', 'content': 0.11034755408763885, 'timestamp': '2025-10-02 00:36:03.779597', 'step': 14172, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:03.832624', 'step': 14172, 'epoch': 2}
{'type': 'loss', 'content': 0.03328385949134827, 'timestamp': '2025-10-02 00:36:03.835139', 'step': 14173, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:03.888830', 'step': 14173, 'epoch': 2}
{'type': 'loss', 'content': 0.043215006589889526, 'timestamp': '2025-10-02 00:36:03.896921', 'step': 14174, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:36:03.959436', 'step': 14174, 'epoch': 2}
{'type': 'loss', 'content': 0.028530439361929893, 'timestamp': '2025-10-02 00:36:03.970303', 'step': 14175, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:04.024826', 'step': 14175, 'epoch': 2}
{'type': 'loss', 'content': 0.12968812882900238, 'timestamp': '2025-10-02 00:36:04.030545', 'step': 14176, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:04.083913', 'step': 14176, 'epoch': 2}
{'type': 'loss', 'content': 0.07265807688236237, 'timestamp': '2025-10-02 00:36:04.091826', 'step': 14177, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:04.145567', 'step': 14177, 'epoch': 2}
{'type': 'loss', 'content': 0.016465449705719948, 'timestamp': '2025-10-02 00:36:04.148391', 'step': 14178, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:36:04.223112', 'step': 14178, 'epoch': 2}
{'type': 'loss', 'content': 0.03035746142268181, 'timestamp': '2025-10-02 00:36:04.236345', 'step': 14179, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:36:04.290914', 'step': 14179, 'epoch': 2}
{'type': 'loss', 'content': 0.01397707499563694, 'timestamp': '2025-10-02 00:36:04.296516', 'step': 14180, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:36:04.358102', 'step': 14180, 'epoch': 2}
{'type': 'loss', 'content': 0.009750093333423138, 'timestamp': '2025-10-02 00:36:04.369847', 'step': 14181, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:04.424287', 'step': 14181, 'epoch': 2}
{'type': 'loss', 'content': 0.06869925558567047, 'timestamp': '2025-10-02 00:36:04.433664', 'step': 14182, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:04.487760', 'step': 14182, 'epoch': 2}
{'type': 'loss', 'content': 0.06643769145011902, 'timestamp': '2025-10-02 00:36:04.494129', 'step': 14183, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:04.547691', 'step': 14183, 'epoch': 2}
{'type': 'loss', 'content': 0.06315527111291885, 'timestamp': '2025-10-02 00:36:04.553362', 'step': 14184, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:04.606576', 'step': 14184, 'epoch': 2}
{'type': 'loss', 'content': 0.02429197169840336, 'timestamp': '2025-10-02 00:36:04.612904', 'step': 14185, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:04.666832', 'step': 14185, 'epoch': 2}
{'type': 'loss', 'content': 0.05211813002824783, 'timestamp': '2025-10-02 00:36:04.673163', 'step': 14186, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:04.727666', 'step': 14186, 'epoch': 2}
{'type': 'loss', 'content': 0.028602514415979385, 'timestamp': '2025-10-02 00:36:04.729958', 'step': 14187, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:36:04.791640', 'step': 14187, 'epoch': 2}
{'type': 'loss', 'content': 0.05845815688371658, 'timestamp': '2025-10-02 00:36:04.802945', 'step': 14188, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:36:04.855937', 'step': 14188, 'epoch': 2}
{'type': 'loss', 'content': 0.11642280966043472, 'timestamp': '2025-10-02 00:36:04.858372', 'step': 14189, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:04.911966', 'step': 14189, 'epoch': 2}
{'type': 'loss', 'content': 0.21535509824752808, 'timestamp': '2025-10-02 00:36:04.914092', 'step': 14190, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:04.967855', 'step': 14190, 'epoch': 2}
{'type': 'loss', 'content': 0.049131788313388824, 'timestamp': '2025-10-02 00:36:04.970224', 'step': 14191, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:36:05.040171', 'step': 14191, 'epoch': 2}
{'type': 'loss', 'content': 0.00940343551337719, 'timestamp': '2025-10-02 00:36:05.053300', 'step': 14192, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:05.107770', 'step': 14192, 'epoch': 2}
{'type': 'loss', 'content': 0.018148906528949738, 'timestamp': '2025-10-02 00:36:05.117618', 'step': 14193, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:05.172903', 'step': 14193, 'epoch': 2}
{'type': 'loss', 'content': 0.08794081956148148, 'timestamp': '2025-10-02 00:36:05.175541', 'step': 14194, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:05.229901', 'step': 14194, 'epoch': 2}
{'type': 'loss', 'content': 0.07122376561164856, 'timestamp': '2025-10-02 00:36:05.232173', 'step': 14195, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:05.286862', 'step': 14195, 'epoch': 2}
{'type': 'loss', 'content': 0.05445846915245056, 'timestamp': '2025-10-02 00:36:05.292705', 'step': 14196, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:05.346243', 'step': 14196, 'epoch': 2}
{'type': 'loss', 'content': 0.12530700862407684, 'timestamp': '2025-10-02 00:36:05.348533', 'step': 14197, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:36:05.408006', 'step': 14197, 'epoch': 2}
{'type': 'loss', 'content': 0.02563784271478653, 'timestamp': '2025-10-02 00:36:05.418202', 'step': 14198, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:05.472267', 'step': 14198, 'epoch': 2}
{'type': 'loss', 'content': 0.08413688838481903, 'timestamp': '2025-10-02 00:36:05.474835', 'step': 14199, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:36:05.536371', 'step': 14199, 'epoch': 2}
{'type': 'loss', 'content': 0.04081672802567482, 'timestamp': '2025-10-02 00:36:05.547605', 'step': 14200, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:05.602214', 'step': 14200, 'epoch': 2}
{'type': 'loss', 'content': 0.019897256046533585, 'timestamp': '2025-10-02 00:36:05.609884', 'step': 14201, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:05.664099', 'step': 14201, 'epoch': 2}
{'type': 'loss', 'content': 0.10037665069103241, 'timestamp': '2025-10-02 00:36:05.667390', 'step': 14202, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:05.723241', 'step': 14202, 'epoch': 2}
{'type': 'loss', 'content': 0.05008215084671974, 'timestamp': '2025-10-02 00:36:05.726482', 'step': 14203, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:05.780415', 'step': 14203, 'epoch': 2}
{'type': 'loss', 'content': 0.07593318819999695, 'timestamp': '2025-10-02 00:36:05.786429', 'step': 14204, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:05.841296', 'step': 14204, 'epoch': 2}
{'type': 'loss', 'content': 0.03582851216197014, 'timestamp': '2025-10-02 00:36:05.851597', 'step': 14205, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:05.906638', 'step': 14205, 'epoch': 2}
{'type': 'loss', 'content': 0.03049764223396778, 'timestamp': '2025-10-02 00:36:05.915978', 'step': 14206, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:05.974947', 'step': 14206, 'epoch': 2}
{'type': 'loss', 'content': 0.044957563281059265, 'timestamp': '2025-10-02 00:36:05.977696', 'step': 14207, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:06.034352', 'step': 14207, 'epoch': 2}
{'type': 'loss', 'content': 0.019519373774528503, 'timestamp': '2025-10-02 00:36:06.040237', 'step': 14208, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:06.094548', 'step': 14208, 'epoch': 2}
{'type': 'loss', 'content': 0.08163698017597198, 'timestamp': '2025-10-02 00:36:06.097920', 'step': 14209, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:06.153789', 'step': 14209, 'epoch': 2}
{'type': 'loss', 'content': 0.11290399730205536, 'timestamp': '2025-10-02 00:36:06.156018', 'step': 14210, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:06.212260', 'step': 14210, 'epoch': 2}
{'type': 'loss', 'content': 0.024551520124077797, 'timestamp': '2025-10-02 00:36:06.221796', 'step': 14211, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:36:06.290057', 'step': 14211, 'epoch': 2}
{'type': 'loss', 'content': 0.09534786641597748, 'timestamp': '2025-10-02 00:36:06.302799', 'step': 14212, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:06.357511', 'step': 14212, 'epoch': 2}
{'type': 'loss', 'content': 0.052395984530448914, 'timestamp': '2025-10-02 00:36:06.359934', 'step': 14213, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:36:06.430904', 'step': 14213, 'epoch': 2}
{'type': 'loss', 'content': 0.03138910233974457, 'timestamp': '2025-10-02 00:36:06.443574', 'step': 14214, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:36:06.506001', 'step': 14214, 'epoch': 2}
{'type': 'loss', 'content': 0.023405274376273155, 'timestamp': '2025-10-02 00:36:06.516505', 'step': 14215, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:06.571187', 'step': 14215, 'epoch': 2}
{'type': 'loss', 'content': 0.03439251706004143, 'timestamp': '2025-10-02 00:36:06.577535', 'step': 14216, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:36:06.630620', 'step': 14216, 'epoch': 2}
{'type': 'loss', 'content': 0.06545472890138626, 'timestamp': '2025-10-02 00:36:06.633126', 'step': 14217, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:06.688081', 'step': 14217, 'epoch': 2}
{'type': 'loss', 'content': 0.07039177417755127, 'timestamp': '2025-10-02 00:36:06.697442', 'step': 14218, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:06.751958', 'step': 14218, 'epoch': 2}
{'type': 'loss', 'content': 0.09733014553785324, 'timestamp': '2025-10-02 00:36:06.758027', 'step': 14219, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:36:06.811903', 'step': 14219, 'epoch': 2}
{'type': 'loss', 'content': 0.03377082571387291, 'timestamp': '2025-10-02 00:36:06.818152', 'step': 14220, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:36:06.884585', 'step': 14220, 'epoch': 2}
{'type': 'loss', 'content': 0.026512954384088516, 'timestamp': '2025-10-02 00:36:06.897571', 'step': 14221, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:36:06.960302', 'step': 14221, 'epoch': 2}
{'type': 'loss', 'content': 0.024414455518126488, 'timestamp': '2025-10-02 00:36:06.970820', 'step': 14222, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:07.026007', 'step': 14222, 'epoch': 2}
{'type': 'loss', 'content': 0.05543852970004082, 'timestamp': '2025-10-02 00:36:07.033663', 'step': 14223, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:36:07.087654', 'step': 14223, 'epoch': 2}
{'type': 'loss', 'content': 0.09685178846120834, 'timestamp': '2025-10-02 00:36:07.093296', 'step': 14224, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:07.146992', 'step': 14224, 'epoch': 2}
{'type': 'loss', 'content': 0.016186298802495003, 'timestamp': '2025-10-02 00:36:07.153085', 'step': 14225, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:36:07.208153', 'step': 14225, 'epoch': 2}
{'type': 'loss', 'content': 0.0874985009431839, 'timestamp': '2025-10-02 00:36:07.210429', 'step': 14226, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:36:07.264675', 'step': 14226, 'epoch': 2}
{'type': 'loss', 'content': 0.1169000044465065, 'timestamp': '2025-10-02 00:36:07.267395', 'step': 14227, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:07.322128', 'step': 14227, 'epoch': 2}
{'type': 'loss', 'content': 0.02876744419336319, 'timestamp': '2025-10-02 00:36:07.329128', 'step': 14228, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:07.382875', 'step': 14228, 'epoch': 2}
{'type': 'loss', 'content': 0.12939055263996124, 'timestamp': '2025-10-02 00:36:07.390430', 'step': 14229, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:07.444224', 'step': 14229, 'epoch': 2}
{'type': 'loss', 'content': 0.05252733826637268, 'timestamp': '2025-10-02 00:36:07.450532', 'step': 14230, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:07.507756', 'step': 14230, 'epoch': 2}
{'type': 'loss', 'content': 0.036006469279527664, 'timestamp': '2025-10-02 00:36:07.513773', 'step': 14231, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:07.576767', 'step': 14231, 'epoch': 2}
{'type': 'loss', 'content': 0.030797122046351433, 'timestamp': '2025-10-02 00:36:07.583514', 'step': 14232, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:07.649382', 'step': 14232, 'epoch': 2}
{'type': 'loss', 'content': 0.04039950668811798, 'timestamp': '2025-10-02 00:36:07.651829', 'step': 14233, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:07.705956', 'step': 14233, 'epoch': 2}
{'type': 'loss', 'content': 0.05301457270979881, 'timestamp': '2025-10-02 00:36:07.708048', 'step': 14234, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:36:07.762394', 'step': 14234, 'epoch': 2}
{'type': 'loss', 'content': 0.060956865549087524, 'timestamp': '2025-10-02 00:36:07.765281', 'step': 14235, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:07.821924', 'step': 14235, 'epoch': 2}
{'type': 'loss', 'content': 0.14873047173023224, 'timestamp': '2025-10-02 00:36:07.830131', 'step': 14236, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:07.884754', 'step': 14236, 'epoch': 2}
{'type': 'loss', 'content': 0.08131575584411621, 'timestamp': '2025-10-02 00:36:07.890972', 'step': 14237, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:07.945846', 'step': 14237, 'epoch': 2}
{'type': 'loss', 'content': 0.19972515106201172, 'timestamp': '2025-10-02 00:36:07.947955', 'step': 14238, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:08.005070', 'step': 14238, 'epoch': 2}
{'type': 'loss', 'content': 0.08746762573719025, 'timestamp': '2025-10-02 00:36:08.008334', 'step': 14239, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:36:08.071871', 'step': 14239, 'epoch': 2}
{'type': 'loss', 'content': 0.02566440775990486, 'timestamp': '2025-10-02 00:36:08.083103', 'step': 14240, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:36:08.138095', 'step': 14240, 'epoch': 2}
{'type': 'loss', 'content': 0.08037251979112625, 'timestamp': '2025-10-02 00:36:08.141154', 'step': 14241, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:36:08.200109', 'step': 14241, 'epoch': 2}
{'type': 'loss', 'content': 0.034341078251600266, 'timestamp': '2025-10-02 00:36:08.210305', 'step': 14242, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:08.266500', 'step': 14242, 'epoch': 2}
{'type': 'loss', 'content': 0.07656794041395187, 'timestamp': '2025-10-02 00:36:08.276042', 'step': 14243, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:08.332002', 'step': 14243, 'epoch': 2}
{'type': 'loss', 'content': 0.06776827573776245, 'timestamp': '2025-10-02 00:36:08.339528', 'step': 14244, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:36:08.397262', 'step': 14244, 'epoch': 2}
{'type': 'loss', 'content': 0.12577609717845917, 'timestamp': '2025-10-02 00:36:08.400793', 'step': 14245, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:08.459352', 'step': 14245, 'epoch': 2}
{'type': 'loss', 'content': 0.19787687063217163, 'timestamp': '2025-10-02 00:36:08.462525', 'step': 14246, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:08.520744', 'step': 14246, 'epoch': 2}
{'type': 'loss', 'content': 0.06156489998102188, 'timestamp': '2025-10-02 00:36:08.528321', 'step': 14247, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:08.586369', 'step': 14247, 'epoch': 2}
{'type': 'loss', 'content': 0.023876355960965157, 'timestamp': '2025-10-02 00:36:08.596808', 'step': 14248, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:36:08.671174', 'step': 14248, 'epoch': 2}
{'type': 'loss', 'content': 0.010861548595130444, 'timestamp': '2025-10-02 00:36:08.685589', 'step': 14249, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:08.747239', 'step': 14249, 'epoch': 2}
{'type': 'loss', 'content': 0.06562869250774384, 'timestamp': '2025-10-02 00:36:08.755840', 'step': 14250, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:08.823353', 'step': 14250, 'epoch': 2}
{'type': 'loss', 'content': 0.07713811844587326, 'timestamp': '2025-10-02 00:36:08.830554', 'step': 14251, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:08.907327', 'step': 14251, 'epoch': 2}
{'type': 'loss', 'content': 0.02796025760471821, 'timestamp': '2025-10-02 00:36:08.917686', 'step': 14252, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:08.973483', 'step': 14252, 'epoch': 2}
{'type': 'loss', 'content': 0.08409988880157471, 'timestamp': '2025-10-02 00:36:08.976860', 'step': 14253, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:09.034063', 'step': 14253, 'epoch': 2}
{'type': 'loss', 'content': 0.1429140716791153, 'timestamp': '2025-10-02 00:36:09.037587', 'step': 14254, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:09.095290', 'step': 14254, 'epoch': 2}
{'type': 'loss', 'content': 0.06449355185031891, 'timestamp': '2025-10-02 00:36:09.104842', 'step': 14255, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:36:09.169293', 'step': 14255, 'epoch': 2}
{'type': 'loss', 'content': 0.022618185728788376, 'timestamp': '2025-10-02 00:36:09.180902', 'step': 14256, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:09.235996', 'step': 14256, 'epoch': 2}
{'type': 'loss', 'content': 0.06360404938459396, 'timestamp': '2025-10-02 00:36:09.239060', 'step': 14257, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:09.296035', 'step': 14257, 'epoch': 2}
{'type': 'loss', 'content': 0.02804115042090416, 'timestamp': '2025-10-02 00:36:09.298141', 'step': 14258, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:09.353687', 'step': 14258, 'epoch': 2}
{'type': 'loss', 'content': 0.12629790604114532, 'timestamp': '2025-10-02 00:36:09.357074', 'step': 14259, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:36:09.420016', 'step': 14259, 'epoch': 2}
{'type': 'loss', 'content': 0.01639753393828869, 'timestamp': '2025-10-02 00:36:09.430997', 'step': 14260, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:09.486472', 'step': 14260, 'epoch': 2}
{'type': 'loss', 'content': 0.03230062872171402, 'timestamp': '2025-10-02 00:36:09.490363', 'step': 14261, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:09.545048', 'step': 14261, 'epoch': 2}
{'type': 'loss', 'content': 0.05144093558192253, 'timestamp': '2025-10-02 00:36:09.547990', 'step': 14262, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:09.604622', 'step': 14262, 'epoch': 2}
{'type': 'loss', 'content': 0.08356095850467682, 'timestamp': '2025-10-02 00:36:09.607538', 'step': 14263, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:09.663355', 'step': 14263, 'epoch': 2}
{'type': 'loss', 'content': 0.045605212450027466, 'timestamp': '2025-10-02 00:36:09.670500', 'step': 14264, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:09.725484', 'step': 14264, 'epoch': 2}
{'type': 'loss', 'content': 0.08572901785373688, 'timestamp': '2025-10-02 00:36:09.728711', 'step': 14265, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:09.784345', 'step': 14265, 'epoch': 2}
{'type': 'loss', 'content': 0.053555916994810104, 'timestamp': '2025-10-02 00:36:09.790286', 'step': 14266, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:36:09.847107', 'step': 14266, 'epoch': 2}
{'type': 'loss', 'content': 0.1166168600320816, 'timestamp': '2025-10-02 00:36:09.850124', 'step': 14267, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:09.908828', 'step': 14267, 'epoch': 2}
{'type': 'loss', 'content': 0.03088187798857689, 'timestamp': '2025-10-02 00:36:09.918976', 'step': 14268, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:36:09.973511', 'step': 14268, 'epoch': 2}
{'type': 'loss', 'content': 0.09444406628608704, 'timestamp': '2025-10-02 00:36:09.975571', 'step': 14269, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:10.030162', 'step': 14269, 'epoch': 2}
{'type': 'loss', 'content': 0.07409702986478806, 'timestamp': '2025-10-02 00:36:10.039505', 'step': 14270, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:10.093979', 'step': 14270, 'epoch': 2}
{'type': 'loss', 'content': 0.032710108906030655, 'timestamp': '2025-10-02 00:36:10.096476', 'step': 14271, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:10.150607', 'step': 14271, 'epoch': 2}
{'type': 'loss', 'content': 0.11957418918609619, 'timestamp': '2025-10-02 00:36:10.156829', 'step': 14272, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:10.211226', 'step': 14272, 'epoch': 2}
{'type': 'loss', 'content': 0.04657392576336861, 'timestamp': '2025-10-02 00:36:10.213586', 'step': 14273, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:10.269846', 'step': 14273, 'epoch': 2}
{'type': 'loss', 'content': 0.018853073939681053, 'timestamp': '2025-10-02 00:36:10.275856', 'step': 14274, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:10.330154', 'step': 14274, 'epoch': 2}
{'type': 'loss', 'content': 0.047159355133771896, 'timestamp': '2025-10-02 00:36:10.336035', 'step': 14275, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:10.389766', 'step': 14275, 'epoch': 2}
{'type': 'loss', 'content': 0.06761818379163742, 'timestamp': '2025-10-02 00:36:10.398238', 'step': 14276, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:10.451680', 'step': 14276, 'epoch': 2}
{'type': 'loss', 'content': 0.022045554593205452, 'timestamp': '2025-10-02 00:36:10.457129', 'step': 14277, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:36:10.522088', 'step': 14277, 'epoch': 2}
{'type': 'loss', 'content': 0.06334458291530609, 'timestamp': '2025-10-02 00:36:10.532779', 'step': 14278, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:10.586595', 'step': 14278, 'epoch': 2}
{'type': 'loss', 'content': 0.15344840288162231, 'timestamp': '2025-10-02 00:36:10.588777', 'step': 14279, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:10.642633', 'step': 14279, 'epoch': 2}
{'type': 'loss', 'content': 0.08368802815675735, 'timestamp': '2025-10-02 00:36:10.648664', 'step': 14280, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:10.701986', 'step': 14280, 'epoch': 2}
{'type': 'loss', 'content': 0.02471596747636795, 'timestamp': '2025-10-02 00:36:10.704650', 'step': 14281, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:10.759648', 'step': 14281, 'epoch': 2}
{'type': 'loss', 'content': 0.024665091186761856, 'timestamp': '2025-10-02 00:36:10.769023', 'step': 14282, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:10.823414', 'step': 14282, 'epoch': 2}
{'type': 'loss', 'content': 0.07114856690168381, 'timestamp': '2025-10-02 00:36:10.832781', 'step': 14283, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:10.893617', 'step': 14283, 'epoch': 2}
{'type': 'loss', 'content': 0.2478068321943283, 'timestamp': '2025-10-02 00:36:10.899437', 'step': 14284, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:10.952755', 'step': 14284, 'epoch': 2}
{'type': 'loss', 'content': 0.011451886966824532, 'timestamp': '2025-10-02 00:36:10.960632', 'step': 14285, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:11.014642', 'step': 14285, 'epoch': 2}
{'type': 'loss', 'content': 0.031295571476221085, 'timestamp': '2025-10-02 00:36:11.017132', 'step': 14286, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:11.070989', 'step': 14286, 'epoch': 2}
{'type': 'loss', 'content': 0.08173403143882751, 'timestamp': '2025-10-02 00:36:11.077152', 'step': 14287, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:11.133142', 'step': 14287, 'epoch': 2}
{'type': 'loss', 'content': 0.07023545354604721, 'timestamp': '2025-10-02 00:36:11.138963', 'step': 14288, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:11.193229', 'step': 14288, 'epoch': 2}
{'type': 'loss', 'content': 0.0272578876465559, 'timestamp': '2025-10-02 00:36:11.203291', 'step': 14289, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:11.257393', 'step': 14289, 'epoch': 2}
{'type': 'loss', 'content': 0.0723215714097023, 'timestamp': '2025-10-02 00:36:11.260413', 'step': 14290, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:11.313898', 'step': 14290, 'epoch': 2}
{'type': 'loss', 'content': 0.05142617225646973, 'timestamp': '2025-10-02 00:36:11.320060', 'step': 14291, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:11.373841', 'step': 14291, 'epoch': 2}
{'type': 'loss', 'content': 0.04654353857040405, 'timestamp': '2025-10-02 00:36:11.379819', 'step': 14292, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:11.433643', 'step': 14292, 'epoch': 2}
{'type': 'loss', 'content': 0.11946697533130646, 'timestamp': '2025-10-02 00:36:11.438658', 'step': 14293, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:11.493310', 'step': 14293, 'epoch': 2}
{'type': 'loss', 'content': 0.1087435856461525, 'timestamp': '2025-10-02 00:36:11.495989', 'step': 14294, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:36:11.550840', 'step': 14294, 'epoch': 2}
{'type': 'loss', 'content': 0.07236099243164062, 'timestamp': '2025-10-02 00:36:11.553063', 'step': 14295, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:11.606811', 'step': 14295, 'epoch': 2}
{'type': 'loss', 'content': 0.04422914236783981, 'timestamp': '2025-10-02 00:36:11.613758', 'step': 14296, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:36:11.667320', 'step': 14296, 'epoch': 2}
{'type': 'loss', 'content': 0.05724763125181198, 'timestamp': '2025-10-02 00:36:11.670348', 'step': 14297, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:36:11.724618', 'step': 14297, 'epoch': 2}
{'type': 'loss', 'content': 0.07901784032583237, 'timestamp': '2025-10-02 00:36:11.727177', 'step': 14298, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:11.782298', 'step': 14298, 'epoch': 2}
{'type': 'loss', 'content': 0.060481809079647064, 'timestamp': '2025-10-02 00:36:11.788343', 'step': 14299, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:11.842428', 'step': 14299, 'epoch': 2}
{'type': 'loss', 'content': 0.09206557273864746, 'timestamp': '2025-10-02 00:36:11.852613', 'step': 14300, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:11.906611', 'step': 14300, 'epoch': 2}
{'type': 'loss', 'content': 0.1714518666267395, 'timestamp': '2025-10-02 00:36:11.909237', 'step': 14301, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:11.963151', 'step': 14301, 'epoch': 2}
{'type': 'loss', 'content': 0.0746321976184845, 'timestamp': '2025-10-02 00:36:11.966124', 'step': 14302, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:12.019604', 'step': 14302, 'epoch': 2}
{'type': 'loss', 'content': 0.09477651119232178, 'timestamp': '2025-10-02 00:36:12.022319', 'step': 14303, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:36:12.081315', 'step': 14303, 'epoch': 2}
{'type': 'loss', 'content': 0.056761037558317184, 'timestamp': '2025-10-02 00:36:12.092281', 'step': 14304, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:12.144669', 'step': 14304, 'epoch': 2}
{'type': 'loss', 'content': 0.2113838493824005, 'timestamp': '2025-10-02 00:36:12.147114', 'step': 14305, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:12.200650', 'step': 14305, 'epoch': 2}
{'type': 'loss', 'content': 0.042823418974876404, 'timestamp': '2025-10-02 00:36:12.203055', 'step': 14306, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:36:12.271460', 'step': 14306, 'epoch': 2}
{'type': 'loss', 'content': 0.05460764467716217, 'timestamp': '2025-10-02 00:36:12.283796', 'step': 14307, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:36:12.338353', 'step': 14307, 'epoch': 2}
{'type': 'loss', 'content': 0.047315675765275955, 'timestamp': '2025-10-02 00:36:12.344642', 'step': 14308, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:36:12.398402', 'step': 14308, 'epoch': 2}
{'type': 'loss', 'content': 0.11528220772743225, 'timestamp': '2025-10-02 00:36:12.401460', 'step': 14309, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:12.454724', 'step': 14309, 'epoch': 2}
{'type': 'loss', 'content': 0.08929891139268875, 'timestamp': '2025-10-02 00:36:12.457474', 'step': 14310, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:12.512418', 'step': 14310, 'epoch': 2}
{'type': 'loss', 'content': 0.11860128492116928, 'timestamp': '2025-10-02 00:36:12.514708', 'step': 14311, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:12.568357', 'step': 14311, 'epoch': 2}
{'type': 'loss', 'content': 0.05815109238028526, 'timestamp': '2025-10-02 00:36:12.574657', 'step': 14312, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:12.630076', 'step': 14312, 'epoch': 2}
{'type': 'loss', 'content': 0.02726593427360058, 'timestamp': '2025-10-02 00:36:12.636270', 'step': 14313, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:12.689190', 'step': 14313, 'epoch': 2}
{'type': 'loss', 'content': 0.1250932216644287, 'timestamp': '2025-10-02 00:36:12.691619', 'step': 14314, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:12.745140', 'step': 14314, 'epoch': 2}
{'type': 'loss', 'content': 0.06556224077939987, 'timestamp': '2025-10-02 00:36:12.747677', 'step': 14315, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:12.802132', 'step': 14315, 'epoch': 2}
{'type': 'loss', 'content': 0.12457489222288132, 'timestamp': '2025-10-02 00:36:12.808181', 'step': 14316, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:12.861493', 'step': 14316, 'epoch': 2}
{'type': 'loss', 'content': 0.12811590731143951, 'timestamp': '2025-10-02 00:36:12.863885', 'step': 14317, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:12.918223', 'step': 14317, 'epoch': 2}
{'type': 'loss', 'content': 0.019006293267011642, 'timestamp': '2025-10-02 00:36:12.925833', 'step': 14318, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:12.979964', 'step': 14318, 'epoch': 2}
{'type': 'loss', 'content': 0.025691993534564972, 'timestamp': '2025-10-02 00:36:12.982461', 'step': 14319, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:13.036635', 'step': 14319, 'epoch': 2}
{'type': 'loss', 'content': 0.042092300951480865, 'timestamp': '2025-10-02 00:36:13.042991', 'step': 14320, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:36:13.103803', 'step': 14320, 'epoch': 2}
{'type': 'loss', 'content': 0.02447996661067009, 'timestamp': '2025-10-02 00:36:13.115273', 'step': 14321, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:13.169900', 'step': 14321, 'epoch': 2}
{'type': 'loss', 'content': 0.1512654572725296, 'timestamp': '2025-10-02 00:36:13.172250', 'step': 14322, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:13.226273', 'step': 14322, 'epoch': 2}
{'type': 'loss', 'content': 0.015288283117115498, 'timestamp': '2025-10-02 00:36:13.229029', 'step': 14323, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:13.282552', 'step': 14323, 'epoch': 2}
{'type': 'loss', 'content': 0.07714690268039703, 'timestamp': '2025-10-02 00:36:13.288551', 'step': 14324, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:13.342261', 'step': 14324, 'epoch': 2}
{'type': 'loss', 'content': 0.06823085248470306, 'timestamp': '2025-10-02 00:36:13.344607', 'step': 14325, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:13.398800', 'step': 14325, 'epoch': 2}
{'type': 'loss', 'content': 0.03632323443889618, 'timestamp': '2025-10-02 00:36:13.401347', 'step': 14326, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:13.455508', 'step': 14326, 'epoch': 2}
{'type': 'loss', 'content': 0.16821251809597015, 'timestamp': '2025-10-02 00:36:13.457805', 'step': 14327, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:13.511653', 'step': 14327, 'epoch': 2}
{'type': 'loss', 'content': 0.04582781344652176, 'timestamp': '2025-10-02 00:36:13.517639', 'step': 14328, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:36:13.571045', 'step': 14328, 'epoch': 2}
{'type': 'loss', 'content': 0.035978954285383224, 'timestamp': '2025-10-02 00:36:13.573364', 'step': 14329, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:13.626599', 'step': 14329, 'epoch': 2}
{'type': 'loss', 'content': 0.12907862663269043, 'timestamp': '2025-10-02 00:36:13.629141', 'step': 14330, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:13.683614', 'step': 14330, 'epoch': 2}
{'type': 'loss', 'content': 0.03147342428565025, 'timestamp': '2025-10-02 00:36:13.692930', 'step': 14331, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:13.747850', 'step': 14331, 'epoch': 2}
{'type': 'loss', 'content': 0.11734364181756973, 'timestamp': '2025-10-02 00:36:13.753908', 'step': 14332, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:13.807683', 'step': 14332, 'epoch': 2}
{'type': 'loss', 'content': 0.05597679316997528, 'timestamp': '2025-10-02 00:36:13.810188', 'step': 14333, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:13.864240', 'step': 14333, 'epoch': 2}
{'type': 'loss', 'content': 0.04141727834939957, 'timestamp': '2025-10-02 00:36:13.873606', 'step': 14334, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:13.928286', 'step': 14334, 'epoch': 2}
{'type': 'loss', 'content': 0.1291242390871048, 'timestamp': '2025-10-02 00:36:13.930454', 'step': 14335, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:13.984717', 'step': 14335, 'epoch': 2}
{'type': 'loss', 'content': 0.04009098932147026, 'timestamp': '2025-10-02 00:36:13.993020', 'step': 14336, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:14.046846', 'step': 14336, 'epoch': 2}
{'type': 'loss', 'content': 0.03381885215640068, 'timestamp': '2025-10-02 00:36:14.049239', 'step': 14337, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:14.103615', 'step': 14337, 'epoch': 2}
{'type': 'loss', 'content': 0.175416499376297, 'timestamp': '2025-10-02 00:36:14.105964', 'step': 14338, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:14.160030', 'step': 14338, 'epoch': 2}
{'type': 'loss', 'content': 0.10787345468997955, 'timestamp': '2025-10-02 00:36:14.166182', 'step': 14339, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:14.221518', 'step': 14339, 'epoch': 2}
{'type': 'loss', 'content': 0.010151670314371586, 'timestamp': '2025-10-02 00:36:14.230166', 'step': 14340, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:14.283525', 'step': 14340, 'epoch': 2}
{'type': 'loss', 'content': 0.09946548938751221, 'timestamp': '2025-10-02 00:36:14.286350', 'step': 14341, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:14.342899', 'step': 14341, 'epoch': 2}
{'type': 'loss', 'content': 0.03178248926997185, 'timestamp': '2025-10-02 00:36:14.352467', 'step': 14342, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:14.408400', 'step': 14342, 'epoch': 2}
{'type': 'loss', 'content': 0.04566119983792305, 'timestamp': '2025-10-02 00:36:14.412024', 'step': 14343, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:36:14.470562', 'step': 14343, 'epoch': 2}
{'type': 'loss', 'content': 0.0889713317155838, 'timestamp': '2025-10-02 00:36:14.481488', 'step': 14344, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:14.535664', 'step': 14344, 'epoch': 2}
{'type': 'loss', 'content': 0.04898810759186745, 'timestamp': '2025-10-02 00:36:14.538266', 'step': 14345, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:36:14.591811', 'step': 14345, 'epoch': 2}
{'type': 'loss', 'content': 0.13797220587730408, 'timestamp': '2025-10-02 00:36:14.594062', 'step': 14346, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:14.647620', 'step': 14346, 'epoch': 2}
{'type': 'loss', 'content': 0.14414571225643158, 'timestamp': '2025-10-02 00:36:14.650360', 'step': 14347, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:36:14.711110', 'step': 14347, 'epoch': 2}
{'type': 'loss', 'content': 0.013564092107117176, 'timestamp': '2025-10-02 00:36:14.722358', 'step': 14348, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:14.776291', 'step': 14348, 'epoch': 2}
{'type': 'loss', 'content': 0.128135085105896, 'timestamp': '2025-10-02 00:36:14.778600', 'step': 14349, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:14.832481', 'step': 14349, 'epoch': 2}
{'type': 'loss', 'content': 0.035293832421302795, 'timestamp': '2025-10-02 00:36:14.835187', 'step': 14350, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:14.888274', 'step': 14350, 'epoch': 2}
{'type': 'loss', 'content': 0.08594441413879395, 'timestamp': '2025-10-02 00:36:14.890510', 'step': 14351, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:14.944602', 'step': 14351, 'epoch': 2}
{'type': 'loss', 'content': 0.08337190747261047, 'timestamp': '2025-10-02 00:36:14.952905', 'step': 14352, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:15.007249', 'step': 14352, 'epoch': 2}
{'type': 'loss', 'content': 0.03292300924658775, 'timestamp': '2025-10-02 00:36:15.015261', 'step': 14353, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:36:15.070179', 'step': 14353, 'epoch': 2}
{'type': 'loss', 'content': 0.14133089780807495, 'timestamp': '2025-10-02 00:36:15.072514', 'step': 14354, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:15.126314', 'step': 14354, 'epoch': 2}
{'type': 'loss', 'content': 0.028206389397382736, 'timestamp': '2025-10-02 00:36:15.128845', 'step': 14355, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:15.182379', 'step': 14355, 'epoch': 2}
{'type': 'loss', 'content': 0.012847909703850746, 'timestamp': '2025-10-02 00:36:15.191155', 'step': 14356, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:36:15.244024', 'step': 14356, 'epoch': 2}
{'type': 'loss', 'content': 0.10672817379236221, 'timestamp': '2025-10-02 00:36:15.246412', 'step': 14357, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:36:15.300377', 'step': 14357, 'epoch': 2}
{'type': 'loss', 'content': 0.05439625680446625, 'timestamp': '2025-10-02 00:36:15.303366', 'step': 14358, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:15.357293', 'step': 14358, 'epoch': 2}
{'type': 'loss', 'content': 0.07573208957910538, 'timestamp': '2025-10-02 00:36:15.359998', 'step': 14359, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:15.414286', 'step': 14359, 'epoch': 2}
{'type': 'loss', 'content': 0.059290945529937744, 'timestamp': '2025-10-02 00:36:15.421612', 'step': 14360, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:36:15.475662', 'step': 14360, 'epoch': 2}
{'type': 'loss', 'content': 0.1715845912694931, 'timestamp': '2025-10-02 00:36:15.478312', 'step': 14361, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:15.533155', 'step': 14361, 'epoch': 2}
{'type': 'loss', 'content': 0.011257276870310307, 'timestamp': '2025-10-02 00:36:15.536255', 'step': 14362, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:15.591127', 'step': 14362, 'epoch': 2}
{'type': 'loss', 'content': 0.09865184128284454, 'timestamp': '2025-10-02 00:36:15.594240', 'step': 14363, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:15.649893', 'step': 14363, 'epoch': 2}
{'type': 'loss', 'content': 0.06646960228681564, 'timestamp': '2025-10-02 00:36:15.655948', 'step': 14364, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:15.709187', 'step': 14364, 'epoch': 2}
{'type': 'loss', 'content': 0.15937945246696472, 'timestamp': '2025-10-02 00:36:15.711443', 'step': 14365, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:36:15.766096', 'step': 14365, 'epoch': 2}
{'type': 'loss', 'content': 0.12539972364902496, 'timestamp': '2025-10-02 00:36:15.768499', 'step': 14366, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:15.822543', 'step': 14366, 'epoch': 2}
{'type': 'loss', 'content': 0.10629239678382874, 'timestamp': '2025-10-02 00:36:15.829746', 'step': 14367, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:15.883819', 'step': 14367, 'epoch': 2}
{'type': 'loss', 'content': 0.1393192708492279, 'timestamp': '2025-10-02 00:36:15.890145', 'step': 14368, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:15.943802', 'step': 14368, 'epoch': 2}
{'type': 'loss', 'content': 0.05811625346541405, 'timestamp': '2025-10-02 00:36:15.946405', 'step': 14369, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:16.001737', 'step': 14369, 'epoch': 2}
{'type': 'loss', 'content': 0.034015171229839325, 'timestamp': '2025-10-02 00:36:16.011078', 'step': 14370, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:16.066109', 'step': 14370, 'epoch': 2}
{'type': 'loss', 'content': 0.18219788372516632, 'timestamp': '2025-10-02 00:36:16.068405', 'step': 14371, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:16.122319', 'step': 14371, 'epoch': 2}
{'type': 'loss', 'content': 0.04106969013810158, 'timestamp': '2025-10-02 00:36:16.128114', 'step': 14372, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:36:16.185513', 'step': 14372, 'epoch': 2}
{'type': 'loss', 'content': 0.07045690715312958, 'timestamp': '2025-10-02 00:36:16.196426', 'step': 14373, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:16.250904', 'step': 14373, 'epoch': 2}
{'type': 'loss', 'content': 0.04284548759460449, 'timestamp': '2025-10-02 00:36:16.253239', 'step': 14374, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:16.307597', 'step': 14374, 'epoch': 2}
{'type': 'loss', 'content': 0.017336297780275345, 'timestamp': '2025-10-02 00:36:16.310445', 'step': 14375, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:36:16.372317', 'step': 14375, 'epoch': 2}
{'type': 'loss', 'content': 0.02030719444155693, 'timestamp': '2025-10-02 00:36:16.383768', 'step': 14376, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:16.436660', 'step': 14376, 'epoch': 2}
{'type': 'loss', 'content': 0.21777567267417908, 'timestamp': '2025-10-02 00:36:16.439343', 'step': 14377, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:16.495177', 'step': 14377, 'epoch': 2}
{'type': 'loss', 'content': 0.023480404168367386, 'timestamp': '2025-10-02 00:36:16.504720', 'step': 14378, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:16.558605', 'step': 14378, 'epoch': 2}
{'type': 'loss', 'content': 0.06365705281496048, 'timestamp': '2025-10-02 00:36:16.564296', 'step': 14379, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:36:16.631255', 'step': 14379, 'epoch': 2}
{'type': 'loss', 'content': 0.03295057639479637, 'timestamp': '2025-10-02 00:36:16.643988', 'step': 14380, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:36:16.719104', 'step': 14380, 'epoch': 2}
{'type': 'loss', 'content': 0.006945598404854536, 'timestamp': '2025-10-02 00:36:16.734263', 'step': 14381, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:16.793697', 'step': 14381, 'epoch': 2}
{'type': 'loss', 'content': 0.0326850563287735, 'timestamp': '2025-10-02 00:36:16.801257', 'step': 14382, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:16.855614', 'step': 14382, 'epoch': 2}
{'type': 'loss', 'content': 0.19433791935443878, 'timestamp': '2025-10-02 00:36:16.857919', 'step': 14383, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:36:16.911772', 'step': 14383, 'epoch': 2}
{'type': 'loss', 'content': 0.07270149141550064, 'timestamp': '2025-10-02 00:36:16.917580', 'step': 14384, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:36:16.972259', 'step': 14384, 'epoch': 2}
{'type': 'loss', 'content': 0.10334836691617966, 'timestamp': '2025-10-02 00:36:16.974380', 'step': 14385, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:17.028458', 'step': 14385, 'epoch': 2}
{'type': 'loss', 'content': 0.050020355731248856, 'timestamp': '2025-10-02 00:36:17.034456', 'step': 14386, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:17.089656', 'step': 14386, 'epoch': 2}
{'type': 'loss', 'content': 0.03692572936415672, 'timestamp': '2025-10-02 00:36:17.096954', 'step': 14387, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:17.151085', 'step': 14387, 'epoch': 2}
{'type': 'loss', 'content': 0.12598051130771637, 'timestamp': '2025-10-02 00:36:17.157354', 'step': 14388, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:17.211091', 'step': 14388, 'epoch': 2}
{'type': 'loss', 'content': 0.028067750856280327, 'timestamp': '2025-10-02 00:36:17.220491', 'step': 14389, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:17.274339', 'step': 14389, 'epoch': 2}
{'type': 'loss', 'content': 0.0497405044734478, 'timestamp': '2025-10-02 00:36:17.283710', 'step': 14390, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:17.338710', 'step': 14390, 'epoch': 2}
{'type': 'loss', 'content': 0.041600339114665985, 'timestamp': '2025-10-02 00:36:17.344554', 'step': 14391, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:36:17.404195', 'step': 14391, 'epoch': 2}
{'type': 'loss', 'content': 0.03230646997690201, 'timestamp': '2025-10-02 00:36:17.415096', 'step': 14392, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:17.473092', 'step': 14392, 'epoch': 2}
{'type': 'loss', 'content': 0.050472334027290344, 'timestamp': '2025-10-02 00:36:17.475721', 'step': 14393, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:17.532607', 'step': 14393, 'epoch': 2}
{'type': 'loss', 'content': 0.1083393320441246, 'timestamp': '2025-10-02 00:36:17.535434', 'step': 14394, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:17.590114', 'step': 14394, 'epoch': 2}
{'type': 'loss', 'content': 0.059267304837703705, 'timestamp': '2025-10-02 00:36:17.593618', 'step': 14395, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:17.650175', 'step': 14395, 'epoch': 2}
{'type': 'loss', 'content': 0.04394260048866272, 'timestamp': '2025-10-02 00:36:17.658452', 'step': 14396, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:36:17.720279', 'step': 14396, 'epoch': 2}
{'type': 'loss', 'content': 0.04390121251344681, 'timestamp': '2025-10-02 00:36:17.731596', 'step': 14397, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:17.789754', 'step': 14397, 'epoch': 2}
{'type': 'loss', 'content': 0.06977346539497375, 'timestamp': '2025-10-02 00:36:17.792770', 'step': 14398, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:36:17.864612', 'step': 14398, 'epoch': 2}
{'type': 'loss', 'content': 0.02725164219737053, 'timestamp': '2025-10-02 00:36:17.877040', 'step': 14399, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:17.935147', 'step': 14399, 'epoch': 2}
{'type': 'loss', 'content': 0.07820690423250198, 'timestamp': '2025-10-02 00:36:17.941770', 'step': 14400, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:17.999792', 'step': 14400, 'epoch': 2}
{'type': 'loss', 'content': 0.13429342210292816, 'timestamp': '2025-10-02 00:36:18.002851', 'step': 14401, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:18.058371', 'step': 14401, 'epoch': 2}
{'type': 'loss', 'content': 0.10722728073596954, 'timestamp': '2025-10-02 00:36:18.062531', 'step': 14402, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:18.119229', 'step': 14402, 'epoch': 2}
{'type': 'loss', 'content': 0.0055302404798567295, 'timestamp': '2025-10-02 00:36:18.126545', 'step': 14403, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:18.182504', 'step': 14403, 'epoch': 2}
{'type': 'loss', 'content': 0.06425252556800842, 'timestamp': '2025-10-02 00:36:18.189201', 'step': 14404, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:36:18.250602', 'step': 14404, 'epoch': 2}
{'type': 'loss', 'content': 0.032757800072431564, 'timestamp': '2025-10-02 00:36:18.262147', 'step': 14405, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:18.318417', 'step': 14405, 'epoch': 2}
{'type': 'loss', 'content': 0.02838125079870224, 'timestamp': '2025-10-02 00:36:18.325896', 'step': 14406, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:18.384861', 'step': 14406, 'epoch': 2}
{'type': 'loss', 'content': 0.08470391482114792, 'timestamp': '2025-10-02 00:36:18.387998', 'step': 14407, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:18.444276', 'step': 14407, 'epoch': 2}
{'type': 'loss', 'content': 0.07031897455453873, 'timestamp': '2025-10-02 00:36:18.454613', 'step': 14408, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:18.510707', 'step': 14408, 'epoch': 2}
{'type': 'loss', 'content': 0.01778675802052021, 'timestamp': '2025-10-02 00:36:18.513458', 'step': 14409, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:36:18.585123', 'step': 14409, 'epoch': 2}
{'type': 'loss', 'content': 0.04139532148838043, 'timestamp': '2025-10-02 00:36:18.597588', 'step': 14410, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:18.652734', 'step': 14410, 'epoch': 2}
{'type': 'loss', 'content': 0.012670218013226986, 'timestamp': '2025-10-02 00:36:18.660086', 'step': 14411, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:18.716381', 'step': 14411, 'epoch': 2}
{'type': 'loss', 'content': 0.18662779033184052, 'timestamp': '2025-10-02 00:36:18.722304', 'step': 14412, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:18.777265', 'step': 14412, 'epoch': 2}
{'type': 'loss', 'content': 0.06202760711312294, 'timestamp': '2025-10-02 00:36:18.779652', 'step': 14413, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:36:18.834031', 'step': 14413, 'epoch': 2}
{'type': 'loss', 'content': 0.028134599328041077, 'timestamp': '2025-10-02 00:36:18.837381', 'step': 14414, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:18.892901', 'step': 14414, 'epoch': 2}
{'type': 'loss', 'content': 0.10718385875225067, 'timestamp': '2025-10-02 00:36:18.895874', 'step': 14415, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:36:18.956957', 'step': 14415, 'epoch': 2}
{'type': 'loss', 'content': 0.03350106254220009, 'timestamp': '2025-10-02 00:36:18.967937', 'step': 14416, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:19.025432', 'step': 14416, 'epoch': 2}
{'type': 'loss', 'content': 0.08861034363508224, 'timestamp': '2025-10-02 00:36:19.031038', 'step': 14417, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:19.087280', 'step': 14417, 'epoch': 2}
{'type': 'loss', 'content': 0.012607398442924023, 'timestamp': '2025-10-02 00:36:19.089763', 'step': 14418, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:36:19.151242', 'step': 14418, 'epoch': 2}
{'type': 'loss', 'content': 0.06817588955163956, 'timestamp': '2025-10-02 00:36:19.161705', 'step': 14419, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:19.217337', 'step': 14419, 'epoch': 2}
{'type': 'loss', 'content': 0.04004821553826332, 'timestamp': '2025-10-02 00:36:19.223124', 'step': 14420, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:19.277601', 'step': 14420, 'epoch': 2}
{'type': 'loss', 'content': 0.023951290175318718, 'timestamp': '2025-10-02 00:36:19.285038', 'step': 14421, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:19.339731', 'step': 14421, 'epoch': 2}
{'type': 'loss', 'content': 0.04349885880947113, 'timestamp': '2025-10-02 00:36:19.345501', 'step': 14422, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:19.400086', 'step': 14422, 'epoch': 2}
{'type': 'loss', 'content': 0.1246204823255539, 'timestamp': '2025-10-02 00:36:19.402556', 'step': 14423, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:19.457418', 'step': 14423, 'epoch': 2}
{'type': 'loss', 'content': 0.0979279950261116, 'timestamp': '2025-10-02 00:36:19.463502', 'step': 14424, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:19.517489', 'step': 14424, 'epoch': 2}
{'type': 'loss', 'content': 0.015528015792369843, 'timestamp': '2025-10-02 00:36:19.526959', 'step': 14425, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:19.582236', 'step': 14425, 'epoch': 2}
{'type': 'loss', 'content': 0.07421159744262695, 'timestamp': '2025-10-02 00:36:19.587524', 'step': 14426, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:19.641952', 'step': 14426, 'epoch': 2}
{'type': 'loss', 'content': 0.08654201030731201, 'timestamp': '2025-10-02 00:36:19.644598', 'step': 14427, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:36:19.703439', 'step': 14427, 'epoch': 2}
{'type': 'loss', 'content': 0.12452659755945206, 'timestamp': '2025-10-02 00:36:19.714442', 'step': 14428, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:19.767509', 'step': 14428, 'epoch': 2}
{'type': 'loss', 'content': 0.09312085807323456, 'timestamp': '2025-10-02 00:36:19.769814', 'step': 14429, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:36:19.824206', 'step': 14429, 'epoch': 2}
{'type': 'loss', 'content': 0.12744875252246857, 'timestamp': '2025-10-02 00:36:19.826645', 'step': 14430, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:19.880860', 'step': 14430, 'epoch': 2}
{'type': 'loss', 'content': 0.15410441160202026, 'timestamp': '2025-10-02 00:36:19.884114', 'step': 14431, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:19.939188', 'step': 14431, 'epoch': 2}
{'type': 'loss', 'content': 0.11732875555753708, 'timestamp': '2025-10-02 00:36:19.945306', 'step': 14432, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:19.999440', 'step': 14432, 'epoch': 2}
{'type': 'loss', 'content': 0.11466386169195175, 'timestamp': '2025-10-02 00:36:20.001784', 'step': 14433, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:36:20.064044', 'step': 14433, 'epoch': 2}
{'type': 'loss', 'content': 0.027618354186415672, 'timestamp': '2025-10-02 00:36:20.074553', 'step': 14434, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:20.129915', 'step': 14434, 'epoch': 2}
{'type': 'loss', 'content': 0.09195364266633987, 'timestamp': '2025-10-02 00:36:20.135699', 'step': 14435, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:36:20.197428', 'step': 14435, 'epoch': 2}
{'type': 'loss', 'content': 0.029222523793578148, 'timestamp': '2025-10-02 00:36:20.208716', 'step': 14436, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:20.264044', 'step': 14436, 'epoch': 2}
{'type': 'loss', 'content': 0.08780641108751297, 'timestamp': '2025-10-02 00:36:20.267136', 'step': 14437, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:36:20.321804', 'step': 14437, 'epoch': 2}
{'type': 'loss', 'content': 0.13581965863704681, 'timestamp': '2025-10-02 00:36:20.324481', 'step': 14438, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:20.379927', 'step': 14438, 'epoch': 2}
{'type': 'loss', 'content': 0.040356140583753586, 'timestamp': '2025-10-02 00:36:20.387440', 'step': 14439, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:20.444046', 'step': 14439, 'epoch': 2}
{'type': 'loss', 'content': 0.021859383210539818, 'timestamp': '2025-10-02 00:36:20.450416', 'step': 14440, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:20.504036', 'step': 14440, 'epoch': 2}
{'type': 'loss', 'content': 0.042650312185287476, 'timestamp': '2025-10-02 00:36:20.506586', 'step': 14441, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:36:20.569314', 'step': 14441, 'epoch': 2}
{'type': 'loss', 'content': 0.05116662383079529, 'timestamp': '2025-10-02 00:36:20.580198', 'step': 14442, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:20.634591', 'step': 14442, 'epoch': 2}
{'type': 'loss', 'content': 0.018317826092243195, 'timestamp': '2025-10-02 00:36:20.640328', 'step': 14443, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:20.695094', 'step': 14443, 'epoch': 2}
{'type': 'loss', 'content': 0.0384160652756691, 'timestamp': '2025-10-02 00:36:20.703198', 'step': 14444, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:20.763428', 'step': 14444, 'epoch': 2}
{'type': 'loss', 'content': 0.04808453842997551, 'timestamp': '2025-10-02 00:36:20.772754', 'step': 14445, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:36:20.837722', 'step': 14445, 'epoch': 2}
{'type': 'loss', 'content': 0.1350656896829605, 'timestamp': '2025-10-02 00:36:20.855329', 'step': 14446, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:20.916928', 'step': 14446, 'epoch': 2}
{'type': 'loss', 'content': 0.04969940334558487, 'timestamp': '2025-10-02 00:36:20.921702', 'step': 14447, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:21.004858', 'step': 14447, 'epoch': 2}
{'type': 'loss', 'content': 0.15749284625053406, 'timestamp': '2025-10-02 00:36:21.012734', 'step': 14448, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:21.093353', 'step': 14448, 'epoch': 2}
{'type': 'loss', 'content': 0.047048117965459824, 'timestamp': '2025-10-02 00:36:21.111738', 'step': 14449, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:21.186817', 'step': 14449, 'epoch': 2}
{'type': 'loss', 'content': 0.10781070590019226, 'timestamp': '2025-10-02 00:36:21.190806', 'step': 14450, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:21.294880', 'step': 14450, 'epoch': 2}
{'type': 'loss', 'content': 0.030864275991916656, 'timestamp': '2025-10-02 00:36:21.298836', 'step': 14451, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:21.358504', 'step': 14451, 'epoch': 2}
{'type': 'loss', 'content': 0.02863687463104725, 'timestamp': '2025-10-02 00:36:21.366309', 'step': 14452, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:21.434864', 'step': 14452, 'epoch': 2}
{'type': 'loss', 'content': 0.13093258440494537, 'timestamp': '2025-10-02 00:36:21.440520', 'step': 14453, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:21.512272', 'step': 14453, 'epoch': 2}
{'type': 'loss', 'content': 0.10205800831317902, 'timestamp': '2025-10-02 00:36:21.516199', 'step': 14454, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:21.586091', 'step': 14454, 'epoch': 2}
{'type': 'loss', 'content': 0.14803358912467957, 'timestamp': '2025-10-02 00:36:21.589394', 'step': 14455, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:21.660600', 'step': 14455, 'epoch': 2}
{'type': 'loss', 'content': 0.03710997849702835, 'timestamp': '2025-10-02 00:36:21.674223', 'step': 14456, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:36:21.752733', 'step': 14456, 'epoch': 2}
{'type': 'loss', 'content': 0.019007768481969833, 'timestamp': '2025-10-02 00:36:21.764485', 'step': 14457, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:21.830435', 'step': 14457, 'epoch': 2}
{'type': 'loss', 'content': 0.057788800448179245, 'timestamp': '2025-10-02 00:36:21.839986', 'step': 14458, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:21.908599', 'step': 14458, 'epoch': 2}
{'type': 'loss', 'content': 0.056781455874443054, 'timestamp': '2025-10-02 00:36:21.914117', 'step': 14459, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:21.987486', 'step': 14459, 'epoch': 2}
{'type': 'loss', 'content': 0.032489437609910965, 'timestamp': '2025-10-02 00:36:22.003022', 'step': 14460, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:22.069309', 'step': 14460, 'epoch': 2}
{'type': 'loss', 'content': 0.04686940088868141, 'timestamp': '2025-10-02 00:36:22.072903', 'step': 14461, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:22.138926', 'step': 14461, 'epoch': 2}
{'type': 'loss', 'content': 0.015159601345658302, 'timestamp': '2025-10-02 00:36:22.148328', 'step': 14462, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:36:22.215143', 'step': 14462, 'epoch': 2}
{'type': 'loss', 'content': 0.059257570654153824, 'timestamp': '2025-10-02 00:36:22.219887', 'step': 14463, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:22.287154', 'step': 14463, 'epoch': 2}
{'type': 'loss', 'content': 0.02266181819140911, 'timestamp': '2025-10-02 00:36:22.301180', 'step': 14464, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:22.374197', 'step': 14464, 'epoch': 2}
{'type': 'loss', 'content': 0.07964915037155151, 'timestamp': '2025-10-02 00:36:22.377858', 'step': 14465, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:22.459978', 'step': 14465, 'epoch': 2}
{'type': 'loss', 'content': 0.02723716013133526, 'timestamp': '2025-10-02 00:36:22.469653', 'step': 14466, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:22.536136', 'step': 14466, 'epoch': 2}
{'type': 'loss', 'content': 0.08862194418907166, 'timestamp': '2025-10-02 00:36:22.547513', 'step': 14467, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:22.611401', 'step': 14467, 'epoch': 2}
{'type': 'loss', 'content': 0.014675035141408443, 'timestamp': '2025-10-02 00:36:22.626371', 'step': 14468, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:22.695663', 'step': 14468, 'epoch': 2}
{'type': 'loss', 'content': 0.08850626647472382, 'timestamp': '2025-10-02 00:36:22.708562', 'step': 14469, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:22.777803', 'step': 14469, 'epoch': 2}
{'type': 'loss', 'content': 0.10573429614305496, 'timestamp': '2025-10-02 00:36:22.789330', 'step': 14470, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:22.852901', 'step': 14470, 'epoch': 2}
{'type': 'loss', 'content': 0.06095552444458008, 'timestamp': '2025-10-02 00:36:22.862124', 'step': 14471, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:22.926239', 'step': 14471, 'epoch': 2}
{'type': 'loss', 'content': 0.11351142078638077, 'timestamp': '2025-10-02 00:36:22.940796', 'step': 14472, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:36:22.997177', 'step': 14472, 'epoch': 2}
{'type': 'loss', 'content': 0.11682858318090439, 'timestamp': '2025-10-02 00:36:23.013257', 'step': 14473, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:36:23.076031', 'step': 14473, 'epoch': 2}
{'type': 'loss', 'content': 0.10703487694263458, 'timestamp': '2025-10-02 00:36:23.080224', 'step': 14474, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:36:23.140156', 'step': 14474, 'epoch': 2}
{'type': 'loss', 'content': 0.08040781319141388, 'timestamp': '2025-10-02 00:36:23.149947', 'step': 14475, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:36:23.211332', 'step': 14475, 'epoch': 2}
{'type': 'loss', 'content': 0.07493456453084946, 'timestamp': '2025-10-02 00:36:23.217902', 'step': 14476, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:36:23.275160', 'step': 14476, 'epoch': 2}
{'type': 'loss', 'content': 0.02013496123254299, 'timestamp': '2025-10-02 00:36:23.286503', 'step': 14477, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:36:23.360025', 'step': 14477, 'epoch': 2}
{'type': 'loss', 'content': 0.009370528161525726, 'timestamp': '2025-10-02 00:36:23.373663', 'step': 14478, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:23.432304', 'step': 14478, 'epoch': 2}
{'type': 'loss', 'content': 0.049948759377002716, 'timestamp': '2025-10-02 00:36:23.437761', 'step': 14479, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:23.501613', 'step': 14479, 'epoch': 2}
{'type': 'loss', 'content': 0.05553403124213219, 'timestamp': '2025-10-02 00:36:23.518709', 'step': 14480, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:36:23.606533', 'step': 14480, 'epoch': 2}
{'type': 'loss', 'content': 0.02222643420100212, 'timestamp': '2025-10-02 00:36:23.617787', 'step': 14481, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:23.682612', 'step': 14481, 'epoch': 2}
{'type': 'loss', 'content': 0.11451929807662964, 'timestamp': '2025-10-02 00:36:23.687628', 'step': 14482, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:23.746354', 'step': 14482, 'epoch': 2}
{'type': 'loss', 'content': 0.07488597184419632, 'timestamp': '2025-10-02 00:36:23.750166', 'step': 14483, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:23.809773', 'step': 14483, 'epoch': 2}
{'type': 'loss', 'content': 0.031451545655727386, 'timestamp': '2025-10-02 00:36:23.817563', 'step': 14484, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:23.882154', 'step': 14484, 'epoch': 2}
{'type': 'loss', 'content': 0.10776682198047638, 'timestamp': '2025-10-02 00:36:23.887100', 'step': 14485, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:23.963474', 'step': 14485, 'epoch': 2}
{'type': 'loss', 'content': 0.07055560499429703, 'timestamp': '2025-10-02 00:36:23.966534', 'step': 14486, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:24.029601', 'step': 14486, 'epoch': 2}
{'type': 'loss', 'content': 0.061584051698446274, 'timestamp': '2025-10-02 00:36:24.036625', 'step': 14487, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:24.105105', 'step': 14487, 'epoch': 2}
{'type': 'loss', 'content': 0.005118899513036013, 'timestamp': '2025-10-02 00:36:24.113310', 'step': 14488, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:36:24.178738', 'step': 14488, 'epoch': 2}
{'type': 'loss', 'content': 0.026391098275780678, 'timestamp': '2025-10-02 00:36:24.184610', 'step': 14489, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:36:24.255744', 'step': 14489, 'epoch': 2}
{'type': 'loss', 'content': 0.02836494892835617, 'timestamp': '2025-10-02 00:36:24.267729', 'step': 14490, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:24.327656', 'step': 14490, 'epoch': 2}
{'type': 'loss', 'content': 0.0730181410908699, 'timestamp': '2025-10-02 00:36:24.334990', 'step': 14491, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:36:24.406964', 'step': 14491, 'epoch': 2}
{'type': 'loss', 'content': 0.037387948483228683, 'timestamp': '2025-10-02 00:36:24.418901', 'step': 14492, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:24.489415', 'step': 14492, 'epoch': 2}
{'type': 'loss', 'content': 0.15638157725334167, 'timestamp': '2025-10-02 00:36:24.496625', 'step': 14493, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:24.558942', 'step': 14493, 'epoch': 2}
{'type': 'loss', 'content': 0.02014155499637127, 'timestamp': '2025-10-02 00:36:24.561648', 'step': 14494, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:24.618330', 'step': 14494, 'epoch': 2}
{'type': 'loss', 'content': 0.011358293704688549, 'timestamp': '2025-10-02 00:36:24.621386', 'step': 14495, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:24.684077', 'step': 14495, 'epoch': 2}
{'type': 'loss', 'content': 0.13219568133354187, 'timestamp': '2025-10-02 00:36:24.691144', 'step': 14496, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:24.745248', 'step': 14496, 'epoch': 2}
{'type': 'loss', 'content': 0.09918823838233948, 'timestamp': '2025-10-02 00:36:24.747566', 'step': 14497, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:24.807856', 'step': 14497, 'epoch': 2}
{'type': 'loss', 'content': 0.053277529776096344, 'timestamp': '2025-10-02 00:36:24.811332', 'step': 14498, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:24.870749', 'step': 14498, 'epoch': 2}
{'type': 'loss', 'content': 0.12637346982955933, 'timestamp': '2025-10-02 00:36:24.873571', 'step': 14499, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:36:24.929755', 'step': 14499, 'epoch': 2}
{'type': 'loss', 'content': 0.03917030245065689, 'timestamp': '2025-10-02 00:36:24.935514', 'step': 14500, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 14500', 'timestamp': '2025-10-02 00:36:25.346181', 'step': 14500, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:25.405156', 'step': 14500, 'epoch': 2}
{'type': 'loss', 'content': 0.02416936308145523, 'timestamp': '2025-10-02 00:36:25.408878', 'step': 14501, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:25.468267', 'step': 14501, 'epoch': 2}
{'type': 'loss', 'content': 0.07846689969301224, 'timestamp': '2025-10-02 00:36:25.471495', 'step': 14502, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:25.526838', 'step': 14502, 'epoch': 2}
{'type': 'loss', 'content': 0.15791763365268707, 'timestamp': '2025-10-02 00:36:25.529841', 'step': 14503, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:25.591773', 'step': 14503, 'epoch': 2}
{'type': 'loss', 'content': 0.07777518779039383, 'timestamp': '2025-10-02 00:36:25.598299', 'step': 14504, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:25.661365', 'step': 14504, 'epoch': 2}
{'type': 'loss', 'content': 0.05959579721093178, 'timestamp': '2025-10-02 00:36:25.670661', 'step': 14505, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:25.725709', 'step': 14505, 'epoch': 2}
{'type': 'loss', 'content': 0.09274507313966751, 'timestamp': '2025-10-02 00:36:25.734989', 'step': 14506, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:25.792748', 'step': 14506, 'epoch': 2}
{'type': 'loss', 'content': 0.1528138816356659, 'timestamp': '2025-10-02 00:36:25.797816', 'step': 14507, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:36:25.861378', 'step': 14507, 'epoch': 2}
{'type': 'loss', 'content': 0.07060229778289795, 'timestamp': '2025-10-02 00:36:25.867048', 'step': 14508, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:25.925409', 'step': 14508, 'epoch': 2}
{'type': 'loss', 'content': 0.047925904393196106, 'timestamp': '2025-10-02 00:36:25.931272', 'step': 14509, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:25.987275', 'step': 14509, 'epoch': 2}
{'type': 'loss', 'content': 0.05055886134505272, 'timestamp': '2025-10-02 00:36:25.990238', 'step': 14510, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:26.054504', 'step': 14510, 'epoch': 2}
{'type': 'loss', 'content': 0.030159994959831238, 'timestamp': '2025-10-02 00:36:26.062677', 'step': 14511, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:26.123335', 'step': 14511, 'epoch': 2}
{'type': 'loss', 'content': 0.06177335977554321, 'timestamp': '2025-10-02 00:36:26.129770', 'step': 14512, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:26.189375', 'step': 14512, 'epoch': 2}
{'type': 'loss', 'content': 0.16384081542491913, 'timestamp': '2025-10-02 00:36:26.192904', 'step': 14513, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:26.253132', 'step': 14513, 'epoch': 2}
{'type': 'loss', 'content': 0.07054395228624344, 'timestamp': '2025-10-02 00:36:26.255235', 'step': 14514, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:26.314734', 'step': 14514, 'epoch': 2}
{'type': 'loss', 'content': 0.060360316187143326, 'timestamp': '2025-10-02 00:36:26.317355', 'step': 14515, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:26.381522', 'step': 14515, 'epoch': 2}
{'type': 'loss', 'content': 0.09388983249664307, 'timestamp': '2025-10-02 00:36:26.388853', 'step': 14516, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:26.447685', 'step': 14516, 'epoch': 2}
{'type': 'loss', 'content': 0.19271406531333923, 'timestamp': '2025-10-02 00:36:26.450716', 'step': 14517, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:26.511304', 'step': 14517, 'epoch': 2}
{'type': 'loss', 'content': 0.0195748433470726, 'timestamp': '2025-10-02 00:36:26.520872', 'step': 14518, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:26.577138', 'step': 14518, 'epoch': 2}
{'type': 'loss', 'content': 0.1469181627035141, 'timestamp': '2025-10-02 00:36:26.580382', 'step': 14519, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:36:26.647557', 'step': 14519, 'epoch': 2}
{'type': 'loss', 'content': 0.04776911437511444, 'timestamp': '2025-10-02 00:36:26.669226', 'step': 14520, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:26.733899', 'step': 14520, 'epoch': 2}
{'type': 'loss', 'content': 0.08072693645954132, 'timestamp': '2025-10-02 00:36:26.737715', 'step': 14521, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:26.803057', 'step': 14521, 'epoch': 2}
{'type': 'loss', 'content': 0.03049960918724537, 'timestamp': '2025-10-02 00:36:26.809029', 'step': 14522, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:26.867640', 'step': 14522, 'epoch': 2}
{'type': 'loss', 'content': 0.10939066857099533, 'timestamp': '2025-10-02 00:36:26.879725', 'step': 14523, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:36:26.938225', 'step': 14523, 'epoch': 2}
{'type': 'loss', 'content': 0.11342500150203705, 'timestamp': '2025-10-02 00:36:26.945817', 'step': 14524, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:27.002481', 'step': 14524, 'epoch': 2}
{'type': 'loss', 'content': 0.13633403182029724, 'timestamp': '2025-10-02 00:36:27.005218', 'step': 14525, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:27.069851', 'step': 14525, 'epoch': 2}
{'type': 'loss', 'content': 0.18127422034740448, 'timestamp': '2025-10-02 00:36:27.077676', 'step': 14526, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:27.133923', 'step': 14526, 'epoch': 2}
{'type': 'loss', 'content': 0.18181230127811432, 'timestamp': '2025-10-02 00:36:27.143484', 'step': 14527, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:27.205314', 'step': 14527, 'epoch': 2}
{'type': 'loss', 'content': 0.14665958285331726, 'timestamp': '2025-10-02 00:36:27.211492', 'step': 14528, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:27.273258', 'step': 14528, 'epoch': 2}
{'type': 'loss', 'content': 0.0007366820937022567, 'timestamp': '2025-10-02 00:36:27.275705', 'step': 14529, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:27.337960', 'step': 14529, 'epoch': 2}
{'type': 'loss', 'content': 0.11380361020565033, 'timestamp': '2025-10-02 00:36:27.340450', 'step': 14530, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:27.401724', 'step': 14530, 'epoch': 2}
{'type': 'loss', 'content': 0.02419874258339405, 'timestamp': '2025-10-02 00:36:27.407300', 'step': 14531, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:27.470993', 'step': 14531, 'epoch': 2}
{'type': 'loss', 'content': 0.07444148510694504, 'timestamp': '2025-10-02 00:36:27.478612', 'step': 14532, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:27.535429', 'step': 14532, 'epoch': 2}
{'type': 'loss', 'content': 0.005016351584345102, 'timestamp': '2025-10-02 00:36:27.545659', 'step': 14533, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:27.602482', 'step': 14533, 'epoch': 2}
{'type': 'loss', 'content': 0.16070252656936646, 'timestamp': '2025-10-02 00:36:27.611143', 'step': 14534, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:27.674913', 'step': 14534, 'epoch': 2}
{'type': 'loss', 'content': 0.11187273263931274, 'timestamp': '2025-10-02 00:36:27.684453', 'step': 14535, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:27.745806', 'step': 14535, 'epoch': 2}
{'type': 'loss', 'content': 0.17816925048828125, 'timestamp': '2025-10-02 00:36:27.752061', 'step': 14536, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:36:27.814103', 'step': 14536, 'epoch': 2}
{'type': 'loss', 'content': 0.1612553745508194, 'timestamp': '2025-10-02 00:36:27.817933', 'step': 14537, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:27.874924', 'step': 14537, 'epoch': 2}
{'type': 'loss', 'content': 0.08675185590982437, 'timestamp': '2025-10-02 00:36:27.883926', 'step': 14538, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:27.958026', 'step': 14538, 'epoch': 2}
{'type': 'loss', 'content': 0.14367793500423431, 'timestamp': '2025-10-02 00:36:27.961464', 'step': 14539, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:28.036791', 'step': 14539, 'epoch': 2}
{'type': 'loss', 'content': 0.03214851766824722, 'timestamp': '2025-10-02 00:36:28.046955', 'step': 14540, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:28.123417', 'step': 14540, 'epoch': 2}
{'type': 'loss', 'content': 0.03237304463982582, 'timestamp': '2025-10-02 00:36:28.134051', 'step': 14541, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:36:28.204816', 'step': 14541, 'epoch': 2}
{'type': 'loss', 'content': 0.022450784221291542, 'timestamp': '2025-10-02 00:36:28.216593', 'step': 14542, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:28.294638', 'step': 14542, 'epoch': 2}
{'type': 'loss', 'content': 0.028175165876746178, 'timestamp': '2025-10-02 00:36:28.298547', 'step': 14543, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:28.373954', 'step': 14543, 'epoch': 2}
{'type': 'loss', 'content': 0.05408567935228348, 'timestamp': '2025-10-02 00:36:28.380456', 'step': 14544, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:36:28.452684', 'step': 14544, 'epoch': 2}
{'type': 'loss', 'content': 0.025217542424798012, 'timestamp': '2025-10-02 00:36:28.463751', 'step': 14545, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:36:28.531212', 'step': 14545, 'epoch': 2}
{'type': 'loss', 'content': 0.0841614380478859, 'timestamp': '2025-10-02 00:36:28.540591', 'step': 14546, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:28.602697', 'step': 14546, 'epoch': 2}
{'type': 'loss', 'content': 0.08063720911741257, 'timestamp': '2025-10-02 00:36:28.611209', 'step': 14547, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:28.674470', 'step': 14547, 'epoch': 2}
{'type': 'loss', 'content': 0.02814676985144615, 'timestamp': '2025-10-02 00:36:28.681164', 'step': 14548, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:28.739781', 'step': 14548, 'epoch': 2}
{'type': 'loss', 'content': 0.06269372999668121, 'timestamp': '2025-10-02 00:36:28.743432', 'step': 14549, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:28.809860', 'step': 14549, 'epoch': 2}
{'type': 'loss', 'content': 0.062022555619478226, 'timestamp': '2025-10-02 00:36:28.817199', 'step': 14550, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:36:28.877780', 'step': 14550, 'epoch': 2}
{'type': 'loss', 'content': 0.1235029399394989, 'timestamp': '2025-10-02 00:36:28.881444', 'step': 14551, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:28.937196', 'step': 14551, 'epoch': 2}
{'type': 'loss', 'content': 0.03532025218009949, 'timestamp': '2025-10-02 00:36:28.944610', 'step': 14552, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:29.007655', 'step': 14552, 'epoch': 2}
{'type': 'loss', 'content': 0.060285430401563644, 'timestamp': '2025-10-02 00:36:29.017049', 'step': 14553, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:36:29.084334', 'step': 14553, 'epoch': 2}
{'type': 'loss', 'content': 0.060630910098552704, 'timestamp': '2025-10-02 00:36:29.095183', 'step': 14554, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:29.164915', 'step': 14554, 'epoch': 2}
{'type': 'loss', 'content': 0.010329755023121834, 'timestamp': '2025-10-02 00:36:29.167685', 'step': 14555, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:29.228735', 'step': 14555, 'epoch': 2}
{'type': 'loss', 'content': 0.059210166335105896, 'timestamp': '2025-10-02 00:36:29.253561', 'step': 14556, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:36:29.322159', 'step': 14556, 'epoch': 2}
{'type': 'loss', 'content': 0.043798841536045074, 'timestamp': '2025-10-02 00:36:29.333503', 'step': 14557, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:29.404983', 'step': 14557, 'epoch': 2}
{'type': 'loss', 'content': 0.11687599867582321, 'timestamp': '2025-10-02 00:36:29.414510', 'step': 14558, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:29.470268', 'step': 14558, 'epoch': 2}
{'type': 'loss', 'content': 0.03226742893457413, 'timestamp': '2025-10-02 00:36:29.474348', 'step': 14559, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:36:29.535804', 'step': 14559, 'epoch': 2}
{'type': 'loss', 'content': 0.044593118131160736, 'timestamp': '2025-10-02 00:36:29.545192', 'step': 14560, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:36:29.604626', 'step': 14560, 'epoch': 2}
{'type': 'loss', 'content': 0.11552372574806213, 'timestamp': '2025-10-02 00:36:29.607640', 'step': 14561, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:29.677317', 'step': 14561, 'epoch': 2}
{'type': 'loss', 'content': 0.012004579417407513, 'timestamp': '2025-10-02 00:36:29.688213', 'step': 14562, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:29.770356', 'step': 14562, 'epoch': 2}
{'type': 'loss', 'content': 0.03648707643151283, 'timestamp': '2025-10-02 00:36:29.779694', 'step': 14563, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:29.836362', 'step': 14563, 'epoch': 2}
{'type': 'loss', 'content': 0.03770900145173073, 'timestamp': '2025-10-02 00:36:29.846815', 'step': 14564, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:29.903493', 'step': 14564, 'epoch': 2}
{'type': 'loss', 'content': 0.018924659118056297, 'timestamp': '2025-10-02 00:36:29.913791', 'step': 14565, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:29.984619', 'step': 14565, 'epoch': 2}
{'type': 'loss', 'content': 0.1400717943906784, 'timestamp': '2025-10-02 00:36:29.992686', 'step': 14566, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:36:30.063493', 'step': 14566, 'epoch': 2}
{'type': 'loss', 'content': 0.007924835197627544, 'timestamp': '2025-10-02 00:36:30.074252', 'step': 14567, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:30.153291', 'step': 14567, 'epoch': 2}
{'type': 'loss', 'content': 0.02306297980248928, 'timestamp': '2025-10-02 00:36:30.163384', 'step': 14568, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:30.230647', 'step': 14568, 'epoch': 2}
{'type': 'loss', 'content': 0.049535177648067474, 'timestamp': '2025-10-02 00:36:30.236473', 'step': 14569, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:36:30.338775', 'step': 14569, 'epoch': 2}
{'type': 'loss', 'content': 0.021368078887462616, 'timestamp': '2025-10-02 00:36:30.352632', 'step': 14570, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:30.427654', 'step': 14570, 'epoch': 2}
{'type': 'loss', 'content': 0.09962550550699234, 'timestamp': '2025-10-02 00:36:30.440648', 'step': 14571, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:30.521629', 'step': 14571, 'epoch': 2}
{'type': 'loss', 'content': 0.13730423152446747, 'timestamp': '2025-10-02 00:36:30.536078', 'step': 14572, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:30.612464', 'step': 14572, 'epoch': 2}
{'type': 'loss', 'content': 0.05406702682375908, 'timestamp': '2025-10-02 00:36:30.624628', 'step': 14573, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:30.684518', 'step': 14573, 'epoch': 2}
{'type': 'loss', 'content': 0.06497950851917267, 'timestamp': '2025-10-02 00:36:30.695048', 'step': 14574, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:36:30.765304', 'step': 14574, 'epoch': 2}
{'type': 'loss', 'content': 0.10459940135478973, 'timestamp': '2025-10-02 00:36:30.768689', 'step': 14575, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:30.843618', 'step': 14575, 'epoch': 2}
{'type': 'loss', 'content': 0.054015789180994034, 'timestamp': '2025-10-02 00:36:30.850266', 'step': 14576, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:30.912882', 'step': 14576, 'epoch': 2}
{'type': 'loss', 'content': 0.09714950621128082, 'timestamp': '2025-10-02 00:36:30.922437', 'step': 14577, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:36:30.980866', 'step': 14577, 'epoch': 2}
{'type': 'loss', 'content': 0.051575012505054474, 'timestamp': '2025-10-02 00:36:30.990166', 'step': 14578, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:31.066334', 'step': 14578, 'epoch': 2}
{'type': 'loss', 'content': 0.014786181040108204, 'timestamp': '2025-10-02 00:36:31.077383', 'step': 14579, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:31.148135', 'step': 14579, 'epoch': 2}
{'type': 'loss', 'content': 0.06428195536136627, 'timestamp': '2025-10-02 00:36:31.155116', 'step': 14580, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:31.221472', 'step': 14580, 'epoch': 2}
{'type': 'loss', 'content': 0.0983586236834526, 'timestamp': '2025-10-02 00:36:31.230456', 'step': 14581, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:31.298228', 'step': 14581, 'epoch': 2}
{'type': 'loss', 'content': 0.04000966623425484, 'timestamp': '2025-10-02 00:36:31.307937', 'step': 14582, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:31.377931', 'step': 14582, 'epoch': 2}
{'type': 'loss', 'content': 0.045973263680934906, 'timestamp': '2025-10-02 00:36:31.381529', 'step': 14583, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:31.446808', 'step': 14583, 'epoch': 2}
{'type': 'loss', 'content': 0.10426493734121323, 'timestamp': '2025-10-02 00:36:31.459226', 'step': 14584, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:36:31.520359', 'step': 14584, 'epoch': 2}
{'type': 'loss', 'content': 0.04619012773036957, 'timestamp': '2025-10-02 00:36:31.524622', 'step': 14585, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:31.597021', 'step': 14585, 'epoch': 2}
{'type': 'loss', 'content': 0.056045450270175934, 'timestamp': '2025-10-02 00:36:31.608931', 'step': 14586, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:36:31.684953', 'step': 14586, 'epoch': 2}
{'type': 'loss', 'content': 0.11034824699163437, 'timestamp': '2025-10-02 00:36:31.700502', 'step': 14587, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:31.783503', 'step': 14587, 'epoch': 2}
{'type': 'loss', 'content': 0.08539699018001556, 'timestamp': '2025-10-02 00:36:31.797316', 'step': 14588, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:36:31.872891', 'step': 14588, 'epoch': 2}
{'type': 'loss', 'content': 0.12716442346572876, 'timestamp': '2025-10-02 00:36:31.882150', 'step': 14589, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:31.952427', 'step': 14589, 'epoch': 2}
{'type': 'loss', 'content': 0.06552885472774506, 'timestamp': '2025-10-02 00:36:31.962112', 'step': 14590, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:32.039319', 'step': 14590, 'epoch': 2}
{'type': 'loss', 'content': 0.020090708509087563, 'timestamp': '2025-10-02 00:36:32.048870', 'step': 14591, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:32.113553', 'step': 14591, 'epoch': 2}
{'type': 'loss', 'content': 0.10844601690769196, 'timestamp': '2025-10-02 00:36:32.123841', 'step': 14592, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:32.179576', 'step': 14592, 'epoch': 2}
{'type': 'loss', 'content': 0.095955491065979, 'timestamp': '2025-10-02 00:36:32.183223', 'step': 14593, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:32.253568', 'step': 14593, 'epoch': 2}
{'type': 'loss', 'content': 0.06997577846050262, 'timestamp': '2025-10-02 00:36:32.263464', 'step': 14594, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:32.327587', 'step': 14594, 'epoch': 2}
{'type': 'loss', 'content': 0.025952599942684174, 'timestamp': '2025-10-02 00:36:32.333472', 'step': 14595, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:36:32.401360', 'step': 14595, 'epoch': 2}
{'type': 'loss', 'content': 0.07890278846025467, 'timestamp': '2025-10-02 00:36:32.408102', 'step': 14596, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:36:32.483816', 'step': 14596, 'epoch': 2}
{'type': 'loss', 'content': 0.02928820438683033, 'timestamp': '2025-10-02 00:36:32.495130', 'step': 14597, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:32.561439', 'step': 14597, 'epoch': 2}
{'type': 'loss', 'content': 0.026324063539505005, 'timestamp': '2025-10-02 00:36:32.568763', 'step': 14598, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:32.636527', 'step': 14598, 'epoch': 2}
{'type': 'loss', 'content': 0.054347146302461624, 'timestamp': '2025-10-02 00:36:32.643082', 'step': 14599, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:36:32.714307', 'step': 14599, 'epoch': 2}
{'type': 'loss', 'content': 0.10880466550588608, 'timestamp': '2025-10-02 00:36:32.721322', 'step': 14600, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:32.786303', 'step': 14600, 'epoch': 2}
{'type': 'loss', 'content': 0.0853135958313942, 'timestamp': '2025-10-02 00:36:32.789201', 'step': 14601, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:32.857608', 'step': 14601, 'epoch': 2}
{'type': 'loss', 'content': 0.16836614906787872, 'timestamp': '2025-10-02 00:36:32.864858', 'step': 14602, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:36:32.924723', 'step': 14602, 'epoch': 2}
{'type': 'loss', 'content': 0.041578441858291626, 'timestamp': '2025-10-02 00:36:32.927633', 'step': 14603, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:32.995117', 'step': 14603, 'epoch': 2}
{'type': 'loss', 'content': 0.04120013117790222, 'timestamp': '2025-10-02 00:36:33.007118', 'step': 14604, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:33.079458', 'step': 14604, 'epoch': 2}
{'type': 'loss', 'content': 0.06120755895972252, 'timestamp': '2025-10-02 00:36:33.089883', 'step': 14605, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:33.154721', 'step': 14605, 'epoch': 2}
{'type': 'loss', 'content': 0.1642674207687378, 'timestamp': '2025-10-02 00:36:33.158607', 'step': 14606, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:33.228381', 'step': 14606, 'epoch': 2}
{'type': 'loss', 'content': 0.04429413005709648, 'timestamp': '2025-10-02 00:36:33.231550', 'step': 14607, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:36:33.292432', 'step': 14607, 'epoch': 2}
{'type': 'loss', 'content': 0.2333987057209015, 'timestamp': '2025-10-02 00:36:33.298991', 'step': 14608, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:33.368121', 'step': 14608, 'epoch': 2}
{'type': 'loss', 'content': 0.14306415617465973, 'timestamp': '2025-10-02 00:36:33.376234', 'step': 14609, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:36:33.437484', 'step': 14609, 'epoch': 2}
{'type': 'loss', 'content': 0.032693050801754, 'timestamp': '2025-10-02 00:36:33.445934', 'step': 14610, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:36:33.512413', 'step': 14610, 'epoch': 2}
{'type': 'loss', 'content': 0.052121683955192566, 'timestamp': '2025-10-02 00:36:33.520282', 'step': 14611, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:36:33.587143', 'step': 14611, 'epoch': 2}
{'type': 'loss', 'content': 0.11792339384555817, 'timestamp': '2025-10-02 00:36:33.596863', 'step': 14612, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:36:33.660145', 'step': 14612, 'epoch': 2}
{'type': 'loss', 'content': 0.033848825842142105, 'timestamp': '2025-10-02 00:36:33.669783', 'step': 14613, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:36:33.734298', 'step': 14613, 'epoch': 2}
{'type': 'loss', 'content': 0.0489821694791317, 'timestamp': '2025-10-02 00:36:33.741526', 'step': 14614, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:36:33.809604', 'step': 14614, 'epoch': 2}
{'type': 'loss', 'content': 0.10808061808347702, 'timestamp': '2025-10-02 00:36:33.816023', 'step': 14615, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:36:33.882057', 'step': 14615, 'epoch': 2}
{'type': 'loss', 'content': 0.018454890698194504, 'timestamp': '2025-10-02 00:36:33.892410', 'step': 14616, 'epoch': 2}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:37:03.389814', 'step': 14616, 'epoch': 2}
{'type': 'pplx', 'content': 102.60767036155063, 'timestamp': '2025-10-02 00:37:03.402822', 'step': 14616, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:03.459896', 'step': 14616, 'epoch': 2}
{'type': 'loss', 'content': 0.038048163056373596, 'timestamp': '2025-10-02 00:37:03.465355', 'step': 14617, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:03.534683', 'step': 14617, 'epoch': 2}
{'type': 'loss', 'content': 0.060440972447395325, 'timestamp': '2025-10-02 00:37:03.543606', 'step': 14618, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:03.615184', 'step': 14618, 'epoch': 2}
{'type': 'loss', 'content': 0.08307014405727386, 'timestamp': '2025-10-02 00:37:03.623817', 'step': 14619, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:03.696387', 'step': 14619, 'epoch': 2}
{'type': 'loss', 'content': 0.05941734462976456, 'timestamp': '2025-10-02 00:37:03.708486', 'step': 14620, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:03.769578', 'step': 14620, 'epoch': 2}
{'type': 'loss', 'content': 0.09461316466331482, 'timestamp': '2025-10-02 00:37:03.778001', 'step': 14621, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:03.840814', 'step': 14621, 'epoch': 2}
{'type': 'loss', 'content': 0.13380061089992523, 'timestamp': '2025-10-02 00:37:03.847948', 'step': 14622, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:03.911222', 'step': 14622, 'epoch': 2}
{'type': 'loss', 'content': 0.03485121950507164, 'timestamp': '2025-10-02 00:37:03.918384', 'step': 14623, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:37:03.997160', 'step': 14623, 'epoch': 2}
{'type': 'loss', 'content': 0.05762837082147598, 'timestamp': '2025-10-02 00:37:04.008392', 'step': 14624, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:04.066680', 'step': 14624, 'epoch': 2}
{'type': 'loss', 'content': 0.10571978241205215, 'timestamp': '2025-10-02 00:37:04.070045', 'step': 14625, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:04.127788', 'step': 14625, 'epoch': 2}
{'type': 'loss', 'content': 0.20168624818325043, 'timestamp': '2025-10-02 00:37:04.135142', 'step': 14626, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:04.207001', 'step': 14626, 'epoch': 2}
{'type': 'loss', 'content': 0.1588464081287384, 'timestamp': '2025-10-02 00:37:04.214954', 'step': 14627, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:04.285900', 'step': 14627, 'epoch': 2}
{'type': 'loss', 'content': 0.04813724383711815, 'timestamp': '2025-10-02 00:37:04.296261', 'step': 14628, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:04.370965', 'step': 14628, 'epoch': 2}
{'type': 'loss', 'content': 0.06554633378982544, 'timestamp': '2025-10-02 00:37:04.374255', 'step': 14629, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:04.448037', 'step': 14629, 'epoch': 2}
{'type': 'loss', 'content': 0.07789973169565201, 'timestamp': '2025-10-02 00:37:04.455494', 'step': 14630, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:04.525856', 'step': 14630, 'epoch': 2}
{'type': 'loss', 'content': 0.020112015306949615, 'timestamp': '2025-10-02 00:37:04.537727', 'step': 14631, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:04.616966', 'step': 14631, 'epoch': 2}
{'type': 'loss', 'content': 0.06684096902608871, 'timestamp': '2025-10-02 00:37:04.626931', 'step': 14632, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:37:04.704849', 'step': 14632, 'epoch': 2}
{'type': 'loss', 'content': 0.04274218901991844, 'timestamp': '2025-10-02 00:37:04.716188', 'step': 14633, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:04.788216', 'step': 14633, 'epoch': 2}
{'type': 'loss', 'content': 0.01168953999876976, 'timestamp': '2025-10-02 00:37:04.793921', 'step': 14634, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:04.866247', 'step': 14634, 'epoch': 2}
{'type': 'loss', 'content': 0.02103479951620102, 'timestamp': '2025-10-02 00:37:04.876456', 'step': 14635, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:04.968320', 'step': 14635, 'epoch': 2}
{'type': 'loss', 'content': 0.014958448708057404, 'timestamp': '2025-10-02 00:37:04.978471', 'step': 14636, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:37:05.038584', 'step': 14636, 'epoch': 2}
{'type': 'loss', 'content': 0.1270681917667389, 'timestamp': '2025-10-02 00:37:05.043695', 'step': 14637, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:05.116403', 'step': 14637, 'epoch': 2}
{'type': 'loss', 'content': 0.02528170309960842, 'timestamp': '2025-10-02 00:37:05.126811', 'step': 14638, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:05.194603', 'step': 14638, 'epoch': 2}
{'type': 'loss', 'content': 0.019733218476176262, 'timestamp': '2025-10-02 00:37:05.202596', 'step': 14639, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:05.262084', 'step': 14639, 'epoch': 2}
{'type': 'loss', 'content': 0.07030101865530014, 'timestamp': '2025-10-02 00:37:05.268953', 'step': 14640, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:05.324976', 'step': 14640, 'epoch': 2}
{'type': 'loss', 'content': 0.16665269434452057, 'timestamp': '2025-10-02 00:37:05.335227', 'step': 14641, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:05.391941', 'step': 14641, 'epoch': 2}
{'type': 'loss', 'content': 0.04345213994383812, 'timestamp': '2025-10-02 00:37:05.399407', 'step': 14642, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:05.459937', 'step': 14642, 'epoch': 2}
{'type': 'loss', 'content': 0.11817578226327896, 'timestamp': '2025-10-02 00:37:05.469514', 'step': 14643, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:05.524539', 'step': 14643, 'epoch': 2}
{'type': 'loss', 'content': 0.026933899149298668, 'timestamp': '2025-10-02 00:37:05.532900', 'step': 14644, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:05.591072', 'step': 14644, 'epoch': 2}
{'type': 'loss', 'content': 0.047435227781534195, 'timestamp': '2025-10-02 00:37:05.598803', 'step': 14645, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:05.658602', 'step': 14645, 'epoch': 2}
{'type': 'loss', 'content': 0.09388599544763565, 'timestamp': '2025-10-02 00:37:05.661491', 'step': 14646, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:05.717713', 'step': 14646, 'epoch': 2}
{'type': 'loss', 'content': 0.05060948431491852, 'timestamp': '2025-10-02 00:37:05.725500', 'step': 14647, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:05.799505', 'step': 14647, 'epoch': 2}
{'type': 'loss', 'content': 0.046988893300294876, 'timestamp': '2025-10-02 00:37:05.809684', 'step': 14648, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:37:05.879671', 'step': 14648, 'epoch': 2}
{'type': 'loss', 'content': 0.12545821070671082, 'timestamp': '2025-10-02 00:37:05.882109', 'step': 14649, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:05.943935', 'step': 14649, 'epoch': 2}
{'type': 'loss', 'content': 0.059595853090286255, 'timestamp': '2025-10-02 00:37:05.952979', 'step': 14650, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:06.029348', 'step': 14650, 'epoch': 2}
{'type': 'loss', 'content': 0.08449801802635193, 'timestamp': '2025-10-02 00:37:06.032342', 'step': 14651, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:06.101548', 'step': 14651, 'epoch': 2}
{'type': 'loss', 'content': 0.022567449137568474, 'timestamp': '2025-10-02 00:37:06.111775', 'step': 14652, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:06.175272', 'step': 14652, 'epoch': 2}
{'type': 'loss', 'content': 0.0995069071650505, 'timestamp': '2025-10-02 00:37:06.183319', 'step': 14653, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:06.257944', 'step': 14653, 'epoch': 2}
{'type': 'loss', 'content': 0.15249760448932648, 'timestamp': '2025-10-02 00:37:06.265331', 'step': 14654, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:37:06.335546', 'step': 14654, 'epoch': 2}
{'type': 'loss', 'content': 0.15778063237667084, 'timestamp': '2025-10-02 00:37:06.339709', 'step': 14655, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:37:06.413771', 'step': 14655, 'epoch': 2}
{'type': 'loss', 'content': 0.029645543545484543, 'timestamp': '2025-10-02 00:37:06.426819', 'step': 14656, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:06.486323', 'step': 14656, 'epoch': 2}
{'type': 'loss', 'content': 0.06049659103155136, 'timestamp': '2025-10-02 00:37:06.495818', 'step': 14657, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:06.568404', 'step': 14657, 'epoch': 2}
{'type': 'loss', 'content': 0.023674916476011276, 'timestamp': '2025-10-02 00:37:06.581891', 'step': 14658, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:06.651202', 'step': 14658, 'epoch': 2}
{'type': 'loss', 'content': 0.0618395172059536, 'timestamp': '2025-10-02 00:37:06.655822', 'step': 14659, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:06.713511', 'step': 14659, 'epoch': 2}
{'type': 'loss', 'content': 0.02103542909026146, 'timestamp': '2025-10-02 00:37:06.720270', 'step': 14660, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:06.780882', 'step': 14660, 'epoch': 2}
{'type': 'loss', 'content': 0.05739835277199745, 'timestamp': '2025-10-02 00:37:06.784001', 'step': 14661, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:06.840956', 'step': 14661, 'epoch': 2}
{'type': 'loss', 'content': 0.025821669027209282, 'timestamp': '2025-10-02 00:37:06.844781', 'step': 14662, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:06.903262', 'step': 14662, 'epoch': 2}
{'type': 'loss', 'content': 0.07834798842668533, 'timestamp': '2025-10-02 00:37:06.910800', 'step': 14663, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:06.972856', 'step': 14663, 'epoch': 2}
{'type': 'loss', 'content': 0.03441302850842476, 'timestamp': '2025-10-02 00:37:06.979488', 'step': 14664, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:37:07.035751', 'step': 14664, 'epoch': 2}
{'type': 'loss', 'content': 0.13380441069602966, 'timestamp': '2025-10-02 00:37:07.039015', 'step': 14665, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:07.095284', 'step': 14665, 'epoch': 2}
{'type': 'loss', 'content': 0.05194918066263199, 'timestamp': '2025-10-02 00:37:07.098336', 'step': 14666, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:07.157834', 'step': 14666, 'epoch': 2}
{'type': 'loss', 'content': 0.07410340756177902, 'timestamp': '2025-10-02 00:37:07.160566', 'step': 14667, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:07.222786', 'step': 14667, 'epoch': 2}
{'type': 'loss', 'content': 0.04933622479438782, 'timestamp': '2025-10-02 00:37:07.229795', 'step': 14668, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:37:07.285429', 'step': 14668, 'epoch': 2}
{'type': 'loss', 'content': 0.08136168122291565, 'timestamp': '2025-10-02 00:37:07.288612', 'step': 14669, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:07.351653', 'step': 14669, 'epoch': 2}
{'type': 'loss', 'content': 0.031787656247615814, 'timestamp': '2025-10-02 00:37:07.360392', 'step': 14670, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:07.430860', 'step': 14670, 'epoch': 2}
{'type': 'loss', 'content': 0.04062727466225624, 'timestamp': '2025-10-02 00:37:07.440443', 'step': 14671, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:07.504219', 'step': 14671, 'epoch': 2}
{'type': 'loss', 'content': 0.036421194672584534, 'timestamp': '2025-10-02 00:37:07.515387', 'step': 14672, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:07.587569', 'step': 14672, 'epoch': 2}
{'type': 'loss', 'content': 0.05218731239438057, 'timestamp': '2025-10-02 00:37:07.598556', 'step': 14673, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:07.660903', 'step': 14673, 'epoch': 2}
{'type': 'loss', 'content': 0.06390589475631714, 'timestamp': '2025-10-02 00:37:07.663624', 'step': 14674, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:07.737633', 'step': 14674, 'epoch': 2}
{'type': 'loss', 'content': 0.0462774932384491, 'timestamp': '2025-10-02 00:37:07.746346', 'step': 14675, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:07.812266', 'step': 14675, 'epoch': 2}
{'type': 'loss', 'content': 0.017021482810378075, 'timestamp': '2025-10-02 00:37:07.819169', 'step': 14676, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:07.882368', 'step': 14676, 'epoch': 2}
{'type': 'loss', 'content': 0.11320643126964569, 'timestamp': '2025-10-02 00:37:07.887403', 'step': 14677, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:37:07.945700', 'step': 14677, 'epoch': 2}
{'type': 'loss', 'content': 0.14221233129501343, 'timestamp': '2025-10-02 00:37:07.949290', 'step': 14678, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:08.007375', 'step': 14678, 'epoch': 2}
{'type': 'loss', 'content': 0.12242276966571808, 'timestamp': '2025-10-02 00:37:08.010954', 'step': 14679, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:08.071232', 'step': 14679, 'epoch': 2}
{'type': 'loss', 'content': 0.024674812331795692, 'timestamp': '2025-10-02 00:37:08.083558', 'step': 14680, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:37:08.141499', 'step': 14680, 'epoch': 2}
{'type': 'loss', 'content': 0.03661322966217995, 'timestamp': '2025-10-02 00:37:08.148388', 'step': 14681, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:08.213633', 'step': 14681, 'epoch': 2}
{'type': 'loss', 'content': 0.07758177816867828, 'timestamp': '2025-10-02 00:37:08.221939', 'step': 14682, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:08.279364', 'step': 14682, 'epoch': 2}
{'type': 'loss', 'content': 0.08980318158864975, 'timestamp': '2025-10-02 00:37:08.287554', 'step': 14683, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:08.359180', 'step': 14683, 'epoch': 2}
{'type': 'loss', 'content': 0.07240000367164612, 'timestamp': '2025-10-02 00:37:08.370712', 'step': 14684, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:08.439209', 'step': 14684, 'epoch': 2}
{'type': 'loss', 'content': 0.06975673139095306, 'timestamp': '2025-10-02 00:37:08.447352', 'step': 14685, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:08.513310', 'step': 14685, 'epoch': 2}
{'type': 'loss', 'content': 0.03845396637916565, 'timestamp': '2025-10-02 00:37:08.526238', 'step': 14686, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:08.601397', 'step': 14686, 'epoch': 2}
{'type': 'loss', 'content': 0.09063796699047089, 'timestamp': '2025-10-02 00:37:08.610282', 'step': 14687, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:08.678299', 'step': 14687, 'epoch': 2}
{'type': 'loss', 'content': 0.13851916790008545, 'timestamp': '2025-10-02 00:37:08.684561', 'step': 14688, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:08.745534', 'step': 14688, 'epoch': 2}
{'type': 'loss', 'content': 0.07063025236129761, 'timestamp': '2025-10-02 00:37:08.747936', 'step': 14689, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:08.816225', 'step': 14689, 'epoch': 2}
{'type': 'loss', 'content': 0.033640045672655106, 'timestamp': '2025-10-02 00:37:08.827793', 'step': 14690, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:08.900399', 'step': 14690, 'epoch': 2}
{'type': 'loss', 'content': 0.1363603174686432, 'timestamp': '2025-10-02 00:37:08.903289', 'step': 14691, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:37:08.974861', 'step': 14691, 'epoch': 2}
{'type': 'loss', 'content': 0.03359035402536392, 'timestamp': '2025-10-02 00:37:08.987759', 'step': 14692, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:09.054461', 'step': 14692, 'epoch': 2}
{'type': 'loss', 'content': 0.025984514504671097, 'timestamp': '2025-10-02 00:37:09.064761', 'step': 14693, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:09.121136', 'step': 14693, 'epoch': 2}
{'type': 'loss', 'content': 0.11115965992212296, 'timestamp': '2025-10-02 00:37:09.124347', 'step': 14694, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:09.185216', 'step': 14694, 'epoch': 2}
{'type': 'loss', 'content': 0.02842618338763714, 'timestamp': '2025-10-02 00:37:09.195427', 'step': 14695, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:09.252421', 'step': 14695, 'epoch': 2}
{'type': 'loss', 'content': 0.025998417288064957, 'timestamp': '2025-10-02 00:37:09.262767', 'step': 14696, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:09.318987', 'step': 14696, 'epoch': 2}
{'type': 'loss', 'content': 0.022066693753004074, 'timestamp': '2025-10-02 00:37:09.322226', 'step': 14697, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:09.377923', 'step': 14697, 'epoch': 2}
{'type': 'loss', 'content': 0.12002510577440262, 'timestamp': '2025-10-02 00:37:09.383825', 'step': 14698, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:09.439254', 'step': 14698, 'epoch': 2}
{'type': 'loss', 'content': 0.09252315014600754, 'timestamp': '2025-10-02 00:37:09.441412', 'step': 14699, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:09.497407', 'step': 14699, 'epoch': 2}
{'type': 'loss', 'content': 0.07665903121232986, 'timestamp': '2025-10-02 00:37:09.504248', 'step': 14700, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:09.562117', 'step': 14700, 'epoch': 2}
{'type': 'loss', 'content': 0.05153491720557213, 'timestamp': '2025-10-02 00:37:09.567521', 'step': 14701, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:09.621737', 'step': 14701, 'epoch': 2}
{'type': 'loss', 'content': 0.08800297975540161, 'timestamp': '2025-10-02 00:37:09.624223', 'step': 14702, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:37:09.693269', 'step': 14702, 'epoch': 2}
{'type': 'loss', 'content': 0.005094464868307114, 'timestamp': '2025-10-02 00:37:09.705607', 'step': 14703, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:09.759618', 'step': 14703, 'epoch': 2}
{'type': 'loss', 'content': 0.04242260754108429, 'timestamp': '2025-10-02 00:37:09.765562', 'step': 14704, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:09.818780', 'step': 14704, 'epoch': 2}
{'type': 'loss', 'content': 0.043230485171079636, 'timestamp': '2025-10-02 00:37:09.821224', 'step': 14705, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:37:09.882487', 'step': 14705, 'epoch': 2}
{'type': 'loss', 'content': 0.027331631630659103, 'timestamp': '2025-10-02 00:37:09.893013', 'step': 14706, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:09.947350', 'step': 14706, 'epoch': 2}
{'type': 'loss', 'content': 0.048817120492458344, 'timestamp': '2025-10-02 00:37:09.954871', 'step': 14707, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:10.011124', 'step': 14707, 'epoch': 2}
{'type': 'loss', 'content': 0.013379495590925217, 'timestamp': '2025-10-02 00:37:10.021462', 'step': 14708, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:10.075294', 'step': 14708, 'epoch': 2}
{'type': 'loss', 'content': 0.097569040954113, 'timestamp': '2025-10-02 00:37:10.077620', 'step': 14709, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:10.133086', 'step': 14709, 'epoch': 2}
{'type': 'loss', 'content': 0.07545562088489532, 'timestamp': '2025-10-02 00:37:10.135460', 'step': 14710, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:10.189695', 'step': 14710, 'epoch': 2}
{'type': 'loss', 'content': 0.19029346108436584, 'timestamp': '2025-10-02 00:37:10.192055', 'step': 14711, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:10.247188', 'step': 14711, 'epoch': 2}
{'type': 'loss', 'content': 0.0782618448138237, 'timestamp': '2025-10-02 00:37:10.257494', 'step': 14712, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:10.312189', 'step': 14712, 'epoch': 2}
{'type': 'loss', 'content': 0.032845087349414825, 'timestamp': '2025-10-02 00:37:10.319557', 'step': 14713, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:37:10.383386', 'step': 14713, 'epoch': 2}
{'type': 'loss', 'content': 0.04388003423810005, 'timestamp': '2025-10-02 00:37:10.394236', 'step': 14714, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:10.449418', 'step': 14714, 'epoch': 2}
{'type': 'loss', 'content': 0.05297422409057617, 'timestamp': '2025-10-02 00:37:10.456758', 'step': 14715, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:10.513425', 'step': 14715, 'epoch': 2}
{'type': 'loss', 'content': 0.019840890541672707, 'timestamp': '2025-10-02 00:37:10.523803', 'step': 14716, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:10.579896', 'step': 14716, 'epoch': 2}
{'type': 'loss', 'content': 0.00964032206684351, 'timestamp': '2025-10-02 00:37:10.585629', 'step': 14717, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:10.639780', 'step': 14717, 'epoch': 2}
{'type': 'loss', 'content': 0.06470227986574173, 'timestamp': '2025-10-02 00:37:10.642056', 'step': 14718, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:10.696289', 'step': 14718, 'epoch': 2}
{'type': 'loss', 'content': 0.0679863691329956, 'timestamp': '2025-10-02 00:37:10.698694', 'step': 14719, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:10.753076', 'step': 14719, 'epoch': 2}
{'type': 'loss', 'content': 0.11122738569974899, 'timestamp': '2025-10-02 00:37:10.759811', 'step': 14720, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:10.814055', 'step': 14720, 'epoch': 2}
{'type': 'loss', 'content': 0.05805402249097824, 'timestamp': '2025-10-02 00:37:10.816648', 'step': 14721, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:10.870565', 'step': 14721, 'epoch': 2}
{'type': 'loss', 'content': 0.1214674711227417, 'timestamp': '2025-10-02 00:37:10.873059', 'step': 14722, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:10.927397', 'step': 14722, 'epoch': 2}
{'type': 'loss', 'content': 0.06782834976911545, 'timestamp': '2025-10-02 00:37:10.930210', 'step': 14723, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:10.984796', 'step': 14723, 'epoch': 2}
{'type': 'loss', 'content': 0.08133858442306519, 'timestamp': '2025-10-02 00:37:10.990563', 'step': 14724, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:11.044477', 'step': 14724, 'epoch': 2}
{'type': 'loss', 'content': 0.06824687123298645, 'timestamp': '2025-10-02 00:37:11.052012', 'step': 14725, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:37:11.106208', 'step': 14725, 'epoch': 2}
{'type': 'loss', 'content': 0.10441089421510696, 'timestamp': '2025-10-02 00:37:11.108777', 'step': 14726, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:11.165458', 'step': 14726, 'epoch': 2}
{'type': 'loss', 'content': 0.05518580228090286, 'timestamp': '2025-10-02 00:37:11.174977', 'step': 14727, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:11.231296', 'step': 14727, 'epoch': 2}
{'type': 'loss', 'content': 0.03506633639335632, 'timestamp': '2025-10-02 00:37:11.237190', 'step': 14728, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:11.290635', 'step': 14728, 'epoch': 2}
{'type': 'loss', 'content': 0.10697001963853836, 'timestamp': '2025-10-02 00:37:11.292907', 'step': 14729, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:11.347897', 'step': 14729, 'epoch': 2}
{'type': 'loss', 'content': 0.14410990476608276, 'timestamp': '2025-10-02 00:37:11.350475', 'step': 14730, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:11.404727', 'step': 14730, 'epoch': 2}
{'type': 'loss', 'content': 0.055821821093559265, 'timestamp': '2025-10-02 00:37:11.410666', 'step': 14731, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:11.464870', 'step': 14731, 'epoch': 2}
{'type': 'loss', 'content': 0.045797016471624374, 'timestamp': '2025-10-02 00:37:11.471446', 'step': 14732, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:11.524841', 'step': 14732, 'epoch': 2}
{'type': 'loss', 'content': 0.13245874643325806, 'timestamp': '2025-10-02 00:37:11.527113', 'step': 14733, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:11.581050', 'step': 14733, 'epoch': 2}
{'type': 'loss', 'content': 0.06459735333919525, 'timestamp': '2025-10-02 00:37:11.583582', 'step': 14734, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:11.640105', 'step': 14734, 'epoch': 2}
{'type': 'loss', 'content': 0.04458055645227432, 'timestamp': '2025-10-02 00:37:11.645845', 'step': 14735, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:11.699490', 'step': 14735, 'epoch': 2}
{'type': 'loss', 'content': 0.16374550759792328, 'timestamp': '2025-10-02 00:37:11.705266', 'step': 14736, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:11.770600', 'step': 14736, 'epoch': 2}
{'type': 'loss', 'content': 0.11457367241382599, 'timestamp': '2025-10-02 00:37:11.772844', 'step': 14737, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:11.826672', 'step': 14737, 'epoch': 2}
{'type': 'loss', 'content': 0.021098323166370392, 'timestamp': '2025-10-02 00:37:11.829228', 'step': 14738, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:11.883346', 'step': 14738, 'epoch': 2}
{'type': 'loss', 'content': 0.06313125044107437, 'timestamp': '2025-10-02 00:37:11.885712', 'step': 14739, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:11.940078', 'step': 14739, 'epoch': 2}
{'type': 'loss', 'content': 0.0557144470512867, 'timestamp': '2025-10-02 00:37:11.948124', 'step': 14740, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:12.002524', 'step': 14740, 'epoch': 2}
{'type': 'loss', 'content': 0.011255132034420967, 'timestamp': '2025-10-02 00:37:12.012009', 'step': 14741, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:12.066222', 'step': 14741, 'epoch': 2}
{'type': 'loss', 'content': 0.01859768107533455, 'timestamp': '2025-10-02 00:37:12.068392', 'step': 14742, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:12.123532', 'step': 14742, 'epoch': 2}
{'type': 'loss', 'content': 0.04605123773217201, 'timestamp': '2025-10-02 00:37:12.133038', 'step': 14743, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:12.187607', 'step': 14743, 'epoch': 2}
{'type': 'loss', 'content': 0.08658452332019806, 'timestamp': '2025-10-02 00:37:12.193637', 'step': 14744, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:37:12.255115', 'step': 14744, 'epoch': 2}
{'type': 'loss', 'content': 0.015249207615852356, 'timestamp': '2025-10-02 00:37:12.266436', 'step': 14745, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:12.321505', 'step': 14745, 'epoch': 2}
{'type': 'loss', 'content': 0.05091610550880432, 'timestamp': '2025-10-02 00:37:12.323876', 'step': 14746, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:12.378429', 'step': 14746, 'epoch': 2}
{'type': 'loss', 'content': 0.04573908448219299, 'timestamp': '2025-10-02 00:37:12.384149', 'step': 14747, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:12.438681', 'step': 14747, 'epoch': 2}
{'type': 'loss', 'content': 0.05703696608543396, 'timestamp': '2025-10-02 00:37:12.444393', 'step': 14748, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:12.497738', 'step': 14748, 'epoch': 2}
{'type': 'loss', 'content': 0.040739916265010834, 'timestamp': '2025-10-02 00:37:12.503640', 'step': 14749, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:12.557375', 'step': 14749, 'epoch': 2}
{'type': 'loss', 'content': 0.17720898985862732, 'timestamp': '2025-10-02 00:37:12.559746', 'step': 14750, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:37:12.621103', 'step': 14750, 'epoch': 2}
{'type': 'loss', 'content': 0.04092036187648773, 'timestamp': '2025-10-02 00:37:12.631534', 'step': 14751, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:12.685498', 'step': 14751, 'epoch': 2}
{'type': 'loss', 'content': 0.0619625560939312, 'timestamp': '2025-10-02 00:37:12.691899', 'step': 14752, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:37:12.752324', 'step': 14752, 'epoch': 2}
{'type': 'loss', 'content': 0.01194633450359106, 'timestamp': '2025-10-02 00:37:12.763856', 'step': 14753, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:12.819233', 'step': 14753, 'epoch': 2}
{'type': 'loss', 'content': 0.049904219806194305, 'timestamp': '2025-10-02 00:37:12.828754', 'step': 14754, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:12.883228', 'step': 14754, 'epoch': 2}
{'type': 'loss', 'content': 0.06772960722446442, 'timestamp': '2025-10-02 00:37:12.885589', 'step': 14755, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:12.939665', 'step': 14755, 'epoch': 2}
{'type': 'loss', 'content': 0.07685762643814087, 'timestamp': '2025-10-02 00:37:12.948941', 'step': 14756, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:37:13.017636', 'step': 14756, 'epoch': 2}
{'type': 'loss', 'content': 0.05828968808054924, 'timestamp': '2025-10-02 00:37:13.028924', 'step': 14757, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:13.083726', 'step': 14757, 'epoch': 2}
{'type': 'loss', 'content': 0.01421615295112133, 'timestamp': '2025-10-02 00:37:13.086108', 'step': 14758, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:13.140292', 'step': 14758, 'epoch': 2}
{'type': 'loss', 'content': 0.07500971108675003, 'timestamp': '2025-10-02 00:37:13.147589', 'step': 14759, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:13.203484', 'step': 14759, 'epoch': 2}
{'type': 'loss', 'content': 0.08574244379997253, 'timestamp': '2025-10-02 00:37:13.209178', 'step': 14760, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:13.262791', 'step': 14760, 'epoch': 2}
{'type': 'loss', 'content': 0.027162211015820503, 'timestamp': '2025-10-02 00:37:13.264977', 'step': 14761, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:13.319245', 'step': 14761, 'epoch': 2}
{'type': 'loss', 'content': 0.10536206513643265, 'timestamp': '2025-10-02 00:37:13.321526', 'step': 14762, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:13.375502', 'step': 14762, 'epoch': 2}
{'type': 'loss', 'content': 0.02324897237122059, 'timestamp': '2025-10-02 00:37:13.384842', 'step': 14763, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:13.438765', 'step': 14763, 'epoch': 2}
{'type': 'loss', 'content': 0.09057391434907913, 'timestamp': '2025-10-02 00:37:13.444595', 'step': 14764, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:13.501741', 'step': 14764, 'epoch': 2}
{'type': 'loss', 'content': 0.05505713075399399, 'timestamp': '2025-10-02 00:37:13.512746', 'step': 14765, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:37:13.574656', 'step': 14765, 'epoch': 2}
{'type': 'loss', 'content': 0.0580199658870697, 'timestamp': '2025-10-02 00:37:13.585244', 'step': 14766, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:13.645902', 'step': 14766, 'epoch': 2}
{'type': 'loss', 'content': 0.18391065299510956, 'timestamp': '2025-10-02 00:37:13.648120', 'step': 14767, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:13.703550', 'step': 14767, 'epoch': 2}
{'type': 'loss', 'content': 0.07869253307580948, 'timestamp': '2025-10-02 00:37:13.722098', 'step': 14768, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:37:13.775098', 'step': 14768, 'epoch': 2}
{'type': 'loss', 'content': 0.14647793769836426, 'timestamp': '2025-10-02 00:37:13.777466', 'step': 14769, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:13.836179', 'step': 14769, 'epoch': 2}
{'type': 'loss', 'content': 0.02131832204759121, 'timestamp': '2025-10-02 00:37:13.846373', 'step': 14770, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:13.900073', 'step': 14770, 'epoch': 2}
{'type': 'loss', 'content': 0.14482754468917847, 'timestamp': '2025-10-02 00:37:13.902410', 'step': 14771, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:13.956421', 'step': 14771, 'epoch': 2}
{'type': 'loss', 'content': 0.04173227772116661, 'timestamp': '2025-10-02 00:37:13.962227', 'step': 14772, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:14.019698', 'step': 14772, 'epoch': 2}
{'type': 'loss', 'content': 0.04342959448695183, 'timestamp': '2025-10-02 00:37:14.030719', 'step': 14773, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:14.085128', 'step': 14773, 'epoch': 2}
{'type': 'loss', 'content': 0.04660070315003395, 'timestamp': '2025-10-02 00:37:14.087845', 'step': 14774, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:14.142322', 'step': 14774, 'epoch': 2}
{'type': 'loss', 'content': 0.09960994869470596, 'timestamp': '2025-10-02 00:37:14.144966', 'step': 14775, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:14.200223', 'step': 14775, 'epoch': 2}
{'type': 'loss', 'content': 0.12634189426898956, 'timestamp': '2025-10-02 00:37:14.206166', 'step': 14776, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:14.260342', 'step': 14776, 'epoch': 2}
{'type': 'loss', 'content': 0.04367678239941597, 'timestamp': '2025-10-02 00:37:14.267662', 'step': 14777, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:14.322385', 'step': 14777, 'epoch': 2}
{'type': 'loss', 'content': 0.08394559472799301, 'timestamp': '2025-10-02 00:37:14.324733', 'step': 14778, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:14.379061', 'step': 14778, 'epoch': 2}
{'type': 'loss', 'content': 0.08462213724851608, 'timestamp': '2025-10-02 00:37:14.383353', 'step': 14779, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:14.438261', 'step': 14779, 'epoch': 2}
{'type': 'loss', 'content': 0.015224619768559933, 'timestamp': '2025-10-02 00:37:14.448386', 'step': 14780, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:14.501680', 'step': 14780, 'epoch': 2}
{'type': 'loss', 'content': 0.08961818367242813, 'timestamp': '2025-10-02 00:37:14.503975', 'step': 14781, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:14.557883', 'step': 14781, 'epoch': 2}
{'type': 'loss', 'content': 0.09177351742982864, 'timestamp': '2025-10-02 00:37:14.560220', 'step': 14782, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:14.614644', 'step': 14782, 'epoch': 2}
{'type': 'loss', 'content': 0.043942201882600784, 'timestamp': '2025-10-02 00:37:14.617454', 'step': 14783, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:37:14.687158', 'step': 14783, 'epoch': 2}
{'type': 'loss', 'content': 0.00954708643257618, 'timestamp': '2025-10-02 00:37:14.699961', 'step': 14784, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:14.757336', 'step': 14784, 'epoch': 2}
{'type': 'loss', 'content': 0.053311485797166824, 'timestamp': '2025-10-02 00:37:14.762386', 'step': 14785, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:14.822203', 'step': 14785, 'epoch': 2}
{'type': 'loss', 'content': 0.17950686812400818, 'timestamp': '2025-10-02 00:37:14.824593', 'step': 14786, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:14.882514', 'step': 14786, 'epoch': 2}
{'type': 'loss', 'content': 0.1326529085636139, 'timestamp': '2025-10-02 00:37:14.884933', 'step': 14787, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:14.939372', 'step': 14787, 'epoch': 2}
{'type': 'loss', 'content': 0.07816947251558304, 'timestamp': '2025-10-02 00:37:14.946828', 'step': 14788, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:15.000977', 'step': 14788, 'epoch': 2}
{'type': 'loss', 'content': 0.004566194023936987, 'timestamp': '2025-10-02 00:37:15.003232', 'step': 14789, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:15.056804', 'step': 14789, 'epoch': 2}
{'type': 'loss', 'content': 0.18414950370788574, 'timestamp': '2025-10-02 00:37:15.059372', 'step': 14790, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:15.113416', 'step': 14790, 'epoch': 2}
{'type': 'loss', 'content': 0.07796167582273483, 'timestamp': '2025-10-02 00:37:15.116008', 'step': 14791, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:15.175264', 'step': 14791, 'epoch': 2}
{'type': 'loss', 'content': 0.04244530573487282, 'timestamp': '2025-10-02 00:37:15.186573', 'step': 14792, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:15.282916', 'step': 14792, 'epoch': 2}
{'type': 'loss', 'content': 0.03893904387950897, 'timestamp': '2025-10-02 00:37:15.290480', 'step': 14793, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:15.387672', 'step': 14793, 'epoch': 2}
{'type': 'loss', 'content': 0.04083051159977913, 'timestamp': '2025-10-02 00:37:15.405176', 'step': 14794, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:37:15.505756', 'step': 14794, 'epoch': 2}
{'type': 'loss', 'content': 0.0017351818969473243, 'timestamp': '2025-10-02 00:37:15.517738', 'step': 14795, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:37:15.612099', 'step': 14795, 'epoch': 2}
{'type': 'loss', 'content': 0.2213437408208847, 'timestamp': '2025-10-02 00:37:15.619748', 'step': 14796, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:37:15.677866', 'step': 14796, 'epoch': 2}
{'type': 'loss', 'content': 0.051267359405756, 'timestamp': '2025-10-02 00:37:15.693214', 'step': 14797, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:15.781862', 'step': 14797, 'epoch': 2}
{'type': 'loss', 'content': 0.06177273020148277, 'timestamp': '2025-10-02 00:37:15.789276', 'step': 14798, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:15.849403', 'step': 14798, 'epoch': 2}
{'type': 'loss', 'content': 0.034802090376615524, 'timestamp': '2025-10-02 00:37:15.859001', 'step': 14799, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:37:15.937252', 'step': 14799, 'epoch': 2}
{'type': 'loss', 'content': 0.10760176181793213, 'timestamp': '2025-10-02 00:37:15.949954', 'step': 14800, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:16.023263', 'step': 14800, 'epoch': 2}
{'type': 'loss', 'content': 0.05482760816812515, 'timestamp': '2025-10-02 00:37:16.027588', 'step': 14801, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:16.103606', 'step': 14801, 'epoch': 2}
{'type': 'loss', 'content': 0.13467679917812347, 'timestamp': '2025-10-02 00:37:16.107738', 'step': 14802, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:16.173835', 'step': 14802, 'epoch': 2}
{'type': 'loss', 'content': 0.0763673484325409, 'timestamp': '2025-10-02 00:37:16.183195', 'step': 14803, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:16.277317', 'step': 14803, 'epoch': 2}
{'type': 'loss', 'content': 0.0393083281815052, 'timestamp': '2025-10-02 00:37:16.285751', 'step': 14804, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:16.349002', 'step': 14804, 'epoch': 2}
{'type': 'loss', 'content': 0.1216123178601265, 'timestamp': '2025-10-02 00:37:16.358550', 'step': 14805, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:16.427041', 'step': 14805, 'epoch': 2}
{'type': 'loss', 'content': 0.027527010068297386, 'timestamp': '2025-10-02 00:37:16.433453', 'step': 14806, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:16.499348', 'step': 14806, 'epoch': 2}
{'type': 'loss', 'content': 0.059916965663433075, 'timestamp': '2025-10-02 00:37:16.502822', 'step': 14807, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:16.560725', 'step': 14807, 'epoch': 2}
{'type': 'loss', 'content': 0.06812361627817154, 'timestamp': '2025-10-02 00:37:16.567388', 'step': 14808, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:16.630943', 'step': 14808, 'epoch': 2}
{'type': 'loss', 'content': 0.037588782608509064, 'timestamp': '2025-10-02 00:37:16.638441', 'step': 14809, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:16.695711', 'step': 14809, 'epoch': 2}
{'type': 'loss', 'content': 0.034521255642175674, 'timestamp': '2025-10-02 00:37:16.703380', 'step': 14810, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:16.771290', 'step': 14810, 'epoch': 2}
{'type': 'loss', 'content': 0.1930398792028427, 'timestamp': '2025-10-02 00:37:16.780301', 'step': 14811, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:16.852613', 'step': 14811, 'epoch': 2}
{'type': 'loss', 'content': 0.06554755568504333, 'timestamp': '2025-10-02 00:37:16.863338', 'step': 14812, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:16.935388', 'step': 14812, 'epoch': 2}
{'type': 'loss', 'content': 0.022365601733326912, 'timestamp': '2025-10-02 00:37:16.943470', 'step': 14813, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:17.005650', 'step': 14813, 'epoch': 2}
{'type': 'loss', 'content': 0.0890604555606842, 'timestamp': '2025-10-02 00:37:17.008610', 'step': 14814, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:37:17.078296', 'step': 14814, 'epoch': 2}
{'type': 'loss', 'content': 0.028855938464403152, 'timestamp': '2025-10-02 00:37:17.089199', 'step': 14815, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:17.157654', 'step': 14815, 'epoch': 2}
{'type': 'loss', 'content': 0.05178973078727722, 'timestamp': '2025-10-02 00:37:17.170942', 'step': 14816, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:17.227297', 'step': 14816, 'epoch': 2}
{'type': 'loss', 'content': 0.03480172157287598, 'timestamp': '2025-10-02 00:37:17.230259', 'step': 14817, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:17.309881', 'step': 14817, 'epoch': 2}
{'type': 'loss', 'content': 0.0457182414829731, 'timestamp': '2025-10-02 00:37:17.312658', 'step': 14818, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:17.394729', 'step': 14818, 'epoch': 2}
{'type': 'loss', 'content': 0.0069936406798660755, 'timestamp': '2025-10-02 00:37:17.407853', 'step': 14819, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:17.485090', 'step': 14819, 'epoch': 2}
{'type': 'loss', 'content': 0.02111230045557022, 'timestamp': '2025-10-02 00:37:17.511163', 'step': 14820, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:17.580410', 'step': 14820, 'epoch': 2}
{'type': 'loss', 'content': 0.022102676331996918, 'timestamp': '2025-10-02 00:37:17.587562', 'step': 14821, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:17.653838', 'step': 14821, 'epoch': 2}
{'type': 'loss', 'content': 0.03997322544455528, 'timestamp': '2025-10-02 00:37:17.664971', 'step': 14822, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:17.739863', 'step': 14822, 'epoch': 2}
{'type': 'loss', 'content': 0.04411129653453827, 'timestamp': '2025-10-02 00:37:17.743521', 'step': 14823, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:17.813282', 'step': 14823, 'epoch': 2}
{'type': 'loss', 'content': 0.012396945618093014, 'timestamp': '2025-10-02 00:37:17.823855', 'step': 14824, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:17.884431', 'step': 14824, 'epoch': 2}
{'type': 'loss', 'content': 0.04516484588384628, 'timestamp': '2025-10-02 00:37:17.887441', 'step': 14825, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:17.944509', 'step': 14825, 'epoch': 2}
{'type': 'loss', 'content': 0.1451655775308609, 'timestamp': '2025-10-02 00:37:17.947435', 'step': 14826, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:18.007958', 'step': 14826, 'epoch': 2}
{'type': 'loss', 'content': 0.09695152193307877, 'timestamp': '2025-10-02 00:37:18.010779', 'step': 14827, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:18.067865', 'step': 14827, 'epoch': 2}
{'type': 'loss', 'content': 0.10211107134819031, 'timestamp': '2025-10-02 00:37:18.074826', 'step': 14828, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:18.137711', 'step': 14828, 'epoch': 2}
{'type': 'loss', 'content': 0.03927735239267349, 'timestamp': '2025-10-02 00:37:18.147338', 'step': 14829, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:18.211381', 'step': 14829, 'epoch': 2}
{'type': 'loss', 'content': 0.04693910852074623, 'timestamp': '2025-10-02 00:37:18.214642', 'step': 14830, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:18.274140', 'step': 14830, 'epoch': 2}
{'type': 'loss', 'content': 0.05522967875003815, 'timestamp': '2025-10-02 00:37:18.281580', 'step': 14831, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:18.344335', 'step': 14831, 'epoch': 2}
{'type': 'loss', 'content': 0.007418373599648476, 'timestamp': '2025-10-02 00:37:18.354480', 'step': 14832, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:18.410827', 'step': 14832, 'epoch': 2}
{'type': 'loss', 'content': 0.1572660207748413, 'timestamp': '2025-10-02 00:37:18.413817', 'step': 14833, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:18.489137', 'step': 14833, 'epoch': 2}
{'type': 'loss', 'content': 0.034976404160261154, 'timestamp': '2025-10-02 00:37:18.499288', 'step': 14834, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:37:18.582031', 'step': 14834, 'epoch': 2}
{'type': 'loss', 'content': 0.03575558587908745, 'timestamp': '2025-10-02 00:37:18.592704', 'step': 14835, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:18.659355', 'step': 14835, 'epoch': 2}
{'type': 'loss', 'content': 0.0711275115609169, 'timestamp': '2025-10-02 00:37:18.671692', 'step': 14836, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:18.732232', 'step': 14836, 'epoch': 2}
{'type': 'loss', 'content': 0.11229714006185532, 'timestamp': '2025-10-02 00:37:18.735179', 'step': 14837, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:18.790964', 'step': 14837, 'epoch': 2}
{'type': 'loss', 'content': 0.08840501308441162, 'timestamp': '2025-10-02 00:37:18.800377', 'step': 14838, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:18.869617', 'step': 14838, 'epoch': 2}
{'type': 'loss', 'content': 0.12402807921171188, 'timestamp': '2025-10-02 00:37:18.873361', 'step': 14839, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:18.935356', 'step': 14839, 'epoch': 2}
{'type': 'loss', 'content': 0.08631711453199387, 'timestamp': '2025-10-02 00:37:18.941775', 'step': 14840, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:37:19.007724', 'step': 14840, 'epoch': 2}
{'type': 'loss', 'content': 0.015337354503571987, 'timestamp': '2025-10-02 00:37:19.019305', 'step': 14841, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:19.092016', 'step': 14841, 'epoch': 2}
{'type': 'loss', 'content': 0.053851980715990067, 'timestamp': '2025-10-02 00:37:19.101572', 'step': 14842, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:19.170390', 'step': 14842, 'epoch': 2}
{'type': 'loss', 'content': 0.11786860972642899, 'timestamp': '2025-10-02 00:37:19.178635', 'step': 14843, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:19.246730', 'step': 14843, 'epoch': 2}
{'type': 'loss', 'content': 0.09539255499839783, 'timestamp': '2025-10-02 00:37:19.259134', 'step': 14844, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:19.330076', 'step': 14844, 'epoch': 2}
{'type': 'loss', 'content': 0.03242819011211395, 'timestamp': '2025-10-02 00:37:19.337713', 'step': 14845, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:19.406173', 'step': 14845, 'epoch': 2}
{'type': 'loss', 'content': 0.17545965313911438, 'timestamp': '2025-10-02 00:37:19.409412', 'step': 14846, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:19.473611', 'step': 14846, 'epoch': 2}
{'type': 'loss', 'content': 0.03205382078886032, 'timestamp': '2025-10-02 00:37:19.483797', 'step': 14847, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:19.557719', 'step': 14847, 'epoch': 2}
{'type': 'loss', 'content': 0.14381761848926544, 'timestamp': '2025-10-02 00:37:19.567513', 'step': 14848, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:19.630832', 'step': 14848, 'epoch': 2}
{'type': 'loss', 'content': 0.074264295399189, 'timestamp': '2025-10-02 00:37:19.636686', 'step': 14849, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:19.707442', 'step': 14849, 'epoch': 2}
{'type': 'loss', 'content': 0.09148198366165161, 'timestamp': '2025-10-02 00:37:19.712625', 'step': 14850, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:19.771886', 'step': 14850, 'epoch': 2}
{'type': 'loss', 'content': 0.18954990804195404, 'timestamp': '2025-10-02 00:37:19.775605', 'step': 14851, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:19.836502', 'step': 14851, 'epoch': 2}
{'type': 'loss', 'content': 0.1842649281024933, 'timestamp': '2025-10-02 00:37:19.843354', 'step': 14852, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:37:19.902612', 'step': 14852, 'epoch': 2}
{'type': 'loss', 'content': 0.08669040352106094, 'timestamp': '2025-10-02 00:37:19.913233', 'step': 14853, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:19.983883', 'step': 14853, 'epoch': 2}
{'type': 'loss', 'content': 0.03584941476583481, 'timestamp': '2025-10-02 00:37:19.991387', 'step': 14854, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:20.056377', 'step': 14854, 'epoch': 2}
{'type': 'loss', 'content': 0.08465322852134705, 'timestamp': '2025-10-02 00:37:20.058980', 'step': 14855, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:20.119001', 'step': 14855, 'epoch': 2}
{'type': 'loss', 'content': 0.049039725214242935, 'timestamp': '2025-10-02 00:37:20.131012', 'step': 14856, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:37:20.204401', 'step': 14856, 'epoch': 2}
{'type': 'loss', 'content': 0.10732927173376083, 'timestamp': '2025-10-02 00:37:20.212239', 'step': 14857, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:37:20.286957', 'step': 14857, 'epoch': 2}
{'type': 'loss', 'content': 0.12209568917751312, 'timestamp': '2025-10-02 00:37:20.291226', 'step': 14858, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:20.362095', 'step': 14858, 'epoch': 2}
{'type': 'loss', 'content': 0.024937747046351433, 'timestamp': '2025-10-02 00:37:20.371619', 'step': 14859, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:20.435353', 'step': 14859, 'epoch': 2}
{'type': 'loss', 'content': 0.05701698735356331, 'timestamp': '2025-10-02 00:37:20.448557', 'step': 14860, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:20.511137', 'step': 14860, 'epoch': 2}
{'type': 'loss', 'content': 0.04975200816988945, 'timestamp': '2025-10-02 00:37:20.517013', 'step': 14861, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:20.588003', 'step': 14861, 'epoch': 2}
{'type': 'loss', 'content': 0.02524629607796669, 'timestamp': '2025-10-02 00:37:20.594476', 'step': 14862, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:20.662175', 'step': 14862, 'epoch': 2}
{'type': 'loss', 'content': 0.0740445926785469, 'timestamp': '2025-10-02 00:37:20.670153', 'step': 14863, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:20.742334', 'step': 14863, 'epoch': 2}
{'type': 'loss', 'content': 0.04188251122832298, 'timestamp': '2025-10-02 00:37:20.752593', 'step': 14864, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:20.830353', 'step': 14864, 'epoch': 2}
{'type': 'loss', 'content': 0.03851361572742462, 'timestamp': '2025-10-02 00:37:20.841373', 'step': 14865, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:37:20.913268', 'step': 14865, 'epoch': 2}
{'type': 'loss', 'content': 0.05584900081157684, 'timestamp': '2025-10-02 00:37:20.916699', 'step': 14866, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:20.982738', 'step': 14866, 'epoch': 2}
{'type': 'loss', 'content': 0.020937517285346985, 'timestamp': '2025-10-02 00:37:20.992013', 'step': 14867, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:37:21.060042', 'step': 14867, 'epoch': 2}
{'type': 'loss', 'content': 0.04339553415775299, 'timestamp': '2025-10-02 00:37:21.071492', 'step': 14868, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:21.143339', 'step': 14868, 'epoch': 2}
{'type': 'loss', 'content': 0.018287016078829765, 'timestamp': '2025-10-02 00:37:21.149778', 'step': 14869, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:21.222139', 'step': 14869, 'epoch': 2}
{'type': 'loss', 'content': 0.024943098425865173, 'timestamp': '2025-10-02 00:37:21.231681', 'step': 14870, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:21.304739', 'step': 14870, 'epoch': 2}
{'type': 'loss', 'content': 0.05445249378681183, 'timestamp': '2025-10-02 00:37:21.314530', 'step': 14871, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:21.384072', 'step': 14871, 'epoch': 2}
{'type': 'loss', 'content': 0.02207566797733307, 'timestamp': '2025-10-02 00:37:21.395177', 'step': 14872, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:21.461769', 'step': 14872, 'epoch': 2}
{'type': 'loss', 'content': 0.0993732139468193, 'timestamp': '2025-10-02 00:37:21.471915', 'step': 14873, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:21.546685', 'step': 14873, 'epoch': 2}
{'type': 'loss', 'content': 0.21517953276634216, 'timestamp': '2025-10-02 00:37:21.555291', 'step': 14874, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:21.623628', 'step': 14874, 'epoch': 2}
{'type': 'loss', 'content': 0.11127832531929016, 'timestamp': '2025-10-02 00:37:21.632864', 'step': 14875, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:21.698448', 'step': 14875, 'epoch': 2}
{'type': 'loss', 'content': 0.06960626691579819, 'timestamp': '2025-10-02 00:37:21.708223', 'step': 14876, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:21.780099', 'step': 14876, 'epoch': 2}
{'type': 'loss', 'content': 0.03517076000571251, 'timestamp': '2025-10-02 00:37:21.786468', 'step': 14877, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:21.858513', 'step': 14877, 'epoch': 2}
{'type': 'loss', 'content': 0.020965637639164925, 'timestamp': '2025-10-02 00:37:21.864898', 'step': 14878, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:21.922048', 'step': 14878, 'epoch': 2}
{'type': 'loss', 'content': 0.042371928691864014, 'timestamp': '2025-10-02 00:37:21.931399', 'step': 14879, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:22.001135', 'step': 14879, 'epoch': 2}
{'type': 'loss', 'content': 0.13238802552223206, 'timestamp': '2025-10-02 00:37:22.012211', 'step': 14880, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:22.068855', 'step': 14880, 'epoch': 2}
{'type': 'loss', 'content': 0.08194738626480103, 'timestamp': '2025-10-02 00:37:22.072457', 'step': 14881, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:22.134654', 'step': 14881, 'epoch': 2}
{'type': 'loss', 'content': 0.045736271888017654, 'timestamp': '2025-10-02 00:37:22.137599', 'step': 14882, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:22.195320', 'step': 14882, 'epoch': 2}
{'type': 'loss', 'content': 0.10552988946437836, 'timestamp': '2025-10-02 00:37:22.198115', 'step': 14883, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:22.268891', 'step': 14883, 'epoch': 2}
{'type': 'loss', 'content': 0.0207325741648674, 'timestamp': '2025-10-02 00:37:22.275369', 'step': 14884, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:22.339679', 'step': 14884, 'epoch': 2}
{'type': 'loss', 'content': 0.04859282821416855, 'timestamp': '2025-10-02 00:37:22.348431', 'step': 14885, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:37:22.419009', 'step': 14885, 'epoch': 2}
{'type': 'loss', 'content': 0.09702818095684052, 'timestamp': '2025-10-02 00:37:22.422337', 'step': 14886, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:22.493376', 'step': 14886, 'epoch': 2}
{'type': 'loss', 'content': 0.08248380571603775, 'timestamp': '2025-10-02 00:37:22.501766', 'step': 14887, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:22.575435', 'step': 14887, 'epoch': 2}
{'type': 'loss', 'content': 0.07644354552030563, 'timestamp': '2025-10-02 00:37:22.583055', 'step': 14888, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:22.646713', 'step': 14888, 'epoch': 2}
{'type': 'loss', 'content': 0.1604885309934616, 'timestamp': '2025-10-02 00:37:22.661111', 'step': 14889, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:22.735553', 'step': 14889, 'epoch': 2}
{'type': 'loss', 'content': 0.10781553387641907, 'timestamp': '2025-10-02 00:37:22.738549', 'step': 14890, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:22.809177', 'step': 14890, 'epoch': 2}
{'type': 'loss', 'content': 0.0726475939154625, 'timestamp': '2025-10-02 00:37:22.822801', 'step': 14891, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:22.885546', 'step': 14891, 'epoch': 2}
{'type': 'loss', 'content': 0.05277913063764572, 'timestamp': '2025-10-02 00:37:22.900002', 'step': 14892, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:37:22.971757', 'step': 14892, 'epoch': 2}
{'type': 'loss', 'content': 0.03615688905119896, 'timestamp': '2025-10-02 00:37:22.983065', 'step': 14893, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:23.047623', 'step': 14893, 'epoch': 2}
{'type': 'loss', 'content': 0.05098699405789375, 'timestamp': '2025-10-02 00:37:23.051394', 'step': 14894, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:23.115914', 'step': 14894, 'epoch': 2}
{'type': 'loss', 'content': 0.024294555187225342, 'timestamp': '2025-10-02 00:37:23.121773', 'step': 14895, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:23.178007', 'step': 14895, 'epoch': 2}
{'type': 'loss', 'content': 0.0676613375544548, 'timestamp': '2025-10-02 00:37:23.188370', 'step': 14896, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:23.255713', 'step': 14896, 'epoch': 2}
{'type': 'loss', 'content': 0.10461006313562393, 'timestamp': '2025-10-02 00:37:23.259268', 'step': 14897, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:23.321853', 'step': 14897, 'epoch': 2}
{'type': 'loss', 'content': 0.02040114998817444, 'timestamp': '2025-10-02 00:37:23.331252', 'step': 14898, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:23.402092', 'step': 14898, 'epoch': 2}
{'type': 'loss', 'content': 0.11138522624969482, 'timestamp': '2025-10-02 00:37:23.406222', 'step': 14899, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:23.470702', 'step': 14899, 'epoch': 2}
{'type': 'loss', 'content': 0.07312773168087006, 'timestamp': '2025-10-02 00:37:23.478698', 'step': 14900, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:23.545599', 'step': 14900, 'epoch': 2}
{'type': 'loss', 'content': 0.016656547784805298, 'timestamp': '2025-10-02 00:37:23.552865', 'step': 14901, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:23.616808', 'step': 14901, 'epoch': 2}
{'type': 'loss', 'content': 0.08879673480987549, 'timestamp': '2025-10-02 00:37:23.621076', 'step': 14902, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:23.691588', 'step': 14902, 'epoch': 2}
{'type': 'loss', 'content': 0.09870664030313492, 'timestamp': '2025-10-02 00:37:23.694782', 'step': 14903, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:23.762940', 'step': 14903, 'epoch': 2}
{'type': 'loss', 'content': 0.07839429378509521, 'timestamp': '2025-10-02 00:37:23.770355', 'step': 14904, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:23.832751', 'step': 14904, 'epoch': 2}
{'type': 'loss', 'content': 0.007273525930941105, 'timestamp': '2025-10-02 00:37:23.842195', 'step': 14905, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:23.909826', 'step': 14905, 'epoch': 2}
{'type': 'loss', 'content': 0.022730523720383644, 'timestamp': '2025-10-02 00:37:23.919152', 'step': 14906, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:23.981221', 'step': 14906, 'epoch': 2}
{'type': 'loss', 'content': 0.1289251744747162, 'timestamp': '2025-10-02 00:37:23.988788', 'step': 14907, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:24.053077', 'step': 14907, 'epoch': 2}
{'type': 'loss', 'content': 0.09328676760196686, 'timestamp': '2025-10-02 00:37:24.065677', 'step': 14908, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:24.137389', 'step': 14908, 'epoch': 2}
{'type': 'loss', 'content': 0.055652402341365814, 'timestamp': '2025-10-02 00:37:24.148404', 'step': 14909, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:24.228639', 'step': 14909, 'epoch': 2}
{'type': 'loss', 'content': 0.025363121181726456, 'timestamp': '2025-10-02 00:37:24.234406', 'step': 14910, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:37:24.322881', 'step': 14910, 'epoch': 2}
{'type': 'loss', 'content': 0.03125759959220886, 'timestamp': '2025-10-02 00:37:24.333483', 'step': 14911, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:24.403641', 'step': 14911, 'epoch': 2}
{'type': 'loss', 'content': 0.1630108505487442, 'timestamp': '2025-10-02 00:37:24.416556', 'step': 14912, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:24.484067', 'step': 14912, 'epoch': 2}
{'type': 'loss', 'content': 0.0756986141204834, 'timestamp': '2025-10-02 00:37:24.495231', 'step': 14913, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:24.557496', 'step': 14913, 'epoch': 2}
{'type': 'loss', 'content': 0.004577793180942535, 'timestamp': '2025-10-02 00:37:24.561034', 'step': 14914, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:24.630347', 'step': 14914, 'epoch': 2}
{'type': 'loss', 'content': 0.17396222054958344, 'timestamp': '2025-10-02 00:37:24.641349', 'step': 14915, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:24.714624', 'step': 14915, 'epoch': 2}
{'type': 'loss', 'content': 0.005354498513042927, 'timestamp': '2025-10-02 00:37:24.733068', 'step': 14916, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:24.805823', 'step': 14916, 'epoch': 2}
{'type': 'loss', 'content': 0.052927032113075256, 'timestamp': '2025-10-02 00:37:24.817686', 'step': 14917, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:37:24.879316', 'step': 14917, 'epoch': 2}
{'type': 'loss', 'content': 0.1845335215330124, 'timestamp': '2025-10-02 00:37:24.881857', 'step': 14918, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:24.942192', 'step': 14918, 'epoch': 2}
{'type': 'loss', 'content': 0.05861302837729454, 'timestamp': '2025-10-02 00:37:24.944920', 'step': 14919, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:25.002050', 'step': 14919, 'epoch': 2}
{'type': 'loss', 'content': 0.02372865192592144, 'timestamp': '2025-10-02 00:37:25.008492', 'step': 14920, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:25.063300', 'step': 14920, 'epoch': 2}
{'type': 'loss', 'content': 0.026233188807964325, 'timestamp': '2025-10-02 00:37:25.066504', 'step': 14921, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:25.127512', 'step': 14921, 'epoch': 2}
{'type': 'loss', 'content': 0.05446505919098854, 'timestamp': '2025-10-02 00:37:25.131143', 'step': 14922, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:37:25.193105', 'step': 14922, 'epoch': 2}
{'type': 'loss', 'content': 0.047368332743644714, 'timestamp': '2025-10-02 00:37:25.196778', 'step': 14923, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:25.253618', 'step': 14923, 'epoch': 2}
{'type': 'loss', 'content': 0.08490870147943497, 'timestamp': '2025-10-02 00:37:25.261677', 'step': 14924, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:25.322885', 'step': 14924, 'epoch': 2}
{'type': 'loss', 'content': 0.07077259570360184, 'timestamp': '2025-10-02 00:37:25.328824', 'step': 14925, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:25.386643', 'step': 14925, 'epoch': 2}
{'type': 'loss', 'content': 0.10302227735519409, 'timestamp': '2025-10-02 00:37:25.394155', 'step': 14926, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:25.458736', 'step': 14926, 'epoch': 2}
{'type': 'loss', 'content': 0.07528428733348846, 'timestamp': '2025-10-02 00:37:25.469809', 'step': 14927, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:25.535197', 'step': 14927, 'epoch': 2}
{'type': 'loss', 'content': 0.07141739130020142, 'timestamp': '2025-10-02 00:37:25.542858', 'step': 14928, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:25.608947', 'step': 14928, 'epoch': 2}
{'type': 'loss', 'content': 0.0869767963886261, 'timestamp': '2025-10-02 00:37:25.611996', 'step': 14929, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:25.669424', 'step': 14929, 'epoch': 2}
{'type': 'loss', 'content': 0.11316299438476562, 'timestamp': '2025-10-02 00:37:25.673234', 'step': 14930, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:37:25.750056', 'step': 14930, 'epoch': 2}
{'type': 'loss', 'content': 0.01671629399061203, 'timestamp': '2025-10-02 00:37:25.760921', 'step': 14931, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:25.818493', 'step': 14931, 'epoch': 2}
{'type': 'loss', 'content': 0.07981854677200317, 'timestamp': '2025-10-02 00:37:25.826746', 'step': 14932, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:25.892074', 'step': 14932, 'epoch': 2}
{'type': 'loss', 'content': 0.014434135518968105, 'timestamp': '2025-10-02 00:37:25.901496', 'step': 14933, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:25.966318', 'step': 14933, 'epoch': 2}
{'type': 'loss', 'content': 0.046215660870075226, 'timestamp': '2025-10-02 00:37:25.976541', 'step': 14934, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:26.038028', 'step': 14934, 'epoch': 2}
{'type': 'loss', 'content': 0.12685568630695343, 'timestamp': '2025-10-02 00:37:26.041606', 'step': 14935, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:26.104734', 'step': 14935, 'epoch': 2}
{'type': 'loss', 'content': 0.06961102038621902, 'timestamp': '2025-10-02 00:37:26.122828', 'step': 14936, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:26.183477', 'step': 14936, 'epoch': 2}
{'type': 'loss', 'content': 0.021760260686278343, 'timestamp': '2025-10-02 00:37:26.195630', 'step': 14937, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:26.261275', 'step': 14937, 'epoch': 2}
{'type': 'loss', 'content': 0.08698323369026184, 'timestamp': '2025-10-02 00:37:26.264074', 'step': 14938, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:26.320377', 'step': 14938, 'epoch': 2}
{'type': 'loss', 'content': 0.0695762112736702, 'timestamp': '2025-10-02 00:37:26.323323', 'step': 14939, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:26.381809', 'step': 14939, 'epoch': 2}
{'type': 'loss', 'content': 0.055785711854696274, 'timestamp': '2025-10-02 00:37:26.387804', 'step': 14940, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:26.445483', 'step': 14940, 'epoch': 2}
{'type': 'loss', 'content': 0.03234773501753807, 'timestamp': '2025-10-02 00:37:26.451425', 'step': 14941, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:26.507750', 'step': 14941, 'epoch': 2}
{'type': 'loss', 'content': 0.10901223123073578, 'timestamp': '2025-10-02 00:37:26.510535', 'step': 14942, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:26.569536', 'step': 14942, 'epoch': 2}
{'type': 'loss', 'content': 0.02409147098660469, 'timestamp': '2025-10-02 00:37:26.575226', 'step': 14943, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:26.643453', 'step': 14943, 'epoch': 2}
{'type': 'loss', 'content': 0.03065556474030018, 'timestamp': '2025-10-02 00:37:26.654432', 'step': 14944, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:26.718836', 'step': 14944, 'epoch': 2}
{'type': 'loss', 'content': 0.011886038817465305, 'timestamp': '2025-10-02 00:37:26.726408', 'step': 14945, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:26.792429', 'step': 14945, 'epoch': 2}
{'type': 'loss', 'content': 0.031237566843628883, 'timestamp': '2025-10-02 00:37:26.802016', 'step': 14946, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:26.866304', 'step': 14946, 'epoch': 2}
{'type': 'loss', 'content': 0.04089970886707306, 'timestamp': '2025-10-02 00:37:26.875831', 'step': 14947, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:26.943003', 'step': 14947, 'epoch': 2}
{'type': 'loss', 'content': 0.029203984886407852, 'timestamp': '2025-10-02 00:37:26.950963', 'step': 14948, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:27.012487', 'step': 14948, 'epoch': 2}
{'type': 'loss', 'content': 0.04356985166668892, 'timestamp': '2025-10-02 00:37:27.021453', 'step': 14949, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:37:27.094429', 'step': 14949, 'epoch': 2}
{'type': 'loss', 'content': 0.050020888447761536, 'timestamp': '2025-10-02 00:37:27.105165', 'step': 14950, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:37:27.181360', 'step': 14950, 'epoch': 2}
{'type': 'loss', 'content': 0.013758573681116104, 'timestamp': '2025-10-02 00:37:27.191877', 'step': 14951, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:27.252615', 'step': 14951, 'epoch': 2}
{'type': 'loss', 'content': 0.021959705278277397, 'timestamp': '2025-10-02 00:37:27.262887', 'step': 14952, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:27.332310', 'step': 14952, 'epoch': 2}
{'type': 'loss', 'content': 0.016616685315966606, 'timestamp': '2025-10-02 00:37:27.341944', 'step': 14953, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:27.414816', 'step': 14953, 'epoch': 2}
{'type': 'loss', 'content': 0.012944185175001621, 'timestamp': '2025-10-02 00:37:27.418113', 'step': 14954, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:27.478549', 'step': 14954, 'epoch': 2}
{'type': 'loss', 'content': 0.06204012408852577, 'timestamp': '2025-10-02 00:37:27.485998', 'step': 14955, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:27.542618', 'step': 14955, 'epoch': 2}
{'type': 'loss', 'content': 0.09788043051958084, 'timestamp': '2025-10-02 00:37:27.549528', 'step': 14956, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:27.614471', 'step': 14956, 'epoch': 2}
{'type': 'loss', 'content': 0.08584864437580109, 'timestamp': '2025-10-02 00:37:27.623340', 'step': 14957, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:27.687633', 'step': 14957, 'epoch': 2}
{'type': 'loss', 'content': 0.1531735062599182, 'timestamp': '2025-10-02 00:37:27.695700', 'step': 14958, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:27.763536', 'step': 14958, 'epoch': 2}
{'type': 'loss', 'content': 0.05204012989997864, 'timestamp': '2025-10-02 00:37:27.772704', 'step': 14959, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:37:27.851374', 'step': 14959, 'epoch': 2}
{'type': 'loss', 'content': 0.013316627591848373, 'timestamp': '2025-10-02 00:37:27.862818', 'step': 14960, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:27.927425', 'step': 14960, 'epoch': 2}
{'type': 'loss', 'content': 0.014931350946426392, 'timestamp': '2025-10-02 00:37:27.934611', 'step': 14961, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:27.995669', 'step': 14961, 'epoch': 2}
{'type': 'loss', 'content': 0.23276086151599884, 'timestamp': '2025-10-02 00:37:28.003557', 'step': 14962, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:37:28.091187', 'step': 14962, 'epoch': 2}
{'type': 'loss', 'content': 0.017158007249236107, 'timestamp': '2025-10-02 00:37:28.104391', 'step': 14963, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:28.163772', 'step': 14963, 'epoch': 2}
{'type': 'loss', 'content': 0.02196967788040638, 'timestamp': '2025-10-02 00:37:28.175639', 'step': 14964, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:28.241886', 'step': 14964, 'epoch': 2}
{'type': 'loss', 'content': 0.0672815665602684, 'timestamp': '2025-10-02 00:37:28.248359', 'step': 14965, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:28.310842', 'step': 14965, 'epoch': 2}
{'type': 'loss', 'content': 0.17047558724880219, 'timestamp': '2025-10-02 00:37:28.314534', 'step': 14966, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:28.370617', 'step': 14966, 'epoch': 2}
{'type': 'loss', 'content': 0.12021560221910477, 'timestamp': '2025-10-02 00:37:28.374333', 'step': 14967, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:28.439489', 'step': 14967, 'epoch': 2}
{'type': 'loss', 'content': 0.10045365989208221, 'timestamp': '2025-10-02 00:37:28.446182', 'step': 14968, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:28.500412', 'step': 14968, 'epoch': 2}
{'type': 'loss', 'content': 0.12410849332809448, 'timestamp': '2025-10-02 00:37:28.504767', 'step': 14969, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:28.560487', 'step': 14969, 'epoch': 2}
{'type': 'loss', 'content': 0.08679934591054916, 'timestamp': '2025-10-02 00:37:28.563264', 'step': 14970, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:28.629228', 'step': 14970, 'epoch': 2}
{'type': 'loss', 'content': 0.02570783719420433, 'timestamp': '2025-10-02 00:37:28.639446', 'step': 14971, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:37:28.700770', 'step': 14971, 'epoch': 2}
{'type': 'loss', 'content': 0.15035675466060638, 'timestamp': '2025-10-02 00:37:28.708000', 'step': 14972, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:28.764799', 'step': 14972, 'epoch': 2}
{'type': 'loss', 'content': 0.05227966979146004, 'timestamp': '2025-10-02 00:37:28.767581', 'step': 14973, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:28.841830', 'step': 14973, 'epoch': 2}
{'type': 'loss', 'content': 0.04221806675195694, 'timestamp': '2025-10-02 00:37:28.852035', 'step': 14974, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:28.922212', 'step': 14974, 'epoch': 2}
{'type': 'loss', 'content': 0.1173633262515068, 'timestamp': '2025-10-02 00:37:28.926457', 'step': 14975, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:28.990415', 'step': 14975, 'epoch': 2}
{'type': 'loss', 'content': 0.018546905368566513, 'timestamp': '2025-10-02 00:37:29.000526', 'step': 14976, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:29.056854', 'step': 14976, 'epoch': 2}
{'type': 'loss', 'content': 0.10068705677986145, 'timestamp': '2025-10-02 00:37:29.060001', 'step': 14977, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:29.117617', 'step': 14977, 'epoch': 2}
{'type': 'loss', 'content': 0.062892846763134, 'timestamp': '2025-10-02 00:37:29.119995', 'step': 14978, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:37:29.182166', 'step': 14978, 'epoch': 2}
{'type': 'loss', 'content': 0.049366217106580734, 'timestamp': '2025-10-02 00:37:29.192624', 'step': 14979, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:29.249050', 'step': 14979, 'epoch': 2}
{'type': 'loss', 'content': 0.06106357276439667, 'timestamp': '2025-10-02 00:37:29.255574', 'step': 14980, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:29.312427', 'step': 14980, 'epoch': 2}
{'type': 'loss', 'content': 0.09537004679441452, 'timestamp': '2025-10-02 00:37:29.316395', 'step': 14981, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:29.373270', 'step': 14981, 'epoch': 2}
{'type': 'loss', 'content': 0.21634629368782043, 'timestamp': '2025-10-02 00:37:29.378886', 'step': 14982, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:29.435626', 'step': 14982, 'epoch': 2}
{'type': 'loss', 'content': 0.22856852412223816, 'timestamp': '2025-10-02 00:37:29.438779', 'step': 14983, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:37:29.502194', 'step': 14983, 'epoch': 2}
{'type': 'loss', 'content': 0.04970218241214752, 'timestamp': '2025-10-02 00:37:29.513467', 'step': 14984, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:29.568957', 'step': 14984, 'epoch': 2}
{'type': 'loss', 'content': 0.07353581488132477, 'timestamp': '2025-10-02 00:37:29.571690', 'step': 14985, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:29.628666', 'step': 14985, 'epoch': 2}
{'type': 'loss', 'content': 0.08976895362138748, 'timestamp': '2025-10-02 00:37:29.638218', 'step': 14986, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:29.697721', 'step': 14986, 'epoch': 2}
{'type': 'loss', 'content': 0.06865157186985016, 'timestamp': '2025-10-02 00:37:29.707232', 'step': 14987, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:29.765986', 'step': 14987, 'epoch': 2}
{'type': 'loss', 'content': 0.02881491743028164, 'timestamp': '2025-10-02 00:37:29.772638', 'step': 14988, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:37:29.833150', 'step': 14988, 'epoch': 2}
{'type': 'loss', 'content': 0.046772319823503494, 'timestamp': '2025-10-02 00:37:29.844457', 'step': 14989, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:29.910543', 'step': 14989, 'epoch': 2}
{'type': 'loss', 'content': 0.04248042404651642, 'timestamp': '2025-10-02 00:37:29.920684', 'step': 14990, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:37:29.980105', 'step': 14990, 'epoch': 2}
{'type': 'loss', 'content': 0.11853143572807312, 'timestamp': '2025-10-02 00:37:29.982628', 'step': 14991, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:30.037320', 'step': 14991, 'epoch': 2}
{'type': 'loss', 'content': 0.12600235641002655, 'timestamp': '2025-10-02 00:37:30.043061', 'step': 14992, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:30.097846', 'step': 14992, 'epoch': 2}
{'type': 'loss', 'content': 0.09617306292057037, 'timestamp': '2025-10-02 00:37:30.107362', 'step': 14993, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:37:30.166428', 'step': 14993, 'epoch': 2}
{'type': 'loss', 'content': 0.13625232875347137, 'timestamp': '2025-10-02 00:37:30.168985', 'step': 14994, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:30.226260', 'step': 14994, 'epoch': 2}
{'type': 'loss', 'content': 0.040774378925561905, 'timestamp': '2025-10-02 00:37:30.233420', 'step': 14995, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:30.291581', 'step': 14995, 'epoch': 2}
{'type': 'loss', 'content': 0.12396113574504852, 'timestamp': '2025-10-02 00:37:30.297516', 'step': 14996, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:30.354578', 'step': 14996, 'epoch': 2}
{'type': 'loss', 'content': 0.06487437337636948, 'timestamp': '2025-10-02 00:37:30.358071', 'step': 14997, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:37:30.425313', 'step': 14997, 'epoch': 2}
{'type': 'loss', 'content': 0.048158254474401474, 'timestamp': '2025-10-02 00:37:30.435767', 'step': 14998, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:30.493885', 'step': 14998, 'epoch': 2}
{'type': 'loss', 'content': 0.08385100215673447, 'timestamp': '2025-10-02 00:37:30.497765', 'step': 14999, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:30.552595', 'step': 14999, 'epoch': 2}
{'type': 'loss', 'content': 0.1375076323747635, 'timestamp': '2025-10-02 00:37:30.560241', 'step': 15000, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 15000', 'timestamp': '2025-10-02 00:37:30.983023', 'step': 15000, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:31.040567', 'step': 15000, 'epoch': 2}
{'type': 'loss', 'content': 0.0064726159907877445, 'timestamp': '2025-10-02 00:37:31.043057', 'step': 15001, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:37:31.104060', 'step': 15001, 'epoch': 2}
{'type': 'loss', 'content': 0.05502225086092949, 'timestamp': '2025-10-02 00:37:31.114482', 'step': 15002, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:31.170284', 'step': 15002, 'epoch': 2}
{'type': 'loss', 'content': 0.03085355833172798, 'timestamp': '2025-10-02 00:37:31.172475', 'step': 15003, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:31.226729', 'step': 15003, 'epoch': 2}
{'type': 'loss', 'content': 0.02983531542122364, 'timestamp': '2025-10-02 00:37:31.233054', 'step': 15004, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:31.287102', 'step': 15004, 'epoch': 2}
{'type': 'loss', 'content': 0.1314087063074112, 'timestamp': '2025-10-02 00:37:31.289427', 'step': 15005, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:37:31.357013', 'step': 15005, 'epoch': 2}
{'type': 'loss', 'content': 0.022895578294992447, 'timestamp': '2025-10-02 00:37:31.369005', 'step': 15006, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:31.423869', 'step': 15006, 'epoch': 2}
{'type': 'loss', 'content': 0.12822981178760529, 'timestamp': '2025-10-02 00:37:31.426156', 'step': 15007, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:31.485430', 'step': 15007, 'epoch': 2}
{'type': 'loss', 'content': 0.16387754678726196, 'timestamp': '2025-10-02 00:37:31.490993', 'step': 15008, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:31.548409', 'step': 15008, 'epoch': 2}
{'type': 'loss', 'content': 0.09939638525247574, 'timestamp': '2025-10-02 00:37:31.559440', 'step': 15009, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:31.613330', 'step': 15009, 'epoch': 2}
{'type': 'loss', 'content': 0.1279938668012619, 'timestamp': '2025-10-02 00:37:31.615292', 'step': 15010, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:37:31.677489', 'step': 15010, 'epoch': 2}
{'type': 'loss', 'content': 0.027796709910035133, 'timestamp': '2025-10-02 00:37:31.688397', 'step': 15011, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:31.742833', 'step': 15011, 'epoch': 2}
{'type': 'loss', 'content': 0.05955848842859268, 'timestamp': '2025-10-02 00:37:31.748808', 'step': 15012, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:31.802398', 'step': 15012, 'epoch': 2}
{'type': 'loss', 'content': 0.11009293049573898, 'timestamp': '2025-10-02 00:37:31.804905', 'step': 15013, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:31.860036', 'step': 15013, 'epoch': 2}
{'type': 'loss', 'content': 0.05643313005566597, 'timestamp': '2025-10-02 00:37:31.869396', 'step': 15014, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:31.924823', 'step': 15014, 'epoch': 2}
{'type': 'loss', 'content': 0.051548365503549576, 'timestamp': '2025-10-02 00:37:31.932355', 'step': 15015, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:31.986503', 'step': 15015, 'epoch': 2}
{'type': 'loss', 'content': 0.08511493355035782, 'timestamp': '2025-10-02 00:37:31.993066', 'step': 15016, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:32.046603', 'step': 15016, 'epoch': 2}
{'type': 'loss', 'content': 0.10721475630998611, 'timestamp': '2025-10-02 00:37:32.048663', 'step': 15017, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:32.103304', 'step': 15017, 'epoch': 2}
{'type': 'loss', 'content': 0.03060842864215374, 'timestamp': '2025-10-02 00:37:32.109018', 'step': 15018, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:32.164237', 'step': 15018, 'epoch': 2}
{'type': 'loss', 'content': 0.04250456765294075, 'timestamp': '2025-10-02 00:37:32.171390', 'step': 15019, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:32.225649', 'step': 15019, 'epoch': 2}
{'type': 'loss', 'content': 0.039154212921857834, 'timestamp': '2025-10-02 00:37:32.231100', 'step': 15020, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:32.285216', 'step': 15020, 'epoch': 2}
{'type': 'loss', 'content': 0.04951710253953934, 'timestamp': '2025-10-02 00:37:32.292410', 'step': 15021, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:32.346874', 'step': 15021, 'epoch': 2}
{'type': 'loss', 'content': 0.0966731458902359, 'timestamp': '2025-10-02 00:37:32.349315', 'step': 15022, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:37:32.403710', 'step': 15022, 'epoch': 2}
{'type': 'loss', 'content': 0.09519549459218979, 'timestamp': '2025-10-02 00:37:32.407937', 'step': 15023, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:32.464800', 'step': 15023, 'epoch': 2}
{'type': 'loss', 'content': 0.027191009372472763, 'timestamp': '2025-10-02 00:37:32.471432', 'step': 15024, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:32.525774', 'step': 15024, 'epoch': 2}
{'type': 'loss', 'content': 0.060942187905311584, 'timestamp': '2025-10-02 00:37:32.528308', 'step': 15025, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:32.586358', 'step': 15025, 'epoch': 2}
{'type': 'loss', 'content': 0.2364380955696106, 'timestamp': '2025-10-02 00:37:32.588401', 'step': 15026, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:32.644430', 'step': 15026, 'epoch': 2}
{'type': 'loss', 'content': 0.09772155433893204, 'timestamp': '2025-10-02 00:37:32.646597', 'step': 15027, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:32.702406', 'step': 15027, 'epoch': 2}
{'type': 'loss', 'content': 0.12460435926914215, 'timestamp': '2025-10-02 00:37:32.708322', 'step': 15028, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:32.764049', 'step': 15028, 'epoch': 2}
{'type': 'loss', 'content': 0.03944673761725426, 'timestamp': '2025-10-02 00:37:32.766479', 'step': 15029, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:32.822225', 'step': 15029, 'epoch': 2}
{'type': 'loss', 'content': 0.07459025084972382, 'timestamp': '2025-10-02 00:37:32.825146', 'step': 15030, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:32.881594', 'step': 15030, 'epoch': 2}
{'type': 'loss', 'content': 0.16166262328624725, 'timestamp': '2025-10-02 00:37:32.884041', 'step': 15031, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:32.938216', 'step': 15031, 'epoch': 2}
{'type': 'loss', 'content': 0.05241633951663971, 'timestamp': '2025-10-02 00:37:32.944010', 'step': 15032, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:33.002357', 'step': 15032, 'epoch': 2}
{'type': 'loss', 'content': 0.03686906397342682, 'timestamp': '2025-10-02 00:37:33.013317', 'step': 15033, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:37:33.084138', 'step': 15033, 'epoch': 2}
{'type': 'loss', 'content': 0.014086601324379444, 'timestamp': '2025-10-02 00:37:33.096785', 'step': 15034, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:33.150845', 'step': 15034, 'epoch': 2}
{'type': 'loss', 'content': 0.07403557002544403, 'timestamp': '2025-10-02 00:37:33.153034', 'step': 15035, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:33.207386', 'step': 15035, 'epoch': 2}
{'type': 'loss', 'content': 0.017537768930196762, 'timestamp': '2025-10-02 00:37:33.215420', 'step': 15036, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:33.268598', 'step': 15036, 'epoch': 2}
{'type': 'loss', 'content': 0.09555472433567047, 'timestamp': '2025-10-02 00:37:33.271135', 'step': 15037, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:33.325863', 'step': 15037, 'epoch': 2}
{'type': 'loss', 'content': 0.14662344753742218, 'timestamp': '2025-10-02 00:37:33.328525', 'step': 15038, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:33.384435', 'step': 15038, 'epoch': 2}
{'type': 'loss', 'content': 0.07251056283712387, 'timestamp': '2025-10-02 00:37:33.390359', 'step': 15039, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:33.454849', 'step': 15039, 'epoch': 2}
{'type': 'loss', 'content': 0.04473567008972168, 'timestamp': '2025-10-02 00:37:33.478743', 'step': 15040, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:33.578422', 'step': 15040, 'epoch': 2}
{'type': 'loss', 'content': 0.05005389079451561, 'timestamp': '2025-10-02 00:37:33.605934', 'step': 15041, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:33.715514', 'step': 15041, 'epoch': 2}
{'type': 'loss', 'content': 0.03583964332938194, 'timestamp': '2025-10-02 00:37:33.719421', 'step': 15042, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:33.780081', 'step': 15042, 'epoch': 2}
{'type': 'loss', 'content': 0.050462666898965836, 'timestamp': '2025-10-02 00:37:33.785895', 'step': 15043, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:33.888660', 'step': 15043, 'epoch': 2}
{'type': 'loss', 'content': 0.041801389306783676, 'timestamp': '2025-10-02 00:37:33.898952', 'step': 15044, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:33.959857', 'step': 15044, 'epoch': 2}
{'type': 'loss', 'content': 0.05809168145060539, 'timestamp': '2025-10-02 00:37:33.964556', 'step': 15045, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:34.039017', 'step': 15045, 'epoch': 2}
{'type': 'loss', 'content': 0.019346199929714203, 'timestamp': '2025-10-02 00:37:34.048570', 'step': 15046, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:34.131222', 'step': 15046, 'epoch': 2}
{'type': 'loss', 'content': 0.04839945212006569, 'timestamp': '2025-10-02 00:37:34.134993', 'step': 15047, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:34.199257', 'step': 15047, 'epoch': 2}
{'type': 'loss', 'content': 0.004721874371170998, 'timestamp': '2025-10-02 00:37:34.207922', 'step': 15048, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:34.287712', 'step': 15048, 'epoch': 2}
{'type': 'loss', 'content': 0.02865150012075901, 'timestamp': '2025-10-02 00:37:34.295173', 'step': 15049, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:37:34.373888', 'step': 15049, 'epoch': 2}
{'type': 'loss', 'content': 0.10644198954105377, 'timestamp': '2025-10-02 00:37:34.384857', 'step': 15050, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:34.462965', 'step': 15050, 'epoch': 2}
{'type': 'loss', 'content': 0.039654117077589035, 'timestamp': '2025-10-02 00:37:34.476012', 'step': 15051, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:34.554879', 'step': 15051, 'epoch': 2}
{'type': 'loss', 'content': 0.15379835665225983, 'timestamp': '2025-10-02 00:37:34.562424', 'step': 15052, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:34.646757', 'step': 15052, 'epoch': 2}
{'type': 'loss', 'content': 0.008154177106916904, 'timestamp': '2025-10-02 00:37:34.659946', 'step': 15053, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:37:34.741622', 'step': 15053, 'epoch': 2}
{'type': 'loss', 'content': 0.0875096395611763, 'timestamp': '2025-10-02 00:37:34.745410', 'step': 15054, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:34.826159', 'step': 15054, 'epoch': 2}
{'type': 'loss', 'content': 0.021862221881747246, 'timestamp': '2025-10-02 00:37:34.828875', 'step': 15055, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:34.893457', 'step': 15055, 'epoch': 2}
{'type': 'loss', 'content': 0.08703243732452393, 'timestamp': '2025-10-02 00:37:34.911261', 'step': 15056, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:35.002339', 'step': 15056, 'epoch': 2}
{'type': 'loss', 'content': 0.07624656707048416, 'timestamp': '2025-10-02 00:37:35.011869', 'step': 15057, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:35.091264', 'step': 15057, 'epoch': 2}
{'type': 'loss', 'content': 0.02598607912659645, 'timestamp': '2025-10-02 00:37:35.105444', 'step': 15058, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:35.189925', 'step': 15058, 'epoch': 2}
{'type': 'loss', 'content': 0.07893756031990051, 'timestamp': '2025-10-02 00:37:35.195870', 'step': 15059, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:35.281384', 'step': 15059, 'epoch': 2}
{'type': 'loss', 'content': 0.039731454104185104, 'timestamp': '2025-10-02 00:37:35.295610', 'step': 15060, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:37:35.371715', 'step': 15060, 'epoch': 2}
{'type': 'loss', 'content': 0.05575209856033325, 'timestamp': '2025-10-02 00:37:35.384608', 'step': 15061, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:35.465885', 'step': 15061, 'epoch': 2}
{'type': 'loss', 'content': 0.0732119157910347, 'timestamp': '2025-10-02 00:37:35.471391', 'step': 15062, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:35.538827', 'step': 15062, 'epoch': 2}
{'type': 'loss', 'content': 0.08330003172159195, 'timestamp': '2025-10-02 00:37:35.548352', 'step': 15063, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:37:35.644508', 'step': 15063, 'epoch': 2}
{'type': 'loss', 'content': 0.026075631380081177, 'timestamp': '2025-10-02 00:37:35.660013', 'step': 15064, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:37:35.748068', 'step': 15064, 'epoch': 2}
{'type': 'loss', 'content': 0.04943829029798508, 'timestamp': '2025-10-02 00:37:35.759385', 'step': 15065, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:35.839958', 'step': 15065, 'epoch': 2}
{'type': 'loss', 'content': 0.042263515293598175, 'timestamp': '2025-10-02 00:37:35.852835', 'step': 15066, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:35.937823', 'step': 15066, 'epoch': 2}
{'type': 'loss', 'content': 0.04875777289271355, 'timestamp': '2025-10-02 00:37:35.941406', 'step': 15067, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:36.026477', 'step': 15067, 'epoch': 2}
{'type': 'loss', 'content': 0.01914176531136036, 'timestamp': '2025-10-02 00:37:36.036826', 'step': 15068, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:37:36.115751', 'step': 15068, 'epoch': 2}
{'type': 'loss', 'content': 0.008463607169687748, 'timestamp': '2025-10-02 00:37:36.127249', 'step': 15069, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:37:36.204603', 'step': 15069, 'epoch': 2}
{'type': 'loss', 'content': 0.07852097600698471, 'timestamp': '2025-10-02 00:37:36.218559', 'step': 15070, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:36.307676', 'step': 15070, 'epoch': 2}
{'type': 'loss', 'content': 0.19144244492053986, 'timestamp': '2025-10-02 00:37:36.311574', 'step': 15071, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:36.374943', 'step': 15071, 'epoch': 2}
{'type': 'loss', 'content': 0.02287915162742138, 'timestamp': '2025-10-02 00:37:36.389673', 'step': 15072, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:36.458656', 'step': 15072, 'epoch': 2}
{'type': 'loss', 'content': 0.1382138878107071, 'timestamp': '2025-10-02 00:37:36.474652', 'step': 15073, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:36.549344', 'step': 15073, 'epoch': 2}
{'type': 'loss', 'content': 0.07066427171230316, 'timestamp': '2025-10-02 00:37:36.553233', 'step': 15074, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:36.612346', 'step': 15074, 'epoch': 2}
{'type': 'loss', 'content': 0.007464108522981405, 'timestamp': '2025-10-02 00:37:36.625402', 'step': 15075, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:36.721249', 'step': 15075, 'epoch': 2}
{'type': 'loss', 'content': 0.07470349222421646, 'timestamp': '2025-10-02 00:37:36.736157', 'step': 15076, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:36.815672', 'step': 15076, 'epoch': 2}
{'type': 'loss', 'content': 0.10604224354028702, 'timestamp': '2025-10-02 00:37:36.828683', 'step': 15077, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:36.906517', 'step': 15077, 'epoch': 2}
{'type': 'loss', 'content': 0.07731708139181137, 'timestamp': '2025-10-02 00:37:36.919741', 'step': 15078, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:36.989184', 'step': 15078, 'epoch': 2}
{'type': 'loss', 'content': 0.026007365435361862, 'timestamp': '2025-10-02 00:37:36.992808', 'step': 15079, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:37.056326', 'step': 15079, 'epoch': 2}
{'type': 'loss', 'content': 0.13187667727470398, 'timestamp': '2025-10-02 00:37:37.062904', 'step': 15080, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:37:37.148404', 'step': 15080, 'epoch': 2}
{'type': 'loss', 'content': 0.06314194947481155, 'timestamp': '2025-10-02 00:37:37.160005', 'step': 15081, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:37.242124', 'step': 15081, 'epoch': 2}
{'type': 'loss', 'content': 0.04828573018312454, 'timestamp': '2025-10-02 00:37:37.246386', 'step': 15082, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:37.314242', 'step': 15082, 'epoch': 2}
{'type': 'loss', 'content': 0.03459322080016136, 'timestamp': '2025-10-02 00:37:37.317601', 'step': 15083, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:37.376917', 'step': 15083, 'epoch': 2}
{'type': 'loss', 'content': 0.054423145949840546, 'timestamp': '2025-10-02 00:37:37.383640', 'step': 15084, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:37.441724', 'step': 15084, 'epoch': 2}
{'type': 'loss', 'content': 0.12633641064167023, 'timestamp': '2025-10-02 00:37:37.445111', 'step': 15085, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:37.526933', 'step': 15085, 'epoch': 2}
{'type': 'loss', 'content': 0.05179727450013161, 'timestamp': '2025-10-02 00:37:37.534514', 'step': 15086, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:37.601790', 'step': 15086, 'epoch': 2}
{'type': 'loss', 'content': 0.1157645583152771, 'timestamp': '2025-10-02 00:37:37.606342', 'step': 15087, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:37.685720', 'step': 15087, 'epoch': 2}
{'type': 'loss', 'content': 0.0012948029907420278, 'timestamp': '2025-10-02 00:37:37.705746', 'step': 15088, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:37.782876', 'step': 15088, 'epoch': 2}
{'type': 'loss', 'content': 0.07053421437740326, 'timestamp': '2025-10-02 00:37:37.786033', 'step': 15089, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:37.858074', 'step': 15089, 'epoch': 2}
{'type': 'loss', 'content': 0.11970490217208862, 'timestamp': '2025-10-02 00:37:37.873623', 'step': 15090, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:37:37.941334', 'step': 15090, 'epoch': 2}
{'type': 'loss', 'content': 0.07044544070959091, 'timestamp': '2025-10-02 00:37:37.945474', 'step': 15091, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:38.010989', 'step': 15091, 'epoch': 2}
{'type': 'loss', 'content': 0.05832437053322792, 'timestamp': '2025-10-02 00:37:38.026518', 'step': 15092, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:38.117024', 'step': 15092, 'epoch': 2}
{'type': 'loss', 'content': 0.0843609869480133, 'timestamp': '2025-10-02 00:37:38.120945', 'step': 15093, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:38.187982', 'step': 15093, 'epoch': 2}
{'type': 'loss', 'content': 0.075111024081707, 'timestamp': '2025-10-02 00:37:38.191625', 'step': 15094, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:38.275421', 'step': 15094, 'epoch': 2}
{'type': 'loss', 'content': 0.09274041652679443, 'timestamp': '2025-10-02 00:37:38.286412', 'step': 15095, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:37:38.372728', 'step': 15095, 'epoch': 2}
{'type': 'loss', 'content': 0.05882086604833603, 'timestamp': '2025-10-02 00:37:38.379915', 'step': 15096, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:38.450262', 'step': 15096, 'epoch': 2}
{'type': 'loss', 'content': 0.06459053605794907, 'timestamp': '2025-10-02 00:37:38.453591', 'step': 15097, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:38.522845', 'step': 15097, 'epoch': 2}
{'type': 'loss', 'content': 0.038902271538972855, 'timestamp': '2025-10-02 00:37:38.537482', 'step': 15098, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:38.618450', 'step': 15098, 'epoch': 2}
{'type': 'loss', 'content': 0.07532968372106552, 'timestamp': '2025-10-02 00:37:38.632564', 'step': 15099, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:38.713732', 'step': 15099, 'epoch': 2}
{'type': 'loss', 'content': 0.033073220402002335, 'timestamp': '2025-10-02 00:37:38.732863', 'step': 15100, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:38.818046', 'step': 15100, 'epoch': 2}
{'type': 'loss', 'content': 0.0724007859826088, 'timestamp': '2025-10-02 00:37:38.830435', 'step': 15101, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:38.904473', 'step': 15101, 'epoch': 2}
{'type': 'loss', 'content': 0.025926051661372185, 'timestamp': '2025-10-02 00:37:38.921265', 'step': 15102, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:39.016087', 'step': 15102, 'epoch': 2}
{'type': 'loss', 'content': 0.10912050306797028, 'timestamp': '2025-10-02 00:37:39.032092', 'step': 15103, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:39.111771', 'step': 15103, 'epoch': 2}
{'type': 'loss', 'content': 0.0823720321059227, 'timestamp': '2025-10-02 00:37:39.119400', 'step': 15104, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:39.210212', 'step': 15104, 'epoch': 2}
{'type': 'loss', 'content': 0.040686607360839844, 'timestamp': '2025-10-02 00:37:39.217807', 'step': 15105, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:39.284598', 'step': 15105, 'epoch': 2}
{'type': 'loss', 'content': 0.005939875263720751, 'timestamp': '2025-10-02 00:37:39.290587', 'step': 15106, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:39.370128', 'step': 15106, 'epoch': 2}
{'type': 'loss', 'content': 0.1463528871536255, 'timestamp': '2025-10-02 00:37:39.375688', 'step': 15107, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:39.450079', 'step': 15107, 'epoch': 2}
{'type': 'loss', 'content': 0.06467581540346146, 'timestamp': '2025-10-02 00:37:39.460429', 'step': 15108, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:39.542315', 'step': 15108, 'epoch': 2}
{'type': 'loss', 'content': 0.05895872786641121, 'timestamp': '2025-10-02 00:37:39.556711', 'step': 15109, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:39.641339', 'step': 15109, 'epoch': 2}
{'type': 'loss', 'content': 0.221147358417511, 'timestamp': '2025-10-02 00:37:39.645055', 'step': 15110, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:39.710875', 'step': 15110, 'epoch': 2}
{'type': 'loss', 'content': 0.14403706789016724, 'timestamp': '2025-10-02 00:37:39.724578', 'step': 15111, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:39.820018', 'step': 15111, 'epoch': 2}
{'type': 'loss', 'content': 0.10100623965263367, 'timestamp': '2025-10-02 00:37:39.838852', 'step': 15112, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:39.896713', 'step': 15112, 'epoch': 2}
{'type': 'loss', 'content': 0.049117084592580795, 'timestamp': '2025-10-02 00:37:39.900620', 'step': 15113, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:39.966216', 'step': 15113, 'epoch': 2}
{'type': 'loss', 'content': 0.010805574245750904, 'timestamp': '2025-10-02 00:37:39.973583', 'step': 15114, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:40.058406', 'step': 15114, 'epoch': 2}
{'type': 'loss', 'content': 0.024923473596572876, 'timestamp': '2025-10-02 00:37:40.071589', 'step': 15115, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:37:40.162708', 'step': 15115, 'epoch': 2}
{'type': 'loss', 'content': 0.003590474370867014, 'timestamp': '2025-10-02 00:37:40.175975', 'step': 15116, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:40.261068', 'step': 15116, 'epoch': 2}
{'type': 'loss', 'content': 0.06118734925985336, 'timestamp': '2025-10-02 00:37:40.266146', 'step': 15117, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:40.331016', 'step': 15117, 'epoch': 2}
{'type': 'loss', 'content': 0.09401591122150421, 'timestamp': '2025-10-02 00:37:40.344724', 'step': 15118, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:37:40.447850', 'step': 15118, 'epoch': 2}
{'type': 'loss', 'content': 0.031437065452337265, 'timestamp': '2025-10-02 00:37:40.461680', 'step': 15119, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:40.530845', 'step': 15119, 'epoch': 2}
{'type': 'loss', 'content': 0.017006143927574158, 'timestamp': '2025-10-02 00:37:40.537657', 'step': 15120, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:40.627342', 'step': 15120, 'epoch': 2}
{'type': 'loss', 'content': 0.0433492586016655, 'timestamp': '2025-10-02 00:37:40.634788', 'step': 15121, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:40.728613', 'step': 15121, 'epoch': 2}
{'type': 'loss', 'content': 0.019074223935604095, 'timestamp': '2025-10-02 00:37:40.744664', 'step': 15122, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:40.821053', 'step': 15122, 'epoch': 2}
{'type': 'loss', 'content': 0.018144212663173676, 'timestamp': '2025-10-02 00:37:40.837745', 'step': 15123, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:40.928205', 'step': 15123, 'epoch': 2}
{'type': 'loss', 'content': 0.12308263033628464, 'timestamp': '2025-10-02 00:37:40.936081', 'step': 15124, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:41.021783', 'step': 15124, 'epoch': 2}
{'type': 'loss', 'content': 0.06833286583423615, 'timestamp': '2025-10-02 00:37:41.037024', 'step': 15125, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:37:41.118730', 'step': 15125, 'epoch': 2}
{'type': 'loss', 'content': 0.10707510262727737, 'timestamp': '2025-10-02 00:37:41.132278', 'step': 15126, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:37:41.238772', 'step': 15126, 'epoch': 2}
{'type': 'loss', 'content': 0.017660630866885185, 'timestamp': '2025-10-02 00:37:41.254230', 'step': 15127, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:41.342613', 'step': 15127, 'epoch': 2}
{'type': 'loss', 'content': 0.13883619010448456, 'timestamp': '2025-10-02 00:37:41.352427', 'step': 15128, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:37:41.442040', 'step': 15128, 'epoch': 2}
{'type': 'loss', 'content': 0.05359245091676712, 'timestamp': '2025-10-02 00:37:41.455406', 'step': 15129, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:41.524792', 'step': 15129, 'epoch': 2}
{'type': 'loss', 'content': 0.05847558751702309, 'timestamp': '2025-10-02 00:37:41.539783', 'step': 15130, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:41.630604', 'step': 15130, 'epoch': 2}
{'type': 'loss', 'content': 0.0416182242333889, 'timestamp': '2025-10-02 00:37:41.648332', 'step': 15131, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:41.714844', 'step': 15131, 'epoch': 2}
{'type': 'loss', 'content': 0.04505974426865578, 'timestamp': '2025-10-02 00:37:41.733668', 'step': 15132, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:41.821297', 'step': 15132, 'epoch': 2}
{'type': 'loss', 'content': 0.10576923191547394, 'timestamp': '2025-10-02 00:37:41.835684', 'step': 15133, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:41.919311', 'step': 15133, 'epoch': 2}
{'type': 'loss', 'content': 0.10176496207714081, 'timestamp': '2025-10-02 00:37:41.941996', 'step': 15134, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:42.042235', 'step': 15134, 'epoch': 2}
{'type': 'loss', 'content': 0.031231053173542023, 'timestamp': '2025-10-02 00:37:42.049465', 'step': 15135, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:42.118355', 'step': 15135, 'epoch': 2}
{'type': 'loss', 'content': 0.120992511510849, 'timestamp': '2025-10-02 00:37:42.139209', 'step': 15136, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:37:42.229425', 'step': 15136, 'epoch': 2}
{'type': 'loss', 'content': 0.09780342131853104, 'timestamp': '2025-10-02 00:37:42.245230', 'step': 15137, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:37:42.341142', 'step': 15137, 'epoch': 2}
{'type': 'loss', 'content': 0.10648591071367264, 'timestamp': '2025-10-02 00:37:42.358232', 'step': 15138, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:42.448302', 'step': 15138, 'epoch': 2}
{'type': 'loss', 'content': 0.042952485382556915, 'timestamp': '2025-10-02 00:37:42.453220', 'step': 15139, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:42.533398', 'step': 15139, 'epoch': 2}
{'type': 'loss', 'content': 0.0906393826007843, 'timestamp': '2025-10-02 00:37:42.555802', 'step': 15140, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:42.656529', 'step': 15140, 'epoch': 2}
{'type': 'loss', 'content': 0.022547150030732155, 'timestamp': '2025-10-02 00:37:42.667476', 'step': 15141, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:42.746986', 'step': 15141, 'epoch': 2}
{'type': 'loss', 'content': 0.030643997713923454, 'timestamp': '2025-10-02 00:37:42.766467', 'step': 15142, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:42.861502', 'step': 15142, 'epoch': 2}
{'type': 'loss', 'content': 0.031311288475990295, 'timestamp': '2025-10-02 00:37:42.865977', 'step': 15143, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:42.962096', 'step': 15143, 'epoch': 2}
{'type': 'loss', 'content': 0.07508459687232971, 'timestamp': '2025-10-02 00:37:42.983145', 'step': 15144, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:43.050922', 'step': 15144, 'epoch': 2}
{'type': 'loss', 'content': 0.06275764107704163, 'timestamp': '2025-10-02 00:37:43.054666', 'step': 15145, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:43.135182', 'step': 15145, 'epoch': 2}
{'type': 'loss', 'content': 0.005786129739135504, 'timestamp': '2025-10-02 00:37:43.147652', 'step': 15146, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:37:43.215745', 'step': 15146, 'epoch': 2}
{'type': 'loss', 'content': 0.13522887229919434, 'timestamp': '2025-10-02 00:37:43.233481', 'step': 15147, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:43.332849', 'step': 15147, 'epoch': 2}
{'type': 'loss', 'content': 0.02926403097808361, 'timestamp': '2025-10-02 00:37:43.349705', 'step': 15148, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:43.436481', 'step': 15148, 'epoch': 2}
{'type': 'loss', 'content': 0.09780745953321457, 'timestamp': '2025-10-02 00:37:43.448938', 'step': 15149, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:43.528783', 'step': 15149, 'epoch': 2}
{'type': 'loss', 'content': 0.07533055543899536, 'timestamp': '2025-10-02 00:37:43.532645', 'step': 15150, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:43.626381', 'step': 15150, 'epoch': 2}
{'type': 'loss', 'content': 0.059075597673654556, 'timestamp': '2025-10-02 00:37:43.639886', 'step': 15151, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:43.719347', 'step': 15151, 'epoch': 2}
{'type': 'loss', 'content': 0.044683173298835754, 'timestamp': '2025-10-02 00:37:43.737948', 'step': 15152, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:43.798299', 'step': 15152, 'epoch': 2}
{'type': 'loss', 'content': 0.055548060685396194, 'timestamp': '2025-10-02 00:37:43.815641', 'step': 15153, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:43.896872', 'step': 15153, 'epoch': 2}
{'type': 'loss', 'content': 0.09616270661354065, 'timestamp': '2025-10-02 00:37:43.914400', 'step': 15154, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:43.997059', 'step': 15154, 'epoch': 2}
{'type': 'loss', 'content': 0.0472450777888298, 'timestamp': '2025-10-02 00:37:44.012158', 'step': 15155, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:44.105731', 'step': 15155, 'epoch': 2}
{'type': 'loss', 'content': 0.025255495682358742, 'timestamp': '2025-10-02 00:37:44.127514', 'step': 15156, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:44.228510', 'step': 15156, 'epoch': 2}
{'type': 'loss', 'content': 0.04487108066678047, 'timestamp': '2025-10-02 00:37:44.232360', 'step': 15157, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:44.313771', 'step': 15157, 'epoch': 2}
{'type': 'loss', 'content': 0.07633645832538605, 'timestamp': '2025-10-02 00:37:44.329899', 'step': 15158, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:44.388033', 'step': 15158, 'epoch': 2}
{'type': 'loss', 'content': 0.1490306705236435, 'timestamp': '2025-10-02 00:37:44.409121', 'step': 15159, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:44.510338', 'step': 15159, 'epoch': 2}
{'type': 'loss', 'content': 0.023104386404156685, 'timestamp': '2025-10-02 00:37:44.525800', 'step': 15160, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:44.591943', 'step': 15160, 'epoch': 2}
{'type': 'loss', 'content': 0.095944844186306, 'timestamp': '2025-10-02 00:37:44.607259', 'step': 15161, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:44.686993', 'step': 15161, 'epoch': 2}
{'type': 'loss', 'content': 0.03335486724972725, 'timestamp': '2025-10-02 00:37:44.690609', 'step': 15162, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:44.773192', 'step': 15162, 'epoch': 2}
{'type': 'loss', 'content': 0.07208535820245743, 'timestamp': '2025-10-02 00:37:44.786131', 'step': 15163, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:44.861833', 'step': 15163, 'epoch': 2}
{'type': 'loss', 'content': 0.07000058889389038, 'timestamp': '2025-10-02 00:37:44.870973', 'step': 15164, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:44.948662', 'step': 15164, 'epoch': 2}
{'type': 'loss', 'content': 0.010615850798785686, 'timestamp': '2025-10-02 00:37:44.958947', 'step': 15165, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:45.015953', 'step': 15165, 'epoch': 2}
{'type': 'loss', 'content': 0.017100147902965546, 'timestamp': '2025-10-02 00:37:45.031145', 'step': 15166, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:45.123960', 'step': 15166, 'epoch': 2}
{'type': 'loss', 'content': 0.08662449568510056, 'timestamp': '2025-10-02 00:37:45.138833', 'step': 15167, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:45.209491', 'step': 15167, 'epoch': 2}
{'type': 'loss', 'content': 0.03973759338259697, 'timestamp': '2025-10-02 00:37:45.229002', 'step': 15168, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:45.305709', 'step': 15168, 'epoch': 2}
{'type': 'loss', 'content': 0.03533787280321121, 'timestamp': '2025-10-02 00:37:45.319853', 'step': 15169, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 576], 'flops': 11520070000896.0}, 'timestamp': '2025-10-02 00:37:45.448799', 'step': 15169, 'epoch': 2}
{'type': 'loss', 'content': 0.010098414495587349, 'timestamp': '2025-10-02 00:37:45.464694', 'step': 15170, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:45.549898', 'step': 15170, 'epoch': 2}
{'type': 'loss', 'content': 0.07567145675420761, 'timestamp': '2025-10-02 00:37:45.558883', 'step': 15171, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:37:45.641958', 'step': 15171, 'epoch': 2}
{'type': 'loss', 'content': 0.01701938919723034, 'timestamp': '2025-10-02 00:37:45.658921', 'step': 15172, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:45.735651', 'step': 15172, 'epoch': 2}
{'type': 'loss', 'content': 0.010127799585461617, 'timestamp': '2025-10-02 00:37:45.750037', 'step': 15173, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:37:45.826255', 'step': 15173, 'epoch': 2}
{'type': 'loss', 'content': 0.011830189265310764, 'timestamp': '2025-10-02 00:37:45.838575', 'step': 15174, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:45.936665', 'step': 15174, 'epoch': 2}
{'type': 'loss', 'content': 0.048866089433431625, 'timestamp': '2025-10-02 00:37:45.943652', 'step': 15175, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:46.046428', 'step': 15175, 'epoch': 2}
{'type': 'loss', 'content': 0.01711173914372921, 'timestamp': '2025-10-02 00:37:46.054572', 'step': 15176, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:46.125172', 'step': 15176, 'epoch': 2}
{'type': 'loss', 'content': 0.0638403594493866, 'timestamp': '2025-10-02 00:37:46.142826', 'step': 15177, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:46.239662', 'step': 15177, 'epoch': 2}
{'type': 'loss', 'content': 0.042261023074388504, 'timestamp': '2025-10-02 00:37:46.258380', 'step': 15178, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:46.357784', 'step': 15178, 'epoch': 2}
{'type': 'loss', 'content': 0.05502268671989441, 'timestamp': '2025-10-02 00:37:46.373685', 'step': 15179, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:37:46.462892', 'step': 15179, 'epoch': 2}
{'type': 'loss', 'content': 0.17279799282550812, 'timestamp': '2025-10-02 00:37:46.470419', 'step': 15180, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:46.546632', 'step': 15180, 'epoch': 2}
{'type': 'loss', 'content': 0.12525703012943268, 'timestamp': '2025-10-02 00:37:46.561014', 'step': 15181, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:46.620227', 'step': 15181, 'epoch': 2}
{'type': 'loss', 'content': 0.10381005704402924, 'timestamp': '2025-10-02 00:37:46.636189', 'step': 15182, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:46.739186', 'step': 15182, 'epoch': 2}
{'type': 'loss', 'content': 0.05056454986333847, 'timestamp': '2025-10-02 00:37:46.758595', 'step': 15183, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:46.824438', 'step': 15183, 'epoch': 2}
{'type': 'loss', 'content': 0.044393815100193024, 'timestamp': '2025-10-02 00:37:46.845337', 'step': 15184, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:46.935026', 'step': 15184, 'epoch': 2}
{'type': 'loss', 'content': 0.08705361932516098, 'timestamp': '2025-10-02 00:37:46.952032', 'step': 15185, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:47.047580', 'step': 15185, 'epoch': 2}
{'type': 'loss', 'content': 0.10150083154439926, 'timestamp': '2025-10-02 00:37:47.067089', 'step': 15186, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:47.160913', 'step': 15186, 'epoch': 2}
{'type': 'loss', 'content': 0.12806746363639832, 'timestamp': '2025-10-02 00:37:47.165839', 'step': 15187, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:47.250834', 'step': 15187, 'epoch': 2}
{'type': 'loss', 'content': 0.10913558304309845, 'timestamp': '2025-10-02 00:37:47.258938', 'step': 15188, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:47.318644', 'step': 15188, 'epoch': 2}
{'type': 'loss', 'content': 0.019669491797685623, 'timestamp': '2025-10-02 00:37:47.334149', 'step': 15189, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:47.430154', 'step': 15189, 'epoch': 2}
{'type': 'loss', 'content': 0.06727970391511917, 'timestamp': '2025-10-02 00:37:47.446290', 'step': 15190, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:47.533677', 'step': 15190, 'epoch': 2}
{'type': 'loss', 'content': 0.05987301468849182, 'timestamp': '2025-10-02 00:37:47.550932', 'step': 15191, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:47.616481', 'step': 15191, 'epoch': 2}
{'type': 'loss', 'content': 0.00735160568729043, 'timestamp': '2025-10-02 00:37:47.634782', 'step': 15192, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:47.717275', 'step': 15192, 'epoch': 2}
{'type': 'loss', 'content': 0.04601456969976425, 'timestamp': '2025-10-02 00:37:47.736125', 'step': 15193, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:47.837492', 'step': 15193, 'epoch': 2}
{'type': 'loss', 'content': 0.02975316159427166, 'timestamp': '2025-10-02 00:37:47.852131', 'step': 15194, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:47.926778', 'step': 15194, 'epoch': 2}
{'type': 'loss', 'content': 0.05741511657834053, 'timestamp': '2025-10-02 00:37:47.942866', 'step': 15195, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:48.025445', 'step': 15195, 'epoch': 2}
{'type': 'loss', 'content': 0.043569959700107574, 'timestamp': '2025-10-02 00:37:48.047143', 'step': 15196, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:48.144002', 'step': 15196, 'epoch': 2}
{'type': 'loss', 'content': 0.09495994448661804, 'timestamp': '2025-10-02 00:37:48.162812', 'step': 15197, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:48.251166', 'step': 15197, 'epoch': 2}
{'type': 'loss', 'content': 0.041403744369745255, 'timestamp': '2025-10-02 00:37:48.265049', 'step': 15198, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:48.339805', 'step': 15198, 'epoch': 2}
{'type': 'loss', 'content': 0.1177176758646965, 'timestamp': '2025-10-02 00:37:48.343129', 'step': 15199, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:48.408425', 'step': 15199, 'epoch': 2}
{'type': 'loss', 'content': 0.07402801513671875, 'timestamp': '2025-10-02 00:37:48.419366', 'step': 15200, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:48.485753', 'step': 15200, 'epoch': 2}
{'type': 'loss', 'content': 0.052200041711330414, 'timestamp': '2025-10-02 00:37:48.488860', 'step': 15201, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:48.551588', 'step': 15201, 'epoch': 2}
{'type': 'loss', 'content': 0.10655152797698975, 'timestamp': '2025-10-02 00:37:48.557431', 'step': 15202, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:48.612305', 'step': 15202, 'epoch': 2}
{'type': 'loss', 'content': 0.028083516284823418, 'timestamp': '2025-10-02 00:37:48.619817', 'step': 15203, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:48.676606', 'step': 15203, 'epoch': 2}
{'type': 'loss', 'content': 0.06484678387641907, 'timestamp': '2025-10-02 00:37:48.684140', 'step': 15204, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:48.751586', 'step': 15204, 'epoch': 2}
{'type': 'loss', 'content': 0.044282685965299606, 'timestamp': '2025-10-02 00:37:48.754937', 'step': 15205, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:37:48.831842', 'step': 15205, 'epoch': 2}
{'type': 'loss', 'content': 0.026537051424384117, 'timestamp': '2025-10-02 00:37:48.842722', 'step': 15206, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:48.897729', 'step': 15206, 'epoch': 2}
{'type': 'loss', 'content': 0.10325799137353897, 'timestamp': '2025-10-02 00:37:48.900940', 'step': 15207, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:48.961872', 'step': 15207, 'epoch': 2}
{'type': 'loss', 'content': 0.07505936175584793, 'timestamp': '2025-10-02 00:37:48.970853', 'step': 15208, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:49.027475', 'step': 15208, 'epoch': 2}
{'type': 'loss', 'content': 0.07138371467590332, 'timestamp': '2025-10-02 00:37:49.033399', 'step': 15209, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:49.092641', 'step': 15209, 'epoch': 2}
{'type': 'loss', 'content': 0.061183542013168335, 'timestamp': '2025-10-02 00:37:49.095941', 'step': 15210, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:49.154488', 'step': 15210, 'epoch': 2}
{'type': 'loss', 'content': 0.07468125969171524, 'timestamp': '2025-10-02 00:37:49.156816', 'step': 15211, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:37:49.226891', 'step': 15211, 'epoch': 2}
{'type': 'loss', 'content': 0.017127471044659615, 'timestamp': '2025-10-02 00:37:49.238357', 'step': 15212, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:49.296675', 'step': 15212, 'epoch': 2}
{'type': 'loss', 'content': 0.08048289269208908, 'timestamp': '2025-10-02 00:37:49.302809', 'step': 15213, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:49.367796', 'step': 15213, 'epoch': 2}
{'type': 'loss', 'content': 0.016596103087067604, 'timestamp': '2025-10-02 00:37:49.375662', 'step': 15214, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:49.444777', 'step': 15214, 'epoch': 2}
{'type': 'loss', 'content': 0.058190904557704926, 'timestamp': '2025-10-02 00:37:49.453446', 'step': 15215, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:49.522673', 'step': 15215, 'epoch': 2}
{'type': 'loss', 'content': 0.04222838208079338, 'timestamp': '2025-10-02 00:37:49.533018', 'step': 15216, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:49.597839', 'step': 15216, 'epoch': 2}
{'type': 'loss', 'content': 0.041631847620010376, 'timestamp': '2025-10-02 00:37:49.605903', 'step': 15217, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:49.677587', 'step': 15217, 'epoch': 2}
{'type': 'loss', 'content': 0.10419643670320511, 'timestamp': '2025-10-02 00:37:49.683911', 'step': 15218, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:49.742000', 'step': 15218, 'epoch': 2}
{'type': 'loss', 'content': 0.033526528626680374, 'timestamp': '2025-10-02 00:37:49.749757', 'step': 15219, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:37:49.810187', 'step': 15219, 'epoch': 2}
{'type': 'loss', 'content': 0.29606756567955017, 'timestamp': '2025-10-02 00:37:49.820699', 'step': 15220, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:49.887471', 'step': 15220, 'epoch': 2}
{'type': 'loss', 'content': 0.038176264613866806, 'timestamp': '2025-10-02 00:37:49.897227', 'step': 15221, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:49.968139', 'step': 15221, 'epoch': 2}
{'type': 'loss', 'content': 0.06864665448665619, 'timestamp': '2025-10-02 00:37:49.976000', 'step': 15222, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:50.044394', 'step': 15222, 'epoch': 2}
{'type': 'loss', 'content': 0.047700267285108566, 'timestamp': '2025-10-02 00:37:50.047242', 'step': 15223, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:50.103877', 'step': 15223, 'epoch': 2}
{'type': 'loss', 'content': 0.08983336389064789, 'timestamp': '2025-10-02 00:37:50.114917', 'step': 15224, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:50.182243', 'step': 15224, 'epoch': 2}
{'type': 'loss', 'content': 0.005478861276060343, 'timestamp': '2025-10-02 00:37:50.192531', 'step': 15225, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:37:50.271046', 'step': 15225, 'epoch': 2}
{'type': 'loss', 'content': 0.06100304424762726, 'timestamp': '2025-10-02 00:37:50.281575', 'step': 15226, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:50.351509', 'step': 15226, 'epoch': 2}
{'type': 'loss', 'content': 0.03460828214883804, 'timestamp': '2025-10-02 00:37:50.360920', 'step': 15227, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:50.422585', 'step': 15227, 'epoch': 2}
{'type': 'loss', 'content': 0.06892099976539612, 'timestamp': '2025-10-02 00:37:50.433414', 'step': 15228, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:50.501317', 'step': 15228, 'epoch': 2}
{'type': 'loss', 'content': 0.06813015788793564, 'timestamp': '2025-10-02 00:37:50.510668', 'step': 15229, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:50.574426', 'step': 15229, 'epoch': 2}
{'type': 'loss', 'content': 0.06784186512231827, 'timestamp': '2025-10-02 00:37:50.584057', 'step': 15230, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:50.641860', 'step': 15230, 'epoch': 2}
{'type': 'loss', 'content': 0.0416250042617321, 'timestamp': '2025-10-02 00:37:50.646883', 'step': 15231, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:50.702952', 'step': 15231, 'epoch': 2}
{'type': 'loss', 'content': 0.13948696851730347, 'timestamp': '2025-10-02 00:37:50.714553', 'step': 15232, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:50.786984', 'step': 15232, 'epoch': 2}
{'type': 'loss', 'content': 0.01907256431877613, 'timestamp': '2025-10-02 00:37:50.789520', 'step': 15233, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:50.860542', 'step': 15233, 'epoch': 2}
{'type': 'loss', 'content': 0.08781612664461136, 'timestamp': '2025-10-02 00:37:50.868484', 'step': 15234, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:50.936652', 'step': 15234, 'epoch': 2}
{'type': 'loss', 'content': 0.079906165599823, 'timestamp': '2025-10-02 00:37:50.940061', 'step': 15235, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:51.002676', 'step': 15235, 'epoch': 2}
{'type': 'loss', 'content': 0.011159557849168777, 'timestamp': '2025-10-02 00:37:51.013380', 'step': 15236, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:51.081713', 'step': 15236, 'epoch': 2}
{'type': 'loss', 'content': 0.048909302800893784, 'timestamp': '2025-10-02 00:37:51.090141', 'step': 15237, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:51.161858', 'step': 15237, 'epoch': 2}
{'type': 'loss', 'content': 0.19469290971755981, 'timestamp': '2025-10-02 00:37:51.166390', 'step': 15238, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:51.247956', 'step': 15238, 'epoch': 2}
{'type': 'loss', 'content': 0.0706670880317688, 'timestamp': '2025-10-02 00:37:51.259830', 'step': 15239, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:51.324634', 'step': 15239, 'epoch': 2}
{'type': 'loss', 'content': 0.03514905646443367, 'timestamp': '2025-10-02 00:37:51.331882', 'step': 15240, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:37:51.390622', 'step': 15240, 'epoch': 2}
{'type': 'loss', 'content': 0.1334211677312851, 'timestamp': '2025-10-02 00:37:51.393143', 'step': 15241, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:51.453639', 'step': 15241, 'epoch': 2}
{'type': 'loss', 'content': 0.07657494395971298, 'timestamp': '2025-10-02 00:37:51.456434', 'step': 15242, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:51.511125', 'step': 15242, 'epoch': 2}
{'type': 'loss', 'content': 0.06831586360931396, 'timestamp': '2025-10-02 00:37:51.513375', 'step': 15243, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:51.567599', 'step': 15243, 'epoch': 2}
{'type': 'loss', 'content': 0.06316287815570831, 'timestamp': '2025-10-02 00:37:51.573585', 'step': 15244, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:51.628727', 'step': 15244, 'epoch': 2}
{'type': 'loss', 'content': 0.0009478723513893783, 'timestamp': '2025-10-02 00:37:51.634871', 'step': 15245, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:51.689022', 'step': 15245, 'epoch': 2}
{'type': 'loss', 'content': 0.05394705384969711, 'timestamp': '2025-10-02 00:37:51.691440', 'step': 15246, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:51.750495', 'step': 15246, 'epoch': 2}
{'type': 'loss', 'content': 0.04127730429172516, 'timestamp': '2025-10-02 00:37:51.760709', 'step': 15247, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:51.815830', 'step': 15247, 'epoch': 2}
{'type': 'loss', 'content': 0.01471866574138403, 'timestamp': '2025-10-02 00:37:51.825995', 'step': 15248, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:51.879068', 'step': 15248, 'epoch': 2}
{'type': 'loss', 'content': 0.054733745753765106, 'timestamp': '2025-10-02 00:37:51.881454', 'step': 15249, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:37:51.934862', 'step': 15249, 'epoch': 2}
{'type': 'loss', 'content': 0.18190892040729523, 'timestamp': '2025-10-02 00:37:51.937284', 'step': 15250, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:51.997157', 'step': 15250, 'epoch': 2}
{'type': 'loss', 'content': 0.11901827901601791, 'timestamp': '2025-10-02 00:37:52.007347', 'step': 15251, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:52.061880', 'step': 15251, 'epoch': 2}
{'type': 'loss', 'content': 0.026205062866210938, 'timestamp': '2025-10-02 00:37:52.070381', 'step': 15252, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:52.123926', 'step': 15252, 'epoch': 2}
{'type': 'loss', 'content': 0.042764145880937576, 'timestamp': '2025-10-02 00:37:52.131787', 'step': 15253, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:52.185961', 'step': 15253, 'epoch': 2}
{'type': 'loss', 'content': 0.05152652785181999, 'timestamp': '2025-10-02 00:37:52.188283', 'step': 15254, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:52.242456', 'step': 15254, 'epoch': 2}
{'type': 'loss', 'content': 0.04453735053539276, 'timestamp': '2025-10-02 00:37:52.245047', 'step': 15255, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:52.300308', 'step': 15255, 'epoch': 2}
{'type': 'loss', 'content': 0.06288935989141464, 'timestamp': '2025-10-02 00:37:52.310640', 'step': 15256, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:52.364367', 'step': 15256, 'epoch': 2}
{'type': 'loss', 'content': 0.04338252916932106, 'timestamp': '2025-10-02 00:37:52.366633', 'step': 15257, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:52.420485', 'step': 15257, 'epoch': 2}
{'type': 'loss', 'content': 0.051916446536779404, 'timestamp': '2025-10-02 00:37:52.423245', 'step': 15258, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:52.477918', 'step': 15258, 'epoch': 2}
{'type': 'loss', 'content': 0.18652234971523285, 'timestamp': '2025-10-02 00:37:52.483243', 'step': 15259, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:52.540909', 'step': 15259, 'epoch': 2}
{'type': 'loss', 'content': 0.15552370250225067, 'timestamp': '2025-10-02 00:37:52.550074', 'step': 15260, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:52.621579', 'step': 15260, 'epoch': 2}
{'type': 'loss', 'content': 0.05776296555995941, 'timestamp': '2025-10-02 00:37:52.623816', 'step': 15261, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:52.678586', 'step': 15261, 'epoch': 2}
{'type': 'loss', 'content': 0.03749058395624161, 'timestamp': '2025-10-02 00:37:52.681191', 'step': 15262, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:52.735273', 'step': 15262, 'epoch': 2}
{'type': 'loss', 'content': 0.08856961876153946, 'timestamp': '2025-10-02 00:37:52.737975', 'step': 15263, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:37:52.795198', 'step': 15263, 'epoch': 2}
{'type': 'loss', 'content': 0.06000259146094322, 'timestamp': '2025-10-02 00:37:52.805348', 'step': 15264, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:52.859731', 'step': 15264, 'epoch': 2}
{'type': 'loss', 'content': 0.09212107956409454, 'timestamp': '2025-10-02 00:37:52.869995', 'step': 15265, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:52.924798', 'step': 15265, 'epoch': 2}
{'type': 'loss', 'content': 0.02109557017683983, 'timestamp': '2025-10-02 00:37:52.927335', 'step': 15266, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:52.981197', 'step': 15266, 'epoch': 2}
{'type': 'loss', 'content': 0.10488192737102509, 'timestamp': '2025-10-02 00:37:52.983926', 'step': 15267, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:53.038268', 'step': 15267, 'epoch': 2}
{'type': 'loss', 'content': 0.05949069559574127, 'timestamp': '2025-10-02 00:37:53.047003', 'step': 15268, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:53.110202', 'step': 15268, 'epoch': 2}
{'type': 'loss', 'content': 0.06334037333726883, 'timestamp': '2025-10-02 00:37:53.112838', 'step': 15269, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:53.166816', 'step': 15269, 'epoch': 2}
{'type': 'loss', 'content': 0.1598314493894577, 'timestamp': '2025-10-02 00:37:53.169367', 'step': 15270, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:53.224024', 'step': 15270, 'epoch': 2}
{'type': 'loss', 'content': 0.09436865150928497, 'timestamp': '2025-10-02 00:37:53.227222', 'step': 15271, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:37:53.282271', 'step': 15271, 'epoch': 2}
{'type': 'loss', 'content': 0.01775047928094864, 'timestamp': '2025-10-02 00:37:53.290818', 'step': 15272, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:53.345359', 'step': 15272, 'epoch': 2}
{'type': 'loss', 'content': 0.08398996293544769, 'timestamp': '2025-10-02 00:37:53.348000', 'step': 15273, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:53.407468', 'step': 15273, 'epoch': 2}
{'type': 'loss', 'content': 0.028907353058457375, 'timestamp': '2025-10-02 00:37:53.417668', 'step': 15274, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:53.472060', 'step': 15274, 'epoch': 2}
{'type': 'loss', 'content': 0.05769205838441849, 'timestamp': '2025-10-02 00:37:53.474436', 'step': 15275, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:53.528199', 'step': 15275, 'epoch': 2}
{'type': 'loss', 'content': 0.17448627948760986, 'timestamp': '2025-10-02 00:37:53.534168', 'step': 15276, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:53.588856', 'step': 15276, 'epoch': 2}
{'type': 'loss', 'content': 0.046444181352853775, 'timestamp': '2025-10-02 00:37:53.590985', 'step': 15277, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:53.645672', 'step': 15277, 'epoch': 2}
{'type': 'loss', 'content': 0.09883319586515427, 'timestamp': '2025-10-02 00:37:53.652238', 'step': 15278, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:53.724045', 'step': 15278, 'epoch': 2}
{'type': 'loss', 'content': 0.011271479539573193, 'timestamp': '2025-10-02 00:37:53.734238', 'step': 15279, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:53.804195', 'step': 15279, 'epoch': 2}
{'type': 'loss', 'content': 0.12246018648147583, 'timestamp': '2025-10-02 00:37:53.816762', 'step': 15280, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:53.871345', 'step': 15280, 'epoch': 2}
{'type': 'loss', 'content': 0.05245542153716087, 'timestamp': '2025-10-02 00:37:53.873811', 'step': 15281, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:53.927616', 'step': 15281, 'epoch': 2}
{'type': 'loss', 'content': 0.09359610825777054, 'timestamp': '2025-10-02 00:37:53.929934', 'step': 15282, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:53.984439', 'step': 15282, 'epoch': 2}
{'type': 'loss', 'content': 0.06057611107826233, 'timestamp': '2025-10-02 00:37:53.987006', 'step': 15283, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:37:54.042676', 'step': 15283, 'epoch': 2}
{'type': 'loss', 'content': 0.18366725742816925, 'timestamp': '2025-10-02 00:37:54.049105', 'step': 15284, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:54.102482', 'step': 15284, 'epoch': 2}
{'type': 'loss', 'content': 0.037151798605918884, 'timestamp': '2025-10-02 00:37:54.104928', 'step': 15285, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:54.159951', 'step': 15285, 'epoch': 2}
{'type': 'loss', 'content': 0.020548228174448013, 'timestamp': '2025-10-02 00:37:54.165882', 'step': 15286, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:54.219920', 'step': 15286, 'epoch': 2}
{'type': 'loss', 'content': 0.10053032636642456, 'timestamp': '2025-10-02 00:37:54.222121', 'step': 15287, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:37:54.292123', 'step': 15287, 'epoch': 2}
{'type': 'loss', 'content': 0.04294045269489288, 'timestamp': '2025-10-02 00:37:54.305337', 'step': 15288, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:37:54.359513', 'step': 15288, 'epoch': 2}
{'type': 'loss', 'content': 0.07028532773256302, 'timestamp': '2025-10-02 00:37:54.362843', 'step': 15289, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:54.423528', 'step': 15289, 'epoch': 2}
{'type': 'loss', 'content': 0.024991262704133987, 'timestamp': '2025-10-02 00:37:54.433736', 'step': 15290, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:54.488116', 'step': 15290, 'epoch': 2}
{'type': 'loss', 'content': 0.15185438096523285, 'timestamp': '2025-10-02 00:37:54.490636', 'step': 15291, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:37:54.547286', 'step': 15291, 'epoch': 2}
{'type': 'loss', 'content': 0.0674622654914856, 'timestamp': '2025-10-02 00:37:54.555131', 'step': 15292, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:54.608161', 'step': 15292, 'epoch': 2}
{'type': 'loss', 'content': 0.051279813051223755, 'timestamp': '2025-10-02 00:37:54.611459', 'step': 15293, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:54.666206', 'step': 15293, 'epoch': 2}
{'type': 'loss', 'content': 0.037080250680446625, 'timestamp': '2025-10-02 00:37:54.669912', 'step': 15294, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:54.726600', 'step': 15294, 'epoch': 2}
{'type': 'loss', 'content': 0.09764603525400162, 'timestamp': '2025-10-02 00:37:54.729117', 'step': 15295, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:54.786149', 'step': 15295, 'epoch': 2}
{'type': 'loss', 'content': 0.11493968218564987, 'timestamp': '2025-10-02 00:37:54.795209', 'step': 15296, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:37:54.853468', 'step': 15296, 'epoch': 2}
{'type': 'loss', 'content': 0.08507468551397324, 'timestamp': '2025-10-02 00:37:54.860301', 'step': 15297, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:37:54.917202', 'step': 15297, 'epoch': 2}
{'type': 'loss', 'content': 0.12417377531528473, 'timestamp': '2025-10-02 00:37:54.919750', 'step': 15298, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:54.974731', 'step': 15298, 'epoch': 2}
{'type': 'loss', 'content': 0.07028953731060028, 'timestamp': '2025-10-02 00:37:54.980858', 'step': 15299, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:55.054297', 'step': 15299, 'epoch': 2}
{'type': 'loss', 'content': 0.0634472519159317, 'timestamp': '2025-10-02 00:37:55.065251', 'step': 15300, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:55.121076', 'step': 15300, 'epoch': 2}
{'type': 'loss', 'content': 0.0660417303442955, 'timestamp': '2025-10-02 00:37:55.127193', 'step': 15301, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:55.183694', 'step': 15301, 'epoch': 2}
{'type': 'loss', 'content': 0.048270322382450104, 'timestamp': '2025-10-02 00:37:55.189943', 'step': 15302, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:37:55.248288', 'step': 15302, 'epoch': 2}
{'type': 'loss', 'content': 0.02598637156188488, 'timestamp': '2025-10-02 00:37:55.257848', 'step': 15303, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:37:55.314612', 'step': 15303, 'epoch': 2}
{'type': 'loss', 'content': 0.04879371076822281, 'timestamp': '2025-10-02 00:37:55.321548', 'step': 15304, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:37:55.377566', 'step': 15304, 'epoch': 2}
{'type': 'loss', 'content': 0.08096455037593842, 'timestamp': '2025-10-02 00:37:55.381463', 'step': 15305, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:37:55.441289', 'step': 15305, 'epoch': 2}
{'type': 'loss', 'content': 0.09590711444616318, 'timestamp': '2025-10-02 00:37:55.444227', 'step': 15306, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:37:55.501085', 'step': 15306, 'epoch': 2}
{'type': 'loss', 'content': 0.12291954457759857, 'timestamp': '2025-10-02 00:37:55.504250', 'step': 15307, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:37:55.565481', 'step': 15307, 'epoch': 2}
{'type': 'loss', 'content': 0.07678557932376862, 'timestamp': '2025-10-02 00:37:55.576447', 'step': 15308, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:37:55.631148', 'step': 15308, 'epoch': 2}
{'type': 'loss', 'content': 0.11734859645366669, 'timestamp': '2025-10-02 00:37:55.634474', 'step': 15309, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:37:55.691200', 'step': 15309, 'epoch': 2}
{'type': 'loss', 'content': 0.06252829730510712, 'timestamp': '2025-10-02 00:37:55.694380', 'step': 15310, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:37:55.752966', 'step': 15310, 'epoch': 2}
{'type': 'loss', 'content': 0.13698948919773102, 'timestamp': '2025-10-02 00:37:55.758703', 'step': 15311, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:37:55.818686', 'step': 15311, 'epoch': 2}
{'type': 'loss', 'content': 0.09592103213071823, 'timestamp': '2025-10-02 00:37:55.825464', 'step': 15312, 'epoch': 2}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:38:22.877913', 'step': 15312, 'epoch': 2}
{'type': 'pplx', 'content': 97.80141562917117, 'timestamp': '2025-10-02 00:38:22.881914', 'step': 15312, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:22.936405', 'step': 15312, 'epoch': 2}
{'type': 'loss', 'content': 0.038709815591573715, 'timestamp': '2025-10-02 00:38:22.939732', 'step': 15313, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:22.995155', 'step': 15313, 'epoch': 2}
{'type': 'loss', 'content': 0.12120141834020615, 'timestamp': '2025-10-02 00:38:22.999517', 'step': 15314, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:23.054276', 'step': 15314, 'epoch': 2}
{'type': 'loss', 'content': 0.05744970962405205, 'timestamp': '2025-10-02 00:38:23.060291', 'step': 15315, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:23.118487', 'step': 15315, 'epoch': 2}
{'type': 'loss', 'content': 0.013115741312503815, 'timestamp': '2025-10-02 00:38:23.125660', 'step': 15316, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:23.181792', 'step': 15316, 'epoch': 2}
{'type': 'loss', 'content': 0.09315776824951172, 'timestamp': '2025-10-02 00:38:23.185003', 'step': 15317, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:23.243197', 'step': 15317, 'epoch': 2}
{'type': 'loss', 'content': 0.03815140947699547, 'timestamp': '2025-10-02 00:38:23.252783', 'step': 15318, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:23.311315', 'step': 15318, 'epoch': 2}
{'type': 'loss', 'content': 0.06695935130119324, 'timestamp': '2025-10-02 00:38:23.314482', 'step': 15319, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:23.373721', 'step': 15319, 'epoch': 2}
{'type': 'loss', 'content': 0.04757852479815483, 'timestamp': '2025-10-02 00:38:23.383886', 'step': 15320, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:23.439633', 'step': 15320, 'epoch': 2}
{'type': 'loss', 'content': 0.05227711424231529, 'timestamp': '2025-10-02 00:38:23.445909', 'step': 15321, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:23.501298', 'step': 15321, 'epoch': 2}
{'type': 'loss', 'content': 0.16378764808177948, 'timestamp': '2025-10-02 00:38:23.510038', 'step': 15322, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:23.580373', 'step': 15322, 'epoch': 2}
{'type': 'loss', 'content': 0.052174944430589676, 'timestamp': '2025-10-02 00:38:23.590612', 'step': 15323, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:23.659136', 'step': 15323, 'epoch': 2}
{'type': 'loss', 'content': 0.03181422874331474, 'timestamp': '2025-10-02 00:38:23.666041', 'step': 15324, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:23.722264', 'step': 15324, 'epoch': 2}
{'type': 'loss', 'content': 0.08524849265813828, 'timestamp': '2025-10-02 00:38:23.724652', 'step': 15325, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:38:23.801761', 'step': 15325, 'epoch': 2}
{'type': 'loss', 'content': 0.059049222618341446, 'timestamp': '2025-10-02 00:38:23.815017', 'step': 15326, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:23.872787', 'step': 15326, 'epoch': 2}
{'type': 'loss', 'content': 0.07648871093988419, 'timestamp': '2025-10-02 00:38:23.875632', 'step': 15327, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:23.937230', 'step': 15327, 'epoch': 2}
{'type': 'loss', 'content': 0.11281061917543411, 'timestamp': '2025-10-02 00:38:23.945905', 'step': 15328, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:38:24.001272', 'step': 15328, 'epoch': 2}
{'type': 'loss', 'content': 0.0403088703751564, 'timestamp': '2025-10-02 00:38:24.005872', 'step': 15329, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:24.067948', 'step': 15329, 'epoch': 2}
{'type': 'loss', 'content': 0.0186957698315382, 'timestamp': '2025-10-02 00:38:24.074070', 'step': 15330, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:24.132007', 'step': 15330, 'epoch': 2}
{'type': 'loss', 'content': 0.09774141013622284, 'timestamp': '2025-10-02 00:38:24.133932', 'step': 15331, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:24.191699', 'step': 15331, 'epoch': 2}
{'type': 'loss', 'content': 0.02334955520927906, 'timestamp': '2025-10-02 00:38:24.198660', 'step': 15332, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:38:24.258770', 'step': 15332, 'epoch': 2}
{'type': 'loss', 'content': 0.008965050801634789, 'timestamp': '2025-10-02 00:38:24.270078', 'step': 15333, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:24.327812', 'step': 15333, 'epoch': 2}
{'type': 'loss', 'content': 0.09517160058021545, 'timestamp': '2025-10-02 00:38:24.332370', 'step': 15334, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:24.387576', 'step': 15334, 'epoch': 2}
{'type': 'loss', 'content': 0.04733593761920929, 'timestamp': '2025-10-02 00:38:24.390240', 'step': 15335, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:24.444824', 'step': 15335, 'epoch': 2}
{'type': 'loss', 'content': 0.024778928607702255, 'timestamp': '2025-10-02 00:38:24.452425', 'step': 15336, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:24.510806', 'step': 15336, 'epoch': 2}
{'type': 'loss', 'content': 0.14246022701263428, 'timestamp': '2025-10-02 00:38:24.515463', 'step': 15337, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:24.571586', 'step': 15337, 'epoch': 2}
{'type': 'loss', 'content': 0.03400164470076561, 'timestamp': '2025-10-02 00:38:24.575024', 'step': 15338, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:24.632926', 'step': 15338, 'epoch': 2}
{'type': 'loss', 'content': 0.0574299618601799, 'timestamp': '2025-10-02 00:38:24.636530', 'step': 15339, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:24.693830', 'step': 15339, 'epoch': 2}
{'type': 'loss', 'content': 0.05757201835513115, 'timestamp': '2025-10-02 00:38:24.701302', 'step': 15340, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:24.758802', 'step': 15340, 'epoch': 2}
{'type': 'loss', 'content': 0.03887449577450752, 'timestamp': '2025-10-02 00:38:24.761503', 'step': 15341, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:38:24.824577', 'step': 15341, 'epoch': 2}
{'type': 'loss', 'content': 0.030260466039180756, 'timestamp': '2025-10-02 00:38:24.835247', 'step': 15342, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:24.890102', 'step': 15342, 'epoch': 2}
{'type': 'loss', 'content': 0.1712564378976822, 'timestamp': '2025-10-02 00:38:24.894708', 'step': 15343, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:24.950982', 'step': 15343, 'epoch': 2}
{'type': 'loss', 'content': 0.1541978269815445, 'timestamp': '2025-10-02 00:38:24.957731', 'step': 15344, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:25.018628', 'step': 15344, 'epoch': 2}
{'type': 'loss', 'content': 0.0037582903169095516, 'timestamp': '2025-10-02 00:38:25.029665', 'step': 15345, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:25.088481', 'step': 15345, 'epoch': 2}
{'type': 'loss', 'content': 0.06312170624732971, 'timestamp': '2025-10-02 00:38:25.097845', 'step': 15346, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:25.155594', 'step': 15346, 'epoch': 2}
{'type': 'loss', 'content': 0.033483315259218216, 'timestamp': '2025-10-02 00:38:25.161691', 'step': 15347, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:38:25.219857', 'step': 15347, 'epoch': 2}
{'type': 'loss', 'content': 0.04663567990064621, 'timestamp': '2025-10-02 00:38:25.225923', 'step': 15348, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:25.281663', 'step': 15348, 'epoch': 2}
{'type': 'loss', 'content': 0.07831844687461853, 'timestamp': '2025-10-02 00:38:25.285540', 'step': 15349, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:38:25.362283', 'step': 15349, 'epoch': 2}
{'type': 'loss', 'content': 0.06485907733440399, 'timestamp': '2025-10-02 00:38:25.375747', 'step': 15350, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:25.443279', 'step': 15350, 'epoch': 2}
{'type': 'loss', 'content': 0.10629931837320328, 'timestamp': '2025-10-02 00:38:25.449481', 'step': 15351, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:25.507314', 'step': 15351, 'epoch': 2}
{'type': 'loss', 'content': 0.021610427647829056, 'timestamp': '2025-10-02 00:38:25.513804', 'step': 15352, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:25.567816', 'step': 15352, 'epoch': 2}
{'type': 'loss', 'content': 0.10588156431913376, 'timestamp': '2025-10-02 00:38:25.578082', 'step': 15353, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:38:25.632593', 'step': 15353, 'epoch': 2}
{'type': 'loss', 'content': 0.13284161686897278, 'timestamp': '2025-10-02 00:38:25.634979', 'step': 15354, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:25.688913', 'step': 15354, 'epoch': 2}
{'type': 'loss', 'content': 0.07795911282300949, 'timestamp': '2025-10-02 00:38:25.695272', 'step': 15355, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:38:25.750489', 'step': 15355, 'epoch': 2}
{'type': 'loss', 'content': 0.06612370908260345, 'timestamp': '2025-10-02 00:38:25.756515', 'step': 15356, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:38:25.810419', 'step': 15356, 'epoch': 2}
{'type': 'loss', 'content': 0.11835779249668121, 'timestamp': '2025-10-02 00:38:25.813082', 'step': 15357, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:25.868636', 'step': 15357, 'epoch': 2}
{'type': 'loss', 'content': 0.00247497227974236, 'timestamp': '2025-10-02 00:38:25.876469', 'step': 15358, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:25.931005', 'step': 15358, 'epoch': 2}
{'type': 'loss', 'content': 0.06095843017101288, 'timestamp': '2025-10-02 00:38:25.933841', 'step': 15359, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:25.989093', 'step': 15359, 'epoch': 2}
{'type': 'loss', 'content': 0.05941005051136017, 'timestamp': '2025-10-02 00:38:25.999412', 'step': 15360, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:26.054190', 'step': 15360, 'epoch': 2}
{'type': 'loss', 'content': 0.1018325611948967, 'timestamp': '2025-10-02 00:38:26.056629', 'step': 15361, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:26.111641', 'step': 15361, 'epoch': 2}
{'type': 'loss', 'content': 0.03252587839961052, 'timestamp': '2025-10-02 00:38:26.121151', 'step': 15362, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:26.175337', 'step': 15362, 'epoch': 2}
{'type': 'loss', 'content': 0.07848669588565826, 'timestamp': '2025-10-02 00:38:26.177887', 'step': 15363, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:26.231952', 'step': 15363, 'epoch': 2}
{'type': 'loss', 'content': 0.11585862934589386, 'timestamp': '2025-10-02 00:38:26.237884', 'step': 15364, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:26.290916', 'step': 15364, 'epoch': 2}
{'type': 'loss', 'content': 0.03679448738694191, 'timestamp': '2025-10-02 00:38:26.293586', 'step': 15365, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:38:26.348413', 'step': 15365, 'epoch': 2}
{'type': 'loss', 'content': 0.05992823839187622, 'timestamp': '2025-10-02 00:38:26.350726', 'step': 15366, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:26.404812', 'step': 15366, 'epoch': 2}
{'type': 'loss', 'content': 0.06319188326597214, 'timestamp': '2025-10-02 00:38:26.414207', 'step': 15367, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:26.469123', 'step': 15367, 'epoch': 2}
{'type': 'loss', 'content': 0.052515652030706406, 'timestamp': '2025-10-02 00:38:26.476402', 'step': 15368, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:26.529693', 'step': 15368, 'epoch': 2}
{'type': 'loss', 'content': 0.12821416556835175, 'timestamp': '2025-10-02 00:38:26.532039', 'step': 15369, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:26.586869', 'step': 15369, 'epoch': 2}
{'type': 'loss', 'content': 0.008926107548177242, 'timestamp': '2025-10-02 00:38:26.589565', 'step': 15370, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:38:26.651108', 'step': 15370, 'epoch': 2}
{'type': 'loss', 'content': 0.017180802300572395, 'timestamp': '2025-10-02 00:38:26.661646', 'step': 15371, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:26.716676', 'step': 15371, 'epoch': 2}
{'type': 'loss', 'content': 0.030470702797174454, 'timestamp': '2025-10-02 00:38:26.723841', 'step': 15372, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:26.777792', 'step': 15372, 'epoch': 2}
{'type': 'loss', 'content': 0.0646006241440773, 'timestamp': '2025-10-02 00:38:26.783957', 'step': 15373, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:26.837781', 'step': 15373, 'epoch': 2}
{'type': 'loss', 'content': 0.026401281356811523, 'timestamp': '2025-10-02 00:38:26.840615', 'step': 15374, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:26.894384', 'step': 15374, 'epoch': 2}
{'type': 'loss', 'content': 0.039869535714387894, 'timestamp': '2025-10-02 00:38:26.897147', 'step': 15375, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:26.951713', 'step': 15375, 'epoch': 2}
{'type': 'loss', 'content': 0.031341370195150375, 'timestamp': '2025-10-02 00:38:26.958759', 'step': 15376, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:27.014151', 'step': 15376, 'epoch': 2}
{'type': 'loss', 'content': 0.022500107064843178, 'timestamp': '2025-10-02 00:38:27.024457', 'step': 15377, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:27.078861', 'step': 15377, 'epoch': 2}
{'type': 'loss', 'content': 0.07076113671064377, 'timestamp': '2025-10-02 00:38:27.081555', 'step': 15378, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:27.135767', 'step': 15378, 'epoch': 2}
{'type': 'loss', 'content': 0.0018667818512767553, 'timestamp': '2025-10-02 00:38:27.138499', 'step': 15379, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:27.191655', 'step': 15379, 'epoch': 2}
{'type': 'loss', 'content': 0.032744888216257095, 'timestamp': '2025-10-02 00:38:27.197569', 'step': 15380, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:27.250195', 'step': 15380, 'epoch': 2}
{'type': 'loss', 'content': 0.05317637696862221, 'timestamp': '2025-10-02 00:38:27.252920', 'step': 15381, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:27.306741', 'step': 15381, 'epoch': 2}
{'type': 'loss', 'content': 0.05282206833362579, 'timestamp': '2025-10-02 00:38:27.309490', 'step': 15382, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:27.364913', 'step': 15382, 'epoch': 2}
{'type': 'loss', 'content': 0.12394525110721588, 'timestamp': '2025-10-02 00:38:27.367341', 'step': 15383, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:27.422786', 'step': 15383, 'epoch': 2}
{'type': 'loss', 'content': 0.028308546170592308, 'timestamp': '2025-10-02 00:38:27.431492', 'step': 15384, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:27.485115', 'step': 15384, 'epoch': 2}
{'type': 'loss', 'content': 0.026726188138127327, 'timestamp': '2025-10-02 00:38:27.492952', 'step': 15385, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:27.546992', 'step': 15385, 'epoch': 2}
{'type': 'loss', 'content': 0.03591803088784218, 'timestamp': '2025-10-02 00:38:27.549517', 'step': 15386, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:38:27.603178', 'step': 15386, 'epoch': 2}
{'type': 'loss', 'content': 0.040218744426965714, 'timestamp': '2025-10-02 00:38:27.605792', 'step': 15387, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:38:27.660322', 'step': 15387, 'epoch': 2}
{'type': 'loss', 'content': 0.0874037966132164, 'timestamp': '2025-10-02 00:38:27.666424', 'step': 15388, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:27.719633', 'step': 15388, 'epoch': 2}
{'type': 'loss', 'content': 0.01965947262942791, 'timestamp': '2025-10-02 00:38:27.727597', 'step': 15389, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:27.786530', 'step': 15389, 'epoch': 2}
{'type': 'loss', 'content': 0.021487312391400337, 'timestamp': '2025-10-02 00:38:27.796741', 'step': 15390, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:38:27.851232', 'step': 15390, 'epoch': 2}
{'type': 'loss', 'content': 0.080620676279068, 'timestamp': '2025-10-02 00:38:27.855421', 'step': 15391, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:38:27.909274', 'step': 15391, 'epoch': 2}
{'type': 'loss', 'content': 0.12936067581176758, 'timestamp': '2025-10-02 00:38:27.915424', 'step': 15392, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:27.968765', 'step': 15392, 'epoch': 2}
{'type': 'loss', 'content': 0.09780459851026535, 'timestamp': '2025-10-02 00:38:27.971452', 'step': 15393, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:28.029941', 'step': 15393, 'epoch': 2}
{'type': 'loss', 'content': 0.10735401511192322, 'timestamp': '2025-10-02 00:38:28.040166', 'step': 15394, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:28.094558', 'step': 15394, 'epoch': 2}
{'type': 'loss', 'content': 0.048353563994169235, 'timestamp': '2025-10-02 00:38:28.097239', 'step': 15395, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:28.151373', 'step': 15395, 'epoch': 2}
{'type': 'loss', 'content': 0.06188672035932541, 'timestamp': '2025-10-02 00:38:28.157160', 'step': 15396, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:28.210866', 'step': 15396, 'epoch': 2}
{'type': 'loss', 'content': 0.05163300409913063, 'timestamp': '2025-10-02 00:38:28.220531', 'step': 15397, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:38:28.274162', 'step': 15397, 'epoch': 2}
{'type': 'loss', 'content': 0.03118997998535633, 'timestamp': '2025-10-02 00:38:28.276646', 'step': 15398, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:28.330336', 'step': 15398, 'epoch': 2}
{'type': 'loss', 'content': 0.07609688490629196, 'timestamp': '2025-10-02 00:38:28.332814', 'step': 15399, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:28.388586', 'step': 15399, 'epoch': 2}
{'type': 'loss', 'content': 0.10395137965679169, 'timestamp': '2025-10-02 00:38:28.396537', 'step': 15400, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:28.454746', 'step': 15400, 'epoch': 2}
{'type': 'loss', 'content': 0.06096804514527321, 'timestamp': '2025-10-02 00:38:28.472699', 'step': 15401, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:38:28.536238', 'step': 15401, 'epoch': 2}
{'type': 'loss', 'content': 0.09680910408496857, 'timestamp': '2025-10-02 00:38:28.542984', 'step': 15402, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:28.624732', 'step': 15402, 'epoch': 2}
{'type': 'loss', 'content': 0.03454482555389404, 'timestamp': '2025-10-02 00:38:28.634933', 'step': 15403, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:28.690385', 'step': 15403, 'epoch': 2}
{'type': 'loss', 'content': 0.1418399065732956, 'timestamp': '2025-10-02 00:38:28.696682', 'step': 15404, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:28.749694', 'step': 15404, 'epoch': 2}
{'type': 'loss', 'content': 0.21536247432231903, 'timestamp': '2025-10-02 00:38:28.752116', 'step': 15405, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:28.805796', 'step': 15405, 'epoch': 2}
{'type': 'loss', 'content': 0.019076036289334297, 'timestamp': '2025-10-02 00:38:28.808498', 'step': 15406, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:38:28.875401', 'step': 15406, 'epoch': 2}
{'type': 'loss', 'content': 0.03470985218882561, 'timestamp': '2025-10-02 00:38:28.887332', 'step': 15407, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:38:28.942922', 'step': 15407, 'epoch': 2}
{'type': 'loss', 'content': 0.07413703203201294, 'timestamp': '2025-10-02 00:38:28.949526', 'step': 15408, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:38:29.021154', 'step': 15408, 'epoch': 2}
{'type': 'loss', 'content': 0.03708537667989731, 'timestamp': '2025-10-02 00:38:29.035584', 'step': 15409, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:29.089935', 'step': 15409, 'epoch': 2}
{'type': 'loss', 'content': 0.01530369557440281, 'timestamp': '2025-10-02 00:38:29.096109', 'step': 15410, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:29.150881', 'step': 15410, 'epoch': 2}
{'type': 'loss', 'content': 0.038881201297044754, 'timestamp': '2025-10-02 00:38:29.158627', 'step': 15411, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:29.212650', 'step': 15411, 'epoch': 2}
{'type': 'loss', 'content': 0.007340952288359404, 'timestamp': '2025-10-02 00:38:29.221277', 'step': 15412, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:38:29.282377', 'step': 15412, 'epoch': 2}
{'type': 'loss', 'content': 0.03590147942304611, 'timestamp': '2025-10-02 00:38:29.294130', 'step': 15413, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:38:29.363248', 'step': 15413, 'epoch': 2}
{'type': 'loss', 'content': 0.033630602061748505, 'timestamp': '2025-10-02 00:38:29.375586', 'step': 15414, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:29.430272', 'step': 15414, 'epoch': 2}
{'type': 'loss', 'content': 0.019769249483942986, 'timestamp': '2025-10-02 00:38:29.433233', 'step': 15415, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:29.487126', 'step': 15415, 'epoch': 2}
{'type': 'loss', 'content': 0.03469080105423927, 'timestamp': '2025-10-02 00:38:29.493003', 'step': 15416, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:38:29.545346', 'step': 15416, 'epoch': 2}
{'type': 'loss', 'content': 0.14817608892917633, 'timestamp': '2025-10-02 00:38:29.548291', 'step': 15417, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:38:29.612024', 'step': 15417, 'epoch': 2}
{'type': 'loss', 'content': 0.037297967821359634, 'timestamp': '2025-10-02 00:38:29.622881', 'step': 15418, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:29.676948', 'step': 15418, 'epoch': 2}
{'type': 'loss', 'content': 0.0710211843252182, 'timestamp': '2025-10-02 00:38:29.679393', 'step': 15419, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:29.732891', 'step': 15419, 'epoch': 2}
{'type': 'loss', 'content': 0.0403938889503479, 'timestamp': '2025-10-02 00:38:29.738938', 'step': 15420, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:29.792537', 'step': 15420, 'epoch': 2}
{'type': 'loss', 'content': 0.2387557327747345, 'timestamp': '2025-10-02 00:38:29.794912', 'step': 15421, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:38:29.856638', 'step': 15421, 'epoch': 2}
{'type': 'loss', 'content': 0.02071697637438774, 'timestamp': '2025-10-02 00:38:29.867285', 'step': 15422, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:29.923326', 'step': 15422, 'epoch': 2}
{'type': 'loss', 'content': 0.06707856804132462, 'timestamp': '2025-10-02 00:38:29.925817', 'step': 15423, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:29.981750', 'step': 15423, 'epoch': 2}
{'type': 'loss', 'content': 0.02883494645357132, 'timestamp': '2025-10-02 00:38:29.988025', 'step': 15424, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:30.041860', 'step': 15424, 'epoch': 2}
{'type': 'loss', 'content': 0.02552226185798645, 'timestamp': '2025-10-02 00:38:30.044122', 'step': 15425, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:30.098145', 'step': 15425, 'epoch': 2}
{'type': 'loss', 'content': 0.17841967940330505, 'timestamp': '2025-10-02 00:38:30.100886', 'step': 15426, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:30.154645', 'step': 15426, 'epoch': 2}
{'type': 'loss', 'content': 0.04835133254528046, 'timestamp': '2025-10-02 00:38:30.156903', 'step': 15427, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:30.211012', 'step': 15427, 'epoch': 2}
{'type': 'loss', 'content': 0.1532912701368332, 'timestamp': '2025-10-02 00:38:30.217092', 'step': 15428, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:38:30.276593', 'step': 15428, 'epoch': 2}
{'type': 'loss', 'content': 0.055065255612134933, 'timestamp': '2025-10-02 00:38:30.287844', 'step': 15429, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:38:30.364188', 'step': 15429, 'epoch': 2}
{'type': 'loss', 'content': 0.01158928219228983, 'timestamp': '2025-10-02 00:38:30.377665', 'step': 15430, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:38:30.432344', 'step': 15430, 'epoch': 2}
{'type': 'loss', 'content': 0.10570862144231796, 'timestamp': '2025-10-02 00:38:30.434943', 'step': 15431, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:38:30.488576', 'step': 15431, 'epoch': 2}
{'type': 'loss', 'content': 0.0648563876748085, 'timestamp': '2025-10-02 00:38:30.495009', 'step': 15432, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:30.549645', 'step': 15432, 'epoch': 2}
{'type': 'loss', 'content': 0.10481469333171844, 'timestamp': '2025-10-02 00:38:30.552393', 'step': 15433, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:30.606782', 'step': 15433, 'epoch': 2}
{'type': 'loss', 'content': 0.022296754643321037, 'timestamp': '2025-10-02 00:38:30.608973', 'step': 15434, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:38:30.664422', 'step': 15434, 'epoch': 2}
{'type': 'loss', 'content': 0.08688095211982727, 'timestamp': '2025-10-02 00:38:30.666802', 'step': 15435, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:30.721874', 'step': 15435, 'epoch': 2}
{'type': 'loss', 'content': 0.053471822291612625, 'timestamp': '2025-10-02 00:38:30.728095', 'step': 15436, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:38:30.780389', 'step': 15436, 'epoch': 2}
{'type': 'loss', 'content': 0.07874203473329544, 'timestamp': '2025-10-02 00:38:30.782658', 'step': 15437, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:30.836321', 'step': 15437, 'epoch': 2}
{'type': 'loss', 'content': 0.0833035483956337, 'timestamp': '2025-10-02 00:38:30.838558', 'step': 15438, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:30.892128', 'step': 15438, 'epoch': 2}
{'type': 'loss', 'content': 0.07913926988840103, 'timestamp': '2025-10-02 00:38:30.894706', 'step': 15439, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:30.952036', 'step': 15439, 'epoch': 2}
{'type': 'loss', 'content': 0.019506175071001053, 'timestamp': '2025-10-02 00:38:30.962189', 'step': 15440, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:38:31.024027', 'step': 15440, 'epoch': 2}
{'type': 'loss', 'content': 0.04273778200149536, 'timestamp': '2025-10-02 00:38:31.035498', 'step': 15441, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:31.088806', 'step': 15441, 'epoch': 2}
{'type': 'loss', 'content': 0.09362338483333588, 'timestamp': '2025-10-02 00:38:31.091048', 'step': 15442, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:31.145417', 'step': 15442, 'epoch': 2}
{'type': 'loss', 'content': 0.04330391436815262, 'timestamp': '2025-10-02 00:38:31.153268', 'step': 15443, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:31.207491', 'step': 15443, 'epoch': 2}
{'type': 'loss', 'content': 0.008023527450859547, 'timestamp': '2025-10-02 00:38:31.231033', 'step': 15444, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:31.320473', 'step': 15444, 'epoch': 2}
{'type': 'loss', 'content': 0.062384262681007385, 'timestamp': '2025-10-02 00:38:31.335874', 'step': 15445, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:38:31.410896', 'step': 15445, 'epoch': 2}
{'type': 'loss', 'content': 0.013154840096831322, 'timestamp': '2025-10-02 00:38:31.423004', 'step': 15446, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:31.488976', 'step': 15446, 'epoch': 2}
{'type': 'loss', 'content': 0.08936498314142227, 'timestamp': '2025-10-02 00:38:31.498821', 'step': 15447, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:31.569269', 'step': 15447, 'epoch': 2}
{'type': 'loss', 'content': 0.0488036647439003, 'timestamp': '2025-10-02 00:38:31.582141', 'step': 15448, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:31.654368', 'step': 15448, 'epoch': 2}
{'type': 'loss', 'content': 0.05088532716035843, 'timestamp': '2025-10-02 00:38:31.664902', 'step': 15449, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:31.729454', 'step': 15449, 'epoch': 2}
{'type': 'loss', 'content': 0.03983934968709946, 'timestamp': '2025-10-02 00:38:31.732560', 'step': 15450, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:31.804792', 'step': 15450, 'epoch': 2}
{'type': 'loss', 'content': 0.023188920691609383, 'timestamp': '2025-10-02 00:38:31.815427', 'step': 15451, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:38:31.894069', 'step': 15451, 'epoch': 2}
{'type': 'loss', 'content': 0.0317758210003376, 'timestamp': '2025-10-02 00:38:31.905390', 'step': 15452, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:38:31.971581', 'step': 15452, 'epoch': 2}
{'type': 'loss', 'content': 0.046502090990543365, 'timestamp': '2025-10-02 00:38:31.973813', 'step': 15453, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:32.041758', 'step': 15453, 'epoch': 2}
{'type': 'loss', 'content': 0.1235966831445694, 'timestamp': '2025-10-02 00:38:32.048222', 'step': 15454, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:32.113876', 'step': 15454, 'epoch': 2}
{'type': 'loss', 'content': 0.03285575658082962, 'timestamp': '2025-10-02 00:38:32.121662', 'step': 15455, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:32.194596', 'step': 15455, 'epoch': 2}
{'type': 'loss', 'content': 0.053589172661304474, 'timestamp': '2025-10-02 00:38:32.204040', 'step': 15456, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:38:32.277862', 'step': 15456, 'epoch': 2}
{'type': 'loss', 'content': 0.055719632655382156, 'timestamp': '2025-10-02 00:38:32.289303', 'step': 15457, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:32.358144', 'step': 15457, 'epoch': 2}
{'type': 'loss', 'content': 0.0802670419216156, 'timestamp': '2025-10-02 00:38:32.366753', 'step': 15458, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:32.437149', 'step': 15458, 'epoch': 2}
{'type': 'loss', 'content': 0.049155015498399734, 'timestamp': '2025-10-02 00:38:32.443340', 'step': 15459, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:32.512463', 'step': 15459, 'epoch': 2}
{'type': 'loss', 'content': 0.029224339872598648, 'timestamp': '2025-10-02 00:38:32.521117', 'step': 15460, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:38:32.596959', 'step': 15460, 'epoch': 2}
{'type': 'loss', 'content': 0.01492970623075962, 'timestamp': '2025-10-02 00:38:32.608296', 'step': 15461, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:32.674099', 'step': 15461, 'epoch': 2}
{'type': 'loss', 'content': 0.03562117740511894, 'timestamp': '2025-10-02 00:38:32.682299', 'step': 15462, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:38:32.748553', 'step': 15462, 'epoch': 2}
{'type': 'loss', 'content': 0.11540171504020691, 'timestamp': '2025-10-02 00:38:32.755809', 'step': 15463, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:38:32.842917', 'step': 15463, 'epoch': 2}
{'type': 'loss', 'content': 0.02085627242922783, 'timestamp': '2025-10-02 00:38:32.855673', 'step': 15464, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:32.930274', 'step': 15464, 'epoch': 2}
{'type': 'loss', 'content': 0.06445220112800598, 'timestamp': '2025-10-02 00:38:32.936363', 'step': 15465, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:33.012379', 'step': 15465, 'epoch': 2}
{'type': 'loss', 'content': 0.0206462349742651, 'timestamp': '2025-10-02 00:38:33.019894', 'step': 15466, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:38:33.096915', 'step': 15466, 'epoch': 2}
{'type': 'loss', 'content': 0.02428906410932541, 'timestamp': '2025-10-02 00:38:33.109438', 'step': 15467, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:33.181201', 'step': 15467, 'epoch': 2}
{'type': 'loss', 'content': 0.05432141199707985, 'timestamp': '2025-10-02 00:38:33.192812', 'step': 15468, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:38:33.257470', 'step': 15468, 'epoch': 2}
{'type': 'loss', 'content': 0.09221605211496353, 'timestamp': '2025-10-02 00:38:33.264233', 'step': 15469, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:33.337662', 'step': 15469, 'epoch': 2}
{'type': 'loss', 'content': 0.021054565906524658, 'timestamp': '2025-10-02 00:38:33.343922', 'step': 15470, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:33.418150', 'step': 15470, 'epoch': 2}
{'type': 'loss', 'content': 0.039043691009283066, 'timestamp': '2025-10-02 00:38:33.429041', 'step': 15471, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:33.498300', 'step': 15471, 'epoch': 2}
{'type': 'loss', 'content': 0.029918091371655464, 'timestamp': '2025-10-02 00:38:33.512508', 'step': 15472, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:33.574890', 'step': 15472, 'epoch': 2}
{'type': 'loss', 'content': 0.15296503901481628, 'timestamp': '2025-10-02 00:38:33.584648', 'step': 15473, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:33.660586', 'step': 15473, 'epoch': 2}
{'type': 'loss', 'content': 0.08229323476552963, 'timestamp': '2025-10-02 00:38:33.668721', 'step': 15474, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:38:33.740179', 'step': 15474, 'epoch': 2}
{'type': 'loss', 'content': 0.11117954552173615, 'timestamp': '2025-10-02 00:38:33.742889', 'step': 15475, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:33.807820', 'step': 15475, 'epoch': 2}
{'type': 'loss', 'content': 0.1043396070599556, 'timestamp': '2025-10-02 00:38:33.819832', 'step': 15476, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:33.890709', 'step': 15476, 'epoch': 2}
{'type': 'loss', 'content': 0.038334935903549194, 'timestamp': '2025-10-02 00:38:33.900225', 'step': 15477, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:33.976467', 'step': 15477, 'epoch': 2}
{'type': 'loss', 'content': 0.021636418998241425, 'timestamp': '2025-10-02 00:38:33.987483', 'step': 15478, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:34.062134', 'step': 15478, 'epoch': 2}
{'type': 'loss', 'content': 0.031680431216955185, 'timestamp': '2025-10-02 00:38:34.071482', 'step': 15479, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:34.144782', 'step': 15479, 'epoch': 2}
{'type': 'loss', 'content': 0.0022173882462084293, 'timestamp': '2025-10-02 00:38:34.158662', 'step': 15480, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:34.218883', 'step': 15480, 'epoch': 2}
{'type': 'loss', 'content': 0.09170515090227127, 'timestamp': '2025-10-02 00:38:34.230779', 'step': 15481, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:34.294833', 'step': 15481, 'epoch': 2}
{'type': 'loss', 'content': 0.08947587758302689, 'timestamp': '2025-10-02 00:38:34.305103', 'step': 15482, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:34.383017', 'step': 15482, 'epoch': 2}
{'type': 'loss', 'content': 0.020847316831350327, 'timestamp': '2025-10-02 00:38:34.395320', 'step': 15483, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:34.465610', 'step': 15483, 'epoch': 2}
{'type': 'loss', 'content': 0.028459593653678894, 'timestamp': '2025-10-02 00:38:34.479965', 'step': 15484, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:34.536470', 'step': 15484, 'epoch': 2}
{'type': 'loss', 'content': 0.08733274042606354, 'timestamp': '2025-10-02 00:38:34.546169', 'step': 15485, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:34.621255', 'step': 15485, 'epoch': 2}
{'type': 'loss', 'content': 0.18354102969169617, 'timestamp': '2025-10-02 00:38:34.633044', 'step': 15486, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:34.706497', 'step': 15486, 'epoch': 2}
{'type': 'loss', 'content': 0.04538067802786827, 'timestamp': '2025-10-02 00:38:34.714271', 'step': 15487, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:34.783360', 'step': 15487, 'epoch': 2}
{'type': 'loss', 'content': 0.08457401394844055, 'timestamp': '2025-10-02 00:38:34.796256', 'step': 15488, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:34.870949', 'step': 15488, 'epoch': 2}
{'type': 'loss', 'content': 0.0647973120212555, 'timestamp': '2025-10-02 00:38:34.881088', 'step': 15489, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:34.956777', 'step': 15489, 'epoch': 2}
{'type': 'loss', 'content': 0.058317601680755615, 'timestamp': '2025-10-02 00:38:34.964766', 'step': 15490, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:38:35.040137', 'step': 15490, 'epoch': 2}
{'type': 'loss', 'content': 0.05642259120941162, 'timestamp': '2025-10-02 00:38:35.043879', 'step': 15491, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:35.113550', 'step': 15491, 'epoch': 2}
{'type': 'loss', 'content': 0.08447610586881638, 'timestamp': '2025-10-02 00:38:35.128993', 'step': 15492, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:35.207794', 'step': 15492, 'epoch': 2}
{'type': 'loss', 'content': 0.040940653532743454, 'timestamp': '2025-10-02 00:38:35.214399', 'step': 15493, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:35.276596', 'step': 15493, 'epoch': 2}
{'type': 'loss', 'content': 0.09039270132780075, 'timestamp': '2025-10-02 00:38:35.286718', 'step': 15494, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:35.357400', 'step': 15494, 'epoch': 2}
{'type': 'loss', 'content': 0.05967872962355614, 'timestamp': '2025-10-02 00:38:35.362828', 'step': 15495, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:35.436664', 'step': 15495, 'epoch': 2}
{'type': 'loss', 'content': 0.03339265659451485, 'timestamp': '2025-10-02 00:38:35.443639', 'step': 15496, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:35.507745', 'step': 15496, 'epoch': 2}
{'type': 'loss', 'content': 0.06185062974691391, 'timestamp': '2025-10-02 00:38:35.518026', 'step': 15497, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:35.579955', 'step': 15497, 'epoch': 2}
{'type': 'loss', 'content': 0.025530479848384857, 'timestamp': '2025-10-02 00:38:35.589122', 'step': 15498, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:35.645648', 'step': 15498, 'epoch': 2}
{'type': 'loss', 'content': 0.06335964798927307, 'timestamp': '2025-10-02 00:38:35.648602', 'step': 15499, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:38:35.732276', 'step': 15499, 'epoch': 2}
{'type': 'loss', 'content': 0.027480117976665497, 'timestamp': '2025-10-02 00:38:35.743548', 'step': 15500, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 15500', 'timestamp': '2025-10-02 00:38:36.222594', 'step': 15500, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:38:36.293709', 'step': 15500, 'epoch': 2}
{'type': 'loss', 'content': 0.051523905247449875, 'timestamp': '2025-10-02 00:38:36.307011', 'step': 15501, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:36.378753', 'step': 15501, 'epoch': 2}
{'type': 'loss', 'content': 0.04301146790385246, 'timestamp': '2025-10-02 00:38:36.383115', 'step': 15502, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:38:36.468034', 'step': 15502, 'epoch': 2}
{'type': 'loss', 'content': 0.04487043246626854, 'timestamp': '2025-10-02 00:38:36.481946', 'step': 15503, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:36.548493', 'step': 15503, 'epoch': 2}
{'type': 'loss', 'content': 0.046344101428985596, 'timestamp': '2025-10-02 00:38:36.556081', 'step': 15504, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:36.629559', 'step': 15504, 'epoch': 2}
{'type': 'loss', 'content': 0.09978001564741135, 'timestamp': '2025-10-02 00:38:36.644699', 'step': 15505, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:36.728094', 'step': 15505, 'epoch': 2}
{'type': 'loss', 'content': 0.023243222385644913, 'timestamp': '2025-10-02 00:38:36.740095', 'step': 15506, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:38:36.831923', 'step': 15506, 'epoch': 2}
{'type': 'loss', 'content': 0.03381580486893654, 'timestamp': '2025-10-02 00:38:36.844295', 'step': 15507, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-10-02 00:38:36.946719', 'step': 15507, 'epoch': 2}
{'type': 'loss', 'content': 0.03724928945302963, 'timestamp': '2025-10-02 00:38:36.961157', 'step': 15508, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:37.064907', 'step': 15508, 'epoch': 2}
{'type': 'loss', 'content': 0.05711859092116356, 'timestamp': '2025-10-02 00:38:37.073659', 'step': 15509, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:37.148955', 'step': 15509, 'epoch': 2}
{'type': 'loss', 'content': 0.043208613991737366, 'timestamp': '2025-10-02 00:38:37.156642', 'step': 15510, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:37.250560', 'step': 15510, 'epoch': 2}
{'type': 'loss', 'content': 0.094243623316288, 'timestamp': '2025-10-02 00:38:37.255537', 'step': 15511, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:37.343782', 'step': 15511, 'epoch': 2}
{'type': 'loss', 'content': 0.016467448323965073, 'timestamp': '2025-10-02 00:38:37.352298', 'step': 15512, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:38:37.421303', 'step': 15512, 'epoch': 2}
{'type': 'loss', 'content': 0.15167205035686493, 'timestamp': '2025-10-02 00:38:37.437926', 'step': 15513, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:37.533940', 'step': 15513, 'epoch': 2}
{'type': 'loss', 'content': 0.042458247393369675, 'timestamp': '2025-10-02 00:38:37.548341', 'step': 15514, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:37.606816', 'step': 15514, 'epoch': 2}
{'type': 'loss', 'content': 0.11277928203344345, 'timestamp': '2025-10-02 00:38:37.610949', 'step': 15515, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:38:37.676309', 'step': 15515, 'epoch': 2}
{'type': 'loss', 'content': 0.04755985736846924, 'timestamp': '2025-10-02 00:38:37.695567', 'step': 15516, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:37.763713', 'step': 15516, 'epoch': 2}
{'type': 'loss', 'content': 0.20905373990535736, 'timestamp': '2025-10-02 00:38:37.774514', 'step': 15517, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:38:37.856210', 'step': 15517, 'epoch': 2}
{'type': 'loss', 'content': 0.05210591107606888, 'timestamp': '2025-10-02 00:38:37.866661', 'step': 15518, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:38:37.939416', 'step': 15518, 'epoch': 2}
{'type': 'loss', 'content': 0.03516822308301926, 'timestamp': '2025-10-02 00:38:37.950243', 'step': 15519, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:38.005190', 'step': 15519, 'epoch': 2}
{'type': 'loss', 'content': 0.04563668742775917, 'timestamp': '2025-10-02 00:38:38.012004', 'step': 15520, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:38.081721', 'step': 15520, 'epoch': 2}
{'type': 'loss', 'content': 0.04952389374375343, 'timestamp': '2025-10-02 00:38:38.092719', 'step': 15521, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:38.158954', 'step': 15521, 'epoch': 2}
{'type': 'loss', 'content': 0.05217618867754936, 'timestamp': '2025-10-02 00:38:38.165128', 'step': 15522, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:38.229466', 'step': 15522, 'epoch': 2}
{'type': 'loss', 'content': 0.10756601393222809, 'timestamp': '2025-10-02 00:38:38.236851', 'step': 15523, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:38.308736', 'step': 15523, 'epoch': 2}
{'type': 'loss', 'content': 0.019025031477212906, 'timestamp': '2025-10-02 00:38:38.314858', 'step': 15524, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:38:38.381000', 'step': 15524, 'epoch': 2}
{'type': 'loss', 'content': 0.1054924950003624, 'timestamp': '2025-10-02 00:38:38.384213', 'step': 15525, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:38.444019', 'step': 15525, 'epoch': 2}
{'type': 'loss', 'content': 0.01702667400240898, 'timestamp': '2025-10-02 00:38:38.446657', 'step': 15526, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:38.503530', 'step': 15526, 'epoch': 2}
{'type': 'loss', 'content': 0.07054389268159866, 'timestamp': '2025-10-02 00:38:38.506469', 'step': 15527, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:38.564670', 'step': 15527, 'epoch': 2}
{'type': 'loss', 'content': 0.021860653534531593, 'timestamp': '2025-10-02 00:38:38.574844', 'step': 15528, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:38.638631', 'step': 15528, 'epoch': 2}
{'type': 'loss', 'content': 0.07200101763010025, 'timestamp': '2025-10-02 00:38:38.643303', 'step': 15529, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:38:38.701363', 'step': 15529, 'epoch': 2}
{'type': 'loss', 'content': 0.12366636842489243, 'timestamp': '2025-10-02 00:38:38.704558', 'step': 15530, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:38.776612', 'step': 15530, 'epoch': 2}
{'type': 'loss', 'content': 0.06476441770792007, 'timestamp': '2025-10-02 00:38:38.782635', 'step': 15531, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:38.848895', 'step': 15531, 'epoch': 2}
{'type': 'loss', 'content': 0.20856261253356934, 'timestamp': '2025-10-02 00:38:38.858627', 'step': 15532, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:38:38.926648', 'step': 15532, 'epoch': 2}
{'type': 'loss', 'content': 0.07589416205883026, 'timestamp': '2025-10-02 00:38:38.935639', 'step': 15533, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:39.003073', 'step': 15533, 'epoch': 2}
{'type': 'loss', 'content': 0.028562719002366066, 'timestamp': '2025-10-02 00:38:39.006295', 'step': 15534, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:39.068215', 'step': 15534, 'epoch': 2}
{'type': 'loss', 'content': 0.07496260851621628, 'timestamp': '2025-10-02 00:38:39.074005', 'step': 15535, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:39.138234', 'step': 15535, 'epoch': 2}
{'type': 'loss', 'content': 0.04747672751545906, 'timestamp': '2025-10-02 00:38:39.145720', 'step': 15536, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:39.206103', 'step': 15536, 'epoch': 2}
{'type': 'loss', 'content': 0.03375273570418358, 'timestamp': '2025-10-02 00:38:39.216423', 'step': 15537, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:39.278217', 'step': 15537, 'epoch': 2}
{'type': 'loss', 'content': 0.07330671697854996, 'timestamp': '2025-10-02 00:38:39.286364', 'step': 15538, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:39.345185', 'step': 15538, 'epoch': 2}
{'type': 'loss', 'content': 0.048030029982328415, 'timestamp': '2025-10-02 00:38:39.354754', 'step': 15539, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:39.415630', 'step': 15539, 'epoch': 2}
{'type': 'loss', 'content': 0.052022404968738556, 'timestamp': '2025-10-02 00:38:39.425787', 'step': 15540, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:39.483088', 'step': 15540, 'epoch': 2}
{'type': 'loss', 'content': 0.05110590532422066, 'timestamp': '2025-10-02 00:38:39.485234', 'step': 15541, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:39.539144', 'step': 15541, 'epoch': 2}
{'type': 'loss', 'content': 0.012676791287958622, 'timestamp': '2025-10-02 00:38:39.548511', 'step': 15542, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:39.602250', 'step': 15542, 'epoch': 2}
{'type': 'loss', 'content': 0.07969850301742554, 'timestamp': '2025-10-02 00:38:39.605249', 'step': 15543, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:39.660161', 'step': 15543, 'epoch': 2}
{'type': 'loss', 'content': 0.03745897486805916, 'timestamp': '2025-10-02 00:38:39.668302', 'step': 15544, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:39.722266', 'step': 15544, 'epoch': 2}
{'type': 'loss', 'content': 0.03249425068497658, 'timestamp': '2025-10-02 00:38:39.732512', 'step': 15545, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:39.786571', 'step': 15545, 'epoch': 2}
{'type': 'loss', 'content': 0.04515281319618225, 'timestamp': '2025-10-02 00:38:39.789273', 'step': 15546, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:39.843722', 'step': 15546, 'epoch': 2}
{'type': 'loss', 'content': 0.02681039646267891, 'timestamp': '2025-10-02 00:38:39.851285', 'step': 15547, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:38:39.905705', 'step': 15547, 'epoch': 2}
{'type': 'loss', 'content': 0.12964123487472534, 'timestamp': '2025-10-02 00:38:39.911444', 'step': 15548, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:38:39.968274', 'step': 15548, 'epoch': 2}
{'type': 'loss', 'content': 0.08350305259227753, 'timestamp': '2025-10-02 00:38:39.970503', 'step': 15549, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:40.024268', 'step': 15549, 'epoch': 2}
{'type': 'loss', 'content': 0.10300414264202118, 'timestamp': '2025-10-02 00:38:40.027677', 'step': 15550, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:40.084518', 'step': 15550, 'epoch': 2}
{'type': 'loss', 'content': 0.0507693812251091, 'timestamp': '2025-10-02 00:38:40.087098', 'step': 15551, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:40.141303', 'step': 15551, 'epoch': 2}
{'type': 'loss', 'content': 0.11998295038938522, 'timestamp': '2025-10-02 00:38:40.148286', 'step': 15552, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:40.202436', 'step': 15552, 'epoch': 2}
{'type': 'loss', 'content': 0.045591846108436584, 'timestamp': '2025-10-02 00:38:40.211951', 'step': 15553, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:40.269951', 'step': 15553, 'epoch': 2}
{'type': 'loss', 'content': 0.06158745661377907, 'timestamp': '2025-10-02 00:38:40.272368', 'step': 15554, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:40.327511', 'step': 15554, 'epoch': 2}
{'type': 'loss', 'content': 0.03069375455379486, 'timestamp': '2025-10-02 00:38:40.336574', 'step': 15555, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:40.391438', 'step': 15555, 'epoch': 2}
{'type': 'loss', 'content': 0.05626466125249863, 'timestamp': '2025-10-02 00:38:40.398065', 'step': 15556, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:40.453355', 'step': 15556, 'epoch': 2}
{'type': 'loss', 'content': 0.022145915776491165, 'timestamp': '2025-10-02 00:38:40.455814', 'step': 15557, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:38:40.509870', 'step': 15557, 'epoch': 2}
{'type': 'loss', 'content': 0.09338793903589249, 'timestamp': '2025-10-02 00:38:40.512888', 'step': 15558, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:40.571213', 'step': 15558, 'epoch': 2}
{'type': 'loss', 'content': 0.07097470015287399, 'timestamp': '2025-10-02 00:38:40.580763', 'step': 15559, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:40.638228', 'step': 15559, 'epoch': 2}
{'type': 'loss', 'content': 0.07304095476865768, 'timestamp': '2025-10-02 00:38:40.645053', 'step': 15560, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:40.700082', 'step': 15560, 'epoch': 2}
{'type': 'loss', 'content': 0.0903928354382515, 'timestamp': '2025-10-02 00:38:40.702560', 'step': 15561, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:40.763203', 'step': 15561, 'epoch': 2}
{'type': 'loss', 'content': 0.04135624319314957, 'timestamp': '2025-10-02 00:38:40.772745', 'step': 15562, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:40.827442', 'step': 15562, 'epoch': 2}
{'type': 'loss', 'content': 0.028460191562771797, 'timestamp': '2025-10-02 00:38:40.829995', 'step': 15563, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:40.890421', 'step': 15563, 'epoch': 2}
{'type': 'loss', 'content': 0.06146056577563286, 'timestamp': '2025-10-02 00:38:40.897070', 'step': 15564, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:38:40.964476', 'step': 15564, 'epoch': 2}
{'type': 'loss', 'content': 0.03422969579696655, 'timestamp': '2025-10-02 00:38:40.977457', 'step': 15565, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:41.032421', 'step': 15565, 'epoch': 2}
{'type': 'loss', 'content': 0.05350784584879875, 'timestamp': '2025-10-02 00:38:41.034960', 'step': 15566, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:38:41.090991', 'step': 15566, 'epoch': 2}
{'type': 'loss', 'content': 0.09832799434661865, 'timestamp': '2025-10-02 00:38:41.093442', 'step': 15567, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:41.150208', 'step': 15567, 'epoch': 2}
{'type': 'loss', 'content': 0.03468465059995651, 'timestamp': '2025-10-02 00:38:41.156530', 'step': 15568, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:38:41.230108', 'step': 15568, 'epoch': 2}
{'type': 'loss', 'content': 0.03302164003252983, 'timestamp': '2025-10-02 00:38:41.244515', 'step': 15569, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 560], 'flops': 11200068058304.0}, 'timestamp': '2025-10-02 00:38:41.331795', 'step': 15569, 'epoch': 2}
{'type': 'loss', 'content': 0.013632920570671558, 'timestamp': '2025-10-02 00:38:41.346910', 'step': 15570, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:41.401950', 'step': 15570, 'epoch': 2}
{'type': 'loss', 'content': 0.04518236964941025, 'timestamp': '2025-10-02 00:38:41.404404', 'step': 15571, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:38:41.458927', 'step': 15571, 'epoch': 2}
{'type': 'loss', 'content': 0.14185591042041779, 'timestamp': '2025-10-02 00:38:41.465140', 'step': 15572, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:41.519934', 'step': 15572, 'epoch': 2}
{'type': 'loss', 'content': 0.056966960430145264, 'timestamp': '2025-10-02 00:38:41.522909', 'step': 15573, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:41.577371', 'step': 15573, 'epoch': 2}
{'type': 'loss', 'content': 0.04370063170790672, 'timestamp': '2025-10-02 00:38:41.586723', 'step': 15574, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:38:41.649179', 'step': 15574, 'epoch': 2}
{'type': 'loss', 'content': 0.03672325983643532, 'timestamp': '2025-10-02 00:38:41.659856', 'step': 15575, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:41.721067', 'step': 15575, 'epoch': 2}
{'type': 'loss', 'content': 0.008464558981359005, 'timestamp': '2025-10-02 00:38:41.732018', 'step': 15576, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:41.786697', 'step': 15576, 'epoch': 2}
{'type': 'loss', 'content': 0.022772256284952164, 'timestamp': '2025-10-02 00:38:41.792113', 'step': 15577, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:41.847405', 'step': 15577, 'epoch': 2}
{'type': 'loss', 'content': 0.06496533006429672, 'timestamp': '2025-10-02 00:38:41.854670', 'step': 15578, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:41.910963', 'step': 15578, 'epoch': 2}
{'type': 'loss', 'content': 0.04669422283768654, 'timestamp': '2025-10-02 00:38:41.913651', 'step': 15579, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:41.969763', 'step': 15579, 'epoch': 2}
{'type': 'loss', 'content': 0.09616626799106598, 'timestamp': '2025-10-02 00:38:41.975847', 'step': 15580, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:38:42.029890', 'step': 15580, 'epoch': 2}
{'type': 'loss', 'content': 0.0904461219906807, 'timestamp': '2025-10-02 00:38:42.032602', 'step': 15581, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:42.087797', 'step': 15581, 'epoch': 2}
{'type': 'loss', 'content': 0.06786680221557617, 'timestamp': '2025-10-02 00:38:42.090350', 'step': 15582, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:42.145186', 'step': 15582, 'epoch': 2}
{'type': 'loss', 'content': 0.03869491443037987, 'timestamp': '2025-10-02 00:38:42.147704', 'step': 15583, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:42.202365', 'step': 15583, 'epoch': 2}
{'type': 'loss', 'content': 0.04978075996041298, 'timestamp': '2025-10-02 00:38:42.208911', 'step': 15584, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:42.262582', 'step': 15584, 'epoch': 2}
{'type': 'loss', 'content': 0.05284585431218147, 'timestamp': '2025-10-02 00:38:42.264831', 'step': 15585, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:42.318823', 'step': 15585, 'epoch': 2}
{'type': 'loss', 'content': 0.027311032637953758, 'timestamp': '2025-10-02 00:38:42.324765', 'step': 15586, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:42.380553', 'step': 15586, 'epoch': 2}
{'type': 'loss', 'content': 0.13524039089679718, 'timestamp': '2025-10-02 00:38:42.383268', 'step': 15587, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:42.438660', 'step': 15587, 'epoch': 2}
{'type': 'loss', 'content': 0.039027005434036255, 'timestamp': '2025-10-02 00:38:42.448713', 'step': 15588, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:42.504580', 'step': 15588, 'epoch': 2}
{'type': 'loss', 'content': 0.03671230375766754, 'timestamp': '2025-10-02 00:38:42.512111', 'step': 15589, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:42.567330', 'step': 15589, 'epoch': 2}
{'type': 'loss', 'content': 0.01783960871398449, 'timestamp': '2025-10-02 00:38:42.576559', 'step': 15590, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:42.632887', 'step': 15590, 'epoch': 2}
{'type': 'loss', 'content': 0.029333118349313736, 'timestamp': '2025-10-02 00:38:42.635886', 'step': 15591, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:42.693150', 'step': 15591, 'epoch': 2}
{'type': 'loss', 'content': 0.10989654064178467, 'timestamp': '2025-10-02 00:38:42.699245', 'step': 15592, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:42.757491', 'step': 15592, 'epoch': 2}
{'type': 'loss', 'content': 0.043177634477615356, 'timestamp': '2025-10-02 00:38:42.761708', 'step': 15593, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:42.819759', 'step': 15593, 'epoch': 2}
{'type': 'loss', 'content': 0.033501721918582916, 'timestamp': '2025-10-02 00:38:42.829310', 'step': 15594, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:38:42.885808', 'step': 15594, 'epoch': 2}
{'type': 'loss', 'content': 0.10423116385936737, 'timestamp': '2025-10-02 00:38:42.888278', 'step': 15595, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:42.943041', 'step': 15595, 'epoch': 2}
{'type': 'loss', 'content': 0.04874546453356743, 'timestamp': '2025-10-02 00:38:42.950004', 'step': 15596, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:38:43.004934', 'step': 15596, 'epoch': 2}
{'type': 'loss', 'content': 0.0726168304681778, 'timestamp': '2025-10-02 00:38:43.007645', 'step': 15597, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:38:43.062494', 'step': 15597, 'epoch': 2}
{'type': 'loss', 'content': 0.11110398173332214, 'timestamp': '2025-10-02 00:38:43.066193', 'step': 15598, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:43.121547', 'step': 15598, 'epoch': 2}
{'type': 'loss', 'content': 0.06217602640390396, 'timestamp': '2025-10-02 00:38:43.128678', 'step': 15599, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:43.183084', 'step': 15599, 'epoch': 2}
{'type': 'loss', 'content': 0.1131325215101242, 'timestamp': '2025-10-02 00:38:43.189271', 'step': 15600, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:43.244120', 'step': 15600, 'epoch': 2}
{'type': 'loss', 'content': 0.1331331729888916, 'timestamp': '2025-10-02 00:38:43.253235', 'step': 15601, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:43.308220', 'step': 15601, 'epoch': 2}
{'type': 'loss', 'content': 0.10082463175058365, 'timestamp': '2025-10-02 00:38:43.311238', 'step': 15602, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:43.367239', 'step': 15602, 'epoch': 2}
{'type': 'loss', 'content': 0.02115512639284134, 'timestamp': '2025-10-02 00:38:43.370120', 'step': 15603, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:43.426756', 'step': 15603, 'epoch': 2}
{'type': 'loss', 'content': 0.07518056780099869, 'timestamp': '2025-10-02 00:38:43.434349', 'step': 15604, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:43.496356', 'step': 15604, 'epoch': 2}
{'type': 'loss', 'content': 0.0938829556107521, 'timestamp': '2025-10-02 00:38:43.500305', 'step': 15605, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:43.572759', 'step': 15605, 'epoch': 2}
{'type': 'loss', 'content': 0.01068318635225296, 'timestamp': '2025-10-02 00:38:43.582938', 'step': 15606, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:43.668939', 'step': 15606, 'epoch': 2}
{'type': 'loss', 'content': 0.03072565607726574, 'timestamp': '2025-10-02 00:38:43.681577', 'step': 15607, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:43.749412', 'step': 15607, 'epoch': 2}
{'type': 'loss', 'content': 0.026755450293421745, 'timestamp': '2025-10-02 00:38:43.757088', 'step': 15608, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:43.818144', 'step': 15608, 'epoch': 2}
{'type': 'loss', 'content': 0.05564000457525253, 'timestamp': '2025-10-02 00:38:43.820731', 'step': 15609, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:43.896697', 'step': 15609, 'epoch': 2}
{'type': 'loss', 'content': 0.02471901848912239, 'timestamp': '2025-10-02 00:38:43.906867', 'step': 15610, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:43.965203', 'step': 15610, 'epoch': 2}
{'type': 'loss', 'content': 0.013151523657143116, 'timestamp': '2025-10-02 00:38:43.974743', 'step': 15611, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:38:44.040267', 'step': 15611, 'epoch': 2}
{'type': 'loss', 'content': 0.12894245982170105, 'timestamp': '2025-10-02 00:38:44.052032', 'step': 15612, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:44.115365', 'step': 15612, 'epoch': 2}
{'type': 'loss', 'content': 0.02313368394970894, 'timestamp': '2025-10-02 00:38:44.124568', 'step': 15613, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:44.189686', 'step': 15613, 'epoch': 2}
{'type': 'loss', 'content': 0.0555429682135582, 'timestamp': '2025-10-02 00:38:44.198731', 'step': 15614, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:44.270688', 'step': 15614, 'epoch': 2}
{'type': 'loss', 'content': 0.00438209343701601, 'timestamp': '2025-10-02 00:38:44.275780', 'step': 15615, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:44.345941', 'step': 15615, 'epoch': 2}
{'type': 'loss', 'content': 0.1276148557662964, 'timestamp': '2025-10-02 00:38:44.358343', 'step': 15616, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:44.427910', 'step': 15616, 'epoch': 2}
{'type': 'loss', 'content': 0.009243019856512547, 'timestamp': '2025-10-02 00:38:44.432675', 'step': 15617, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:44.502548', 'step': 15617, 'epoch': 2}
{'type': 'loss', 'content': 0.15241564810276031, 'timestamp': '2025-10-02 00:38:44.509599', 'step': 15618, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:38:44.586732', 'step': 15618, 'epoch': 2}
{'type': 'loss', 'content': 0.026505181565880775, 'timestamp': '2025-10-02 00:38:44.597361', 'step': 15619, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:44.663713', 'step': 15619, 'epoch': 2}
{'type': 'loss', 'content': 0.06086447834968567, 'timestamp': '2025-10-02 00:38:44.671278', 'step': 15620, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:44.737215', 'step': 15620, 'epoch': 2}
{'type': 'loss', 'content': 0.07141537219285965, 'timestamp': '2025-10-02 00:38:44.740473', 'step': 15621, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:44.809869', 'step': 15621, 'epoch': 2}
{'type': 'loss', 'content': 0.17673873901367188, 'timestamp': '2025-10-02 00:38:44.818385', 'step': 15622, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:44.879758', 'step': 15622, 'epoch': 2}
{'type': 'loss', 'content': 0.05507410317659378, 'timestamp': '2025-10-02 00:38:44.886990', 'step': 15623, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:38:44.973771', 'step': 15623, 'epoch': 2}
{'type': 'loss', 'content': 0.02562832273542881, 'timestamp': '2025-10-02 00:38:44.988328', 'step': 15624, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:45.057808', 'step': 15624, 'epoch': 2}
{'type': 'loss', 'content': 0.0756935179233551, 'timestamp': '2025-10-02 00:38:45.060822', 'step': 15625, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:45.125093', 'step': 15625, 'epoch': 2}
{'type': 'loss', 'content': 0.13463203608989716, 'timestamp': '2025-10-02 00:38:45.133531', 'step': 15626, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:45.204770', 'step': 15626, 'epoch': 2}
{'type': 'loss', 'content': 0.040070414543151855, 'timestamp': '2025-10-02 00:38:45.215103', 'step': 15627, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:45.274539', 'step': 15627, 'epoch': 2}
{'type': 'loss', 'content': 0.16543054580688477, 'timestamp': '2025-10-02 00:38:45.289484', 'step': 15628, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:38:45.359503', 'step': 15628, 'epoch': 2}
{'type': 'loss', 'content': 0.05550669878721237, 'timestamp': '2025-10-02 00:38:45.372463', 'step': 15629, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:45.445700', 'step': 15629, 'epoch': 2}
{'type': 'loss', 'content': 0.059132177382707596, 'timestamp': '2025-10-02 00:38:45.456229', 'step': 15630, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:45.515829', 'step': 15630, 'epoch': 2}
{'type': 'loss', 'content': 0.017918124794960022, 'timestamp': '2025-10-02 00:38:45.524882', 'step': 15631, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:45.598156', 'step': 15631, 'epoch': 2}
{'type': 'loss', 'content': 0.03419980779290199, 'timestamp': '2025-10-02 00:38:45.608760', 'step': 15632, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:45.666499', 'step': 15632, 'epoch': 2}
{'type': 'loss', 'content': 0.07857892662286758, 'timestamp': '2025-10-02 00:38:45.669235', 'step': 15633, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:45.739443', 'step': 15633, 'epoch': 2}
{'type': 'loss', 'content': 0.14381521940231323, 'timestamp': '2025-10-02 00:38:45.742755', 'step': 15634, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:45.823908', 'step': 15634, 'epoch': 2}
{'type': 'loss', 'content': 0.10744429379701614, 'timestamp': '2025-10-02 00:38:45.826735', 'step': 15635, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:45.884746', 'step': 15635, 'epoch': 2}
{'type': 'loss', 'content': 0.033553920686244965, 'timestamp': '2025-10-02 00:38:45.895052', 'step': 15636, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:45.962482', 'step': 15636, 'epoch': 2}
{'type': 'loss', 'content': 0.12467557936906815, 'timestamp': '2025-10-02 00:38:45.965821', 'step': 15637, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:46.040413', 'step': 15637, 'epoch': 2}
{'type': 'loss', 'content': 0.018476782366633415, 'timestamp': '2025-10-02 00:38:46.050047', 'step': 15638, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:38:46.107686', 'step': 15638, 'epoch': 2}
{'type': 'loss', 'content': 0.147922083735466, 'timestamp': '2025-10-02 00:38:46.112002', 'step': 15639, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:46.169773', 'step': 15639, 'epoch': 2}
{'type': 'loss', 'content': 0.04633741080760956, 'timestamp': '2025-10-02 00:38:46.181154', 'step': 15640, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:46.238299', 'step': 15640, 'epoch': 2}
{'type': 'loss', 'content': 0.13917917013168335, 'timestamp': '2025-10-02 00:38:46.249977', 'step': 15641, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:46.315539', 'step': 15641, 'epoch': 2}
{'type': 'loss', 'content': 0.048985954374074936, 'timestamp': '2025-10-02 00:38:46.326119', 'step': 15642, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:46.420568', 'step': 15642, 'epoch': 2}
{'type': 'loss', 'content': 0.1132875457406044, 'timestamp': '2025-10-02 00:38:46.430660', 'step': 15643, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:46.491917', 'step': 15643, 'epoch': 2}
{'type': 'loss', 'content': 0.04490650072693825, 'timestamp': '2025-10-02 00:38:46.502751', 'step': 15644, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:46.567158', 'step': 15644, 'epoch': 2}
{'type': 'loss', 'content': 0.11653786152601242, 'timestamp': '2025-10-02 00:38:46.572086', 'step': 15645, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:46.633046', 'step': 15645, 'epoch': 2}
{'type': 'loss', 'content': 0.031091764569282532, 'timestamp': '2025-10-02 00:38:46.646072', 'step': 15646, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:46.730105', 'step': 15646, 'epoch': 2}
{'type': 'loss', 'content': 0.1307978481054306, 'timestamp': '2025-10-02 00:38:46.738356', 'step': 15647, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:46.814859', 'step': 15647, 'epoch': 2}
{'type': 'loss', 'content': 0.018716707825660706, 'timestamp': '2025-10-02 00:38:46.828826', 'step': 15648, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:46.889610', 'step': 15648, 'epoch': 2}
{'type': 'loss', 'content': 0.07172586023807526, 'timestamp': '2025-10-02 00:38:46.903022', 'step': 15649, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:46.972240', 'step': 15649, 'epoch': 2}
{'type': 'loss', 'content': 0.10578984022140503, 'timestamp': '2025-10-02 00:38:46.979003', 'step': 15650, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:47.078594', 'step': 15650, 'epoch': 2}
{'type': 'loss', 'content': 0.14102043211460114, 'timestamp': '2025-10-02 00:38:47.085892', 'step': 15651, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:47.145191', 'step': 15651, 'epoch': 2}
{'type': 'loss', 'content': 0.027247831225395203, 'timestamp': '2025-10-02 00:38:47.154311', 'step': 15652, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:47.234216', 'step': 15652, 'epoch': 2}
{'type': 'loss', 'content': 0.044884681701660156, 'timestamp': '2025-10-02 00:38:47.241337', 'step': 15653, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:38:47.298187', 'step': 15653, 'epoch': 2}
{'type': 'loss', 'content': 0.11709337681531906, 'timestamp': '2025-10-02 00:38:47.301023', 'step': 15654, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:47.358330', 'step': 15654, 'epoch': 2}
{'type': 'loss', 'content': 0.04088957980275154, 'timestamp': '2025-10-02 00:38:47.363546', 'step': 15655, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:47.425677', 'step': 15655, 'epoch': 2}
{'type': 'loss', 'content': 0.012172317132353783, 'timestamp': '2025-10-02 00:38:47.434229', 'step': 15656, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:47.502092', 'step': 15656, 'epoch': 2}
{'type': 'loss', 'content': 0.019154217094182968, 'timestamp': '2025-10-02 00:38:47.506619', 'step': 15657, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:47.567460', 'step': 15657, 'epoch': 2}
{'type': 'loss', 'content': 0.0655302181839943, 'timestamp': '2025-10-02 00:38:47.573228', 'step': 15658, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:38:47.643109', 'step': 15658, 'epoch': 2}
{'type': 'loss', 'content': 0.037390414625406265, 'timestamp': '2025-10-02 00:38:47.663477', 'step': 15659, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:47.737905', 'step': 15659, 'epoch': 2}
{'type': 'loss', 'content': 0.10455596446990967, 'timestamp': '2025-10-02 00:38:47.756286', 'step': 15660, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:47.820886', 'step': 15660, 'epoch': 2}
{'type': 'loss', 'content': 0.1526961326599121, 'timestamp': '2025-10-02 00:38:47.834325', 'step': 15661, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:47.905363', 'step': 15661, 'epoch': 2}
{'type': 'loss', 'content': 0.03196008875966072, 'timestamp': '2025-10-02 00:38:47.912229', 'step': 15662, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:47.970128', 'step': 15662, 'epoch': 2}
{'type': 'loss', 'content': 0.06247511878609657, 'timestamp': '2025-10-02 00:38:47.972829', 'step': 15663, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:48.032942', 'step': 15663, 'epoch': 2}
{'type': 'loss', 'content': 0.020397691056132317, 'timestamp': '2025-10-02 00:38:48.039093', 'step': 15664, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:48.095167', 'step': 15664, 'epoch': 2}
{'type': 'loss', 'content': 0.06956861913204193, 'timestamp': '2025-10-02 00:38:48.104140', 'step': 15665, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:48.163354', 'step': 15665, 'epoch': 2}
{'type': 'loss', 'content': 0.08890685439109802, 'timestamp': '2025-10-02 00:38:48.168469', 'step': 15666, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:48.226696', 'step': 15666, 'epoch': 2}
{'type': 'loss', 'content': 0.09478066116571426, 'timestamp': '2025-10-02 00:38:48.229793', 'step': 15667, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:48.297965', 'step': 15667, 'epoch': 2}
{'type': 'loss', 'content': 0.007383170071989298, 'timestamp': '2025-10-02 00:38:48.309279', 'step': 15668, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:48.372861', 'step': 15668, 'epoch': 2}
{'type': 'loss', 'content': 0.06373551487922668, 'timestamp': '2025-10-02 00:38:48.376886', 'step': 15669, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:48.434933', 'step': 15669, 'epoch': 2}
{'type': 'loss', 'content': 0.014888748526573181, 'timestamp': '2025-10-02 00:38:48.437870', 'step': 15670, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:48.495364', 'step': 15670, 'epoch': 2}
{'type': 'loss', 'content': 0.17750325798988342, 'timestamp': '2025-10-02 00:38:48.505021', 'step': 15671, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:48.585435', 'step': 15671, 'epoch': 2}
{'type': 'loss', 'content': 0.10051197558641434, 'timestamp': '2025-10-02 00:38:48.596358', 'step': 15672, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:38:48.664032', 'step': 15672, 'epoch': 2}
{'type': 'loss', 'content': 0.10298929363489151, 'timestamp': '2025-10-02 00:38:48.676393', 'step': 15673, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:48.752697', 'step': 15673, 'epoch': 2}
{'type': 'loss', 'content': 0.027960022911429405, 'timestamp': '2025-10-02 00:38:48.761809', 'step': 15674, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:48.825897', 'step': 15674, 'epoch': 2}
{'type': 'loss', 'content': 0.06301601231098175, 'timestamp': '2025-10-02 00:38:48.828659', 'step': 15675, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:38:48.893227', 'step': 15675, 'epoch': 2}
{'type': 'loss', 'content': 0.02411196008324623, 'timestamp': '2025-10-02 00:38:48.904441', 'step': 15676, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:38:48.972027', 'step': 15676, 'epoch': 2}
{'type': 'loss', 'content': 0.027663178741931915, 'timestamp': '2025-10-02 00:38:48.983326', 'step': 15677, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:49.057255', 'step': 15677, 'epoch': 2}
{'type': 'loss', 'content': 0.13189685344696045, 'timestamp': '2025-10-02 00:38:49.061041', 'step': 15678, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:49.132220', 'step': 15678, 'epoch': 2}
{'type': 'loss', 'content': 0.09855518490076065, 'timestamp': '2025-10-02 00:38:49.141954', 'step': 15679, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:49.222176', 'step': 15679, 'epoch': 2}
{'type': 'loss', 'content': 0.06897316128015518, 'timestamp': '2025-10-02 00:38:49.234171', 'step': 15680, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:49.307877', 'step': 15680, 'epoch': 2}
{'type': 'loss', 'content': 0.028842994943261147, 'timestamp': '2025-10-02 00:38:49.318878', 'step': 15681, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:38:49.379774', 'step': 15681, 'epoch': 2}
{'type': 'loss', 'content': 0.09973496943712234, 'timestamp': '2025-10-02 00:38:49.383538', 'step': 15682, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:49.440773', 'step': 15682, 'epoch': 2}
{'type': 'loss', 'content': 0.06512285768985748, 'timestamp': '2025-10-02 00:38:49.443888', 'step': 15683, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:38:49.500135', 'step': 15683, 'epoch': 2}
{'type': 'loss', 'content': 0.12403708696365356, 'timestamp': '2025-10-02 00:38:49.506768', 'step': 15684, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:49.569721', 'step': 15684, 'epoch': 2}
{'type': 'loss', 'content': 0.029498033225536346, 'timestamp': '2025-10-02 00:38:49.580685', 'step': 15685, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:49.642776', 'step': 15685, 'epoch': 2}
{'type': 'loss', 'content': 0.05955550819635391, 'timestamp': '2025-10-02 00:38:49.648027', 'step': 15686, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:49.745291', 'step': 15686, 'epoch': 2}
{'type': 'loss', 'content': 0.02771664224565029, 'timestamp': '2025-10-02 00:38:49.752233', 'step': 15687, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:49.810594', 'step': 15687, 'epoch': 2}
{'type': 'loss', 'content': 0.02679258957505226, 'timestamp': '2025-10-02 00:38:49.824793', 'step': 15688, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:49.881245', 'step': 15688, 'epoch': 2}
{'type': 'loss', 'content': 0.03881172463297844, 'timestamp': '2025-10-02 00:38:49.888137', 'step': 15689, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:49.949878', 'step': 15689, 'epoch': 2}
{'type': 'loss', 'content': 0.035986706614494324, 'timestamp': '2025-10-02 00:38:49.965257', 'step': 15690, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:50.053437', 'step': 15690, 'epoch': 2}
{'type': 'loss', 'content': 0.04568963870406151, 'timestamp': '2025-10-02 00:38:50.066000', 'step': 15691, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:38:50.137738', 'step': 15691, 'epoch': 2}
{'type': 'loss', 'content': 0.08901476860046387, 'timestamp': '2025-10-02 00:38:50.144790', 'step': 15692, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:50.220530', 'step': 15692, 'epoch': 2}
{'type': 'loss', 'content': 0.042654626071453094, 'timestamp': '2025-10-02 00:38:50.224397', 'step': 15693, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:38:50.281855', 'step': 15693, 'epoch': 2}
{'type': 'loss', 'content': 0.1513284146785736, 'timestamp': '2025-10-02 00:38:50.294116', 'step': 15694, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:50.371323', 'step': 15694, 'epoch': 2}
{'type': 'loss', 'content': 0.008143213577568531, 'timestamp': '2025-10-02 00:38:50.381497', 'step': 15695, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:50.439860', 'step': 15695, 'epoch': 2}
{'type': 'loss', 'content': 0.03973758593201637, 'timestamp': '2025-10-02 00:38:50.446888', 'step': 15696, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:50.503665', 'step': 15696, 'epoch': 2}
{'type': 'loss', 'content': 0.11631744354963303, 'timestamp': '2025-10-02 00:38:50.506763', 'step': 15697, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:50.569387', 'step': 15697, 'epoch': 2}
{'type': 'loss', 'content': 0.04126585274934769, 'timestamp': '2025-10-02 00:38:50.578153', 'step': 15698, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:50.654709', 'step': 15698, 'epoch': 2}
{'type': 'loss', 'content': 0.044641509652137756, 'timestamp': '2025-10-02 00:38:50.659905', 'step': 15699, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:50.718367', 'step': 15699, 'epoch': 2}
{'type': 'loss', 'content': 0.00801297090947628, 'timestamp': '2025-10-02 00:38:50.725194', 'step': 15700, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:38:50.802697', 'step': 15700, 'epoch': 2}
{'type': 'loss', 'content': 0.0009579003090038896, 'timestamp': '2025-10-02 00:38:50.817382', 'step': 15701, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:50.875183', 'step': 15701, 'epoch': 2}
{'type': 'loss', 'content': 0.03630068898200989, 'timestamp': '2025-10-02 00:38:50.879047', 'step': 15702, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:50.954689', 'step': 15702, 'epoch': 2}
{'type': 'loss', 'content': 0.08034160733222961, 'timestamp': '2025-10-02 00:38:50.963569', 'step': 15703, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:38:51.020832', 'step': 15703, 'epoch': 2}
{'type': 'loss', 'content': 0.12719988822937012, 'timestamp': '2025-10-02 00:38:51.027844', 'step': 15704, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:51.083952', 'step': 15704, 'epoch': 2}
{'type': 'loss', 'content': 0.05061068758368492, 'timestamp': '2025-10-02 00:38:51.086854', 'step': 15705, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:51.144000', 'step': 15705, 'epoch': 2}
{'type': 'loss', 'content': 0.005134212784469128, 'timestamp': '2025-10-02 00:38:51.151084', 'step': 15706, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:38:51.214944', 'step': 15706, 'epoch': 2}
{'type': 'loss', 'content': 0.026348523795604706, 'timestamp': '2025-10-02 00:38:51.225763', 'step': 15707, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:51.293890', 'step': 15707, 'epoch': 2}
{'type': 'loss', 'content': 0.014090115204453468, 'timestamp': '2025-10-02 00:38:51.307143', 'step': 15708, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:51.374327', 'step': 15708, 'epoch': 2}
{'type': 'loss', 'content': 0.04510121047496796, 'timestamp': '2025-10-02 00:38:51.379756', 'step': 15709, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:51.445841', 'step': 15709, 'epoch': 2}
{'type': 'loss', 'content': 0.12672550976276398, 'timestamp': '2025-10-02 00:38:51.454649', 'step': 15710, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:38:51.531112', 'step': 15710, 'epoch': 2}
{'type': 'loss', 'content': 0.09243358671665192, 'timestamp': '2025-10-02 00:38:51.541524', 'step': 15711, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:38:51.598778', 'step': 15711, 'epoch': 2}
{'type': 'loss', 'content': 0.09455501288175583, 'timestamp': '2025-10-02 00:38:51.605177', 'step': 15712, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:51.663293', 'step': 15712, 'epoch': 2}
{'type': 'loss', 'content': 0.11389985680580139, 'timestamp': '2025-10-02 00:38:51.674578', 'step': 15713, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:51.747175', 'step': 15713, 'epoch': 2}
{'type': 'loss', 'content': 0.048809707164764404, 'timestamp': '2025-10-02 00:38:51.756696', 'step': 15714, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:51.827829', 'step': 15714, 'epoch': 2}
{'type': 'loss', 'content': 0.13148383796215057, 'timestamp': '2025-10-02 00:38:51.834883', 'step': 15715, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:51.906526', 'step': 15715, 'epoch': 2}
{'type': 'loss', 'content': 0.09990552812814713, 'timestamp': '2025-10-02 00:38:51.915554', 'step': 15716, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:51.983501', 'step': 15716, 'epoch': 2}
{'type': 'loss', 'content': 0.07265856862068176, 'timestamp': '2025-10-02 00:38:51.990283', 'step': 15717, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:52.047442', 'step': 15717, 'epoch': 2}
{'type': 'loss', 'content': 0.052050068974494934, 'timestamp': '2025-10-02 00:38:52.053888', 'step': 15718, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:38:52.121405', 'step': 15718, 'epoch': 2}
{'type': 'loss', 'content': 0.11802913248538971, 'timestamp': '2025-10-02 00:38:52.126360', 'step': 15719, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:52.195395', 'step': 15719, 'epoch': 2}
{'type': 'loss', 'content': 0.06704244762659073, 'timestamp': '2025-10-02 00:38:52.203053', 'step': 15720, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:52.273508', 'step': 15720, 'epoch': 2}
{'type': 'loss', 'content': 0.10227757692337036, 'timestamp': '2025-10-02 00:38:52.278063', 'step': 15721, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:38:52.346684', 'step': 15721, 'epoch': 2}
{'type': 'loss', 'content': 0.17127786576747894, 'timestamp': '2025-10-02 00:38:52.354394', 'step': 15722, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:52.425712', 'step': 15722, 'epoch': 2}
{'type': 'loss', 'content': 0.15405027568340302, 'timestamp': '2025-10-02 00:38:52.429245', 'step': 15723, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:52.495283', 'step': 15723, 'epoch': 2}
{'type': 'loss', 'content': 0.1624385118484497, 'timestamp': '2025-10-02 00:38:52.503839', 'step': 15724, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:38:52.563673', 'step': 15724, 'epoch': 2}
{'type': 'loss', 'content': 0.09081742912530899, 'timestamp': '2025-10-02 00:38:52.568343', 'step': 15725, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:52.625007', 'step': 15725, 'epoch': 2}
{'type': 'loss', 'content': 0.04925164580345154, 'timestamp': '2025-10-02 00:38:52.631274', 'step': 15726, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:52.699321', 'step': 15726, 'epoch': 2}
{'type': 'loss', 'content': 0.054766904562711716, 'timestamp': '2025-10-02 00:38:52.706113', 'step': 15727, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:38:52.775886', 'step': 15727, 'epoch': 2}
{'type': 'loss', 'content': 0.08471211045980453, 'timestamp': '2025-10-02 00:38:52.786796', 'step': 15728, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:52.854998', 'step': 15728, 'epoch': 2}
{'type': 'loss', 'content': 0.06621342152357101, 'timestamp': '2025-10-02 00:38:52.861882', 'step': 15729, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:38:52.928591', 'step': 15729, 'epoch': 2}
{'type': 'loss', 'content': 0.12036668509244919, 'timestamp': '2025-10-02 00:38:52.934586', 'step': 15730, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:53.000708', 'step': 15730, 'epoch': 2}
{'type': 'loss', 'content': 0.1221531480550766, 'timestamp': '2025-10-02 00:38:53.004898', 'step': 15731, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:53.071765', 'step': 15731, 'epoch': 2}
{'type': 'loss', 'content': 0.05128444358706474, 'timestamp': '2025-10-02 00:38:53.078925', 'step': 15732, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:53.136118', 'step': 15732, 'epoch': 2}
{'type': 'loss', 'content': 0.018850522115826607, 'timestamp': '2025-10-02 00:38:53.146182', 'step': 15733, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:53.212632', 'step': 15733, 'epoch': 2}
{'type': 'loss', 'content': 0.041091058403253555, 'timestamp': '2025-10-02 00:38:53.216349', 'step': 15734, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:53.271686', 'step': 15734, 'epoch': 2}
{'type': 'loss', 'content': 0.21690423786640167, 'timestamp': '2025-10-02 00:38:53.282082', 'step': 15735, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:53.359474', 'step': 15735, 'epoch': 2}
{'type': 'loss', 'content': 0.04076472669839859, 'timestamp': '2025-10-02 00:38:53.380682', 'step': 15736, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:53.440522', 'step': 15736, 'epoch': 2}
{'type': 'loss', 'content': 0.016131170094013214, 'timestamp': '2025-10-02 00:38:53.449556', 'step': 15737, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:53.552189', 'step': 15737, 'epoch': 2}
{'type': 'loss', 'content': 0.057760413736104965, 'timestamp': '2025-10-02 00:38:53.557450', 'step': 15738, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:53.631267', 'step': 15738, 'epoch': 2}
{'type': 'loss', 'content': 0.0122829619795084, 'timestamp': '2025-10-02 00:38:53.634027', 'step': 15739, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:53.697520', 'step': 15739, 'epoch': 2}
{'type': 'loss', 'content': 0.13302205502986908, 'timestamp': '2025-10-02 00:38:53.705472', 'step': 15740, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:53.763332', 'step': 15740, 'epoch': 2}
{'type': 'loss', 'content': 0.07650534808635712, 'timestamp': '2025-10-02 00:38:53.766660', 'step': 15741, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:53.822980', 'step': 15741, 'epoch': 2}
{'type': 'loss', 'content': 0.01801164261996746, 'timestamp': '2025-10-02 00:38:53.829883', 'step': 15742, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:38:53.885376', 'step': 15742, 'epoch': 2}
{'type': 'loss', 'content': 0.13612167537212372, 'timestamp': '2025-10-02 00:38:53.887914', 'step': 15743, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:53.947184', 'step': 15743, 'epoch': 2}
{'type': 'loss', 'content': 0.035070519894361496, 'timestamp': '2025-10-02 00:38:53.958140', 'step': 15744, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:54.014943', 'step': 15744, 'epoch': 2}
{'type': 'loss', 'content': 0.07629072666168213, 'timestamp': '2025-10-02 00:38:54.023872', 'step': 15745, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:54.080905', 'step': 15745, 'epoch': 2}
{'type': 'loss', 'content': 0.026532402262091637, 'timestamp': '2025-10-02 00:38:54.083614', 'step': 15746, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:54.146235', 'step': 15746, 'epoch': 2}
{'type': 'loss', 'content': 0.04525148868560791, 'timestamp': '2025-10-02 00:38:54.151446', 'step': 15747, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:54.207907', 'step': 15747, 'epoch': 2}
{'type': 'loss', 'content': 0.11008572578430176, 'timestamp': '2025-10-02 00:38:54.214070', 'step': 15748, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:54.274595', 'step': 15748, 'epoch': 2}
{'type': 'loss', 'content': 0.03493213281035423, 'timestamp': '2025-10-02 00:38:54.285552', 'step': 15749, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:38:54.348347', 'step': 15749, 'epoch': 2}
{'type': 'loss', 'content': 0.03407841920852661, 'timestamp': '2025-10-02 00:38:54.358784', 'step': 15750, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:54.414006', 'step': 15750, 'epoch': 2}
{'type': 'loss', 'content': 0.1435663104057312, 'timestamp': '2025-10-02 00:38:54.417214', 'step': 15751, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:54.472711', 'step': 15751, 'epoch': 2}
{'type': 'loss', 'content': 0.03415153548121452, 'timestamp': '2025-10-02 00:38:54.482931', 'step': 15752, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:54.541751', 'step': 15752, 'epoch': 2}
{'type': 'loss', 'content': 0.03127646818757057, 'timestamp': '2025-10-02 00:38:54.552690', 'step': 15753, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:38:54.609167', 'step': 15753, 'epoch': 2}
{'type': 'loss', 'content': 0.02204969897866249, 'timestamp': '2025-10-02 00:38:54.618096', 'step': 15754, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:54.694071', 'step': 15754, 'epoch': 2}
{'type': 'loss', 'content': 0.05659006908535957, 'timestamp': '2025-10-02 00:38:54.701066', 'step': 15755, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:54.772160', 'step': 15755, 'epoch': 2}
{'type': 'loss', 'content': 0.08234213292598724, 'timestamp': '2025-10-02 00:38:54.785545', 'step': 15756, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:54.846914', 'step': 15756, 'epoch': 2}
{'type': 'loss', 'content': 0.06326747685670853, 'timestamp': '2025-10-02 00:38:54.853923', 'step': 15757, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:54.914573', 'step': 15757, 'epoch': 2}
{'type': 'loss', 'content': 0.19603562355041504, 'timestamp': '2025-10-02 00:38:54.919222', 'step': 15758, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:38:55.001706', 'step': 15758, 'epoch': 2}
{'type': 'loss', 'content': 0.04348679259419441, 'timestamp': '2025-10-02 00:38:55.015157', 'step': 15759, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:55.073835', 'step': 15759, 'epoch': 2}
{'type': 'loss', 'content': 0.1199742928147316, 'timestamp': '2025-10-02 00:38:55.082841', 'step': 15760, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:55.154017', 'step': 15760, 'epoch': 2}
{'type': 'loss', 'content': 0.03214304521679878, 'timestamp': '2025-10-02 00:38:55.157600', 'step': 15761, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:55.231208', 'step': 15761, 'epoch': 2}
{'type': 'loss', 'content': 0.25867795944213867, 'timestamp': '2025-10-02 00:38:55.236162', 'step': 15762, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:38:55.296990', 'step': 15762, 'epoch': 2}
{'type': 'loss', 'content': 0.0591675341129303, 'timestamp': '2025-10-02 00:38:55.301604', 'step': 15763, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:55.376517', 'step': 15763, 'epoch': 2}
{'type': 'loss', 'content': 0.004429214168339968, 'timestamp': '2025-10-02 00:38:55.386260', 'step': 15764, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:55.447390', 'step': 15764, 'epoch': 2}
{'type': 'loss', 'content': 0.13325977325439453, 'timestamp': '2025-10-02 00:38:55.465559', 'step': 15765, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:55.541638', 'step': 15765, 'epoch': 2}
{'type': 'loss', 'content': 0.026628490537405014, 'timestamp': '2025-10-02 00:38:55.560146', 'step': 15766, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:55.656244', 'step': 15766, 'epoch': 2}
{'type': 'loss', 'content': 0.04306860268115997, 'timestamp': '2025-10-02 00:38:55.674254', 'step': 15767, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:38:55.781376', 'step': 15767, 'epoch': 2}
{'type': 'loss', 'content': 0.060725707560777664, 'timestamp': '2025-10-02 00:38:55.801338', 'step': 15768, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:55.903554', 'step': 15768, 'epoch': 2}
{'type': 'loss', 'content': 0.121437668800354, 'timestamp': '2025-10-02 00:38:55.919227', 'step': 15769, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:56.013078', 'step': 15769, 'epoch': 2}
{'type': 'loss', 'content': 0.1473734974861145, 'timestamp': '2025-10-02 00:38:56.019565', 'step': 15770, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:38:56.117298', 'step': 15770, 'epoch': 2}
{'type': 'loss', 'content': 0.014957329258322716, 'timestamp': '2025-10-02 00:38:56.127381', 'step': 15771, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:56.189855', 'step': 15771, 'epoch': 2}
{'type': 'loss', 'content': 0.027474232017993927, 'timestamp': '2025-10-02 00:38:56.207587', 'step': 15772, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:56.293031', 'step': 15772, 'epoch': 2}
{'type': 'loss', 'content': 0.03590318188071251, 'timestamp': '2025-10-02 00:38:56.306798', 'step': 15773, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:56.399110', 'step': 15773, 'epoch': 2}
{'type': 'loss', 'content': 0.03955881670117378, 'timestamp': '2025-10-02 00:38:56.415190', 'step': 15774, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:56.484631', 'step': 15774, 'epoch': 2}
{'type': 'loss', 'content': 0.04015813767910004, 'timestamp': '2025-10-02 00:38:56.494054', 'step': 15775, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:38:56.570521', 'step': 15775, 'epoch': 2}
{'type': 'loss', 'content': 0.0899525061249733, 'timestamp': '2025-10-02 00:38:56.587886', 'step': 15776, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:38:56.658789', 'step': 15776, 'epoch': 2}
{'type': 'loss', 'content': 0.10913456231355667, 'timestamp': '2025-10-02 00:38:56.662785', 'step': 15777, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:56.725756', 'step': 15777, 'epoch': 2}
{'type': 'loss', 'content': 0.04143857583403587, 'timestamp': '2025-10-02 00:38:56.742945', 'step': 15778, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:56.835407', 'step': 15778, 'epoch': 2}
{'type': 'loss', 'content': 0.14357639849185944, 'timestamp': '2025-10-02 00:38:56.850859', 'step': 15779, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:56.953530', 'step': 15779, 'epoch': 2}
{'type': 'loss', 'content': 0.047816235572099686, 'timestamp': '2025-10-02 00:38:56.972008', 'step': 15780, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:38:57.056363', 'step': 15780, 'epoch': 2}
{'type': 'loss', 'content': 0.040625981986522675, 'timestamp': '2025-10-02 00:38:57.065059', 'step': 15781, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:57.133303', 'step': 15781, 'epoch': 2}
{'type': 'loss', 'content': 0.08743081986904144, 'timestamp': '2025-10-02 00:38:57.136395', 'step': 15782, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:57.202888', 'step': 15782, 'epoch': 2}
{'type': 'loss', 'content': 0.06257174909114838, 'timestamp': '2025-10-02 00:38:57.215668', 'step': 15783, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:57.306097', 'step': 15783, 'epoch': 2}
{'type': 'loss', 'content': 0.13829413056373596, 'timestamp': '2025-10-02 00:38:57.323669', 'step': 15784, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:38:57.418839', 'step': 15784, 'epoch': 2}
{'type': 'loss', 'content': 0.020686713978648186, 'timestamp': '2025-10-02 00:38:57.435451', 'step': 15785, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:57.536792', 'step': 15785, 'epoch': 2}
{'type': 'loss', 'content': 0.10139410197734833, 'timestamp': '2025-10-02 00:38:57.540444', 'step': 15786, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:57.612014', 'step': 15786, 'epoch': 2}
{'type': 'loss', 'content': 0.0456857793033123, 'timestamp': '2025-10-02 00:38:57.617351', 'step': 15787, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:57.680685', 'step': 15787, 'epoch': 2}
{'type': 'loss', 'content': 0.08746346086263657, 'timestamp': '2025-10-02 00:38:57.699701', 'step': 15788, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:38:57.800663', 'step': 15788, 'epoch': 2}
{'type': 'loss', 'content': 0.08102656155824661, 'timestamp': '2025-10-02 00:38:57.812373', 'step': 15789, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:57.903894', 'step': 15789, 'epoch': 2}
{'type': 'loss', 'content': 0.07335174828767776, 'timestamp': '2025-10-02 00:38:57.920490', 'step': 15790, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:58.009869', 'step': 15790, 'epoch': 2}
{'type': 'loss', 'content': 0.05232502147555351, 'timestamp': '2025-10-02 00:38:58.013635', 'step': 15791, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:38:58.082234', 'step': 15791, 'epoch': 2}
{'type': 'loss', 'content': 0.08149426430463791, 'timestamp': '2025-10-02 00:38:58.101409', 'step': 15792, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:58.193362', 'step': 15792, 'epoch': 2}
{'type': 'loss', 'content': 0.03835749253630638, 'timestamp': '2025-10-02 00:38:58.197071', 'step': 15793, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:38:58.268189', 'step': 15793, 'epoch': 2}
{'type': 'loss', 'content': 0.1083388701081276, 'timestamp': '2025-10-02 00:38:58.273390', 'step': 15794, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:38:58.359774', 'step': 15794, 'epoch': 2}
{'type': 'loss', 'content': 0.07481172680854797, 'timestamp': '2025-10-02 00:38:58.377165', 'step': 15795, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:38:58.468654', 'step': 15795, 'epoch': 2}
{'type': 'loss', 'content': 0.04483341798186302, 'timestamp': '2025-10-02 00:38:58.476292', 'step': 15796, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:58.546427', 'step': 15796, 'epoch': 2}
{'type': 'loss', 'content': 0.061233311891555786, 'timestamp': '2025-10-02 00:38:58.561714', 'step': 15797, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:38:58.641686', 'step': 15797, 'epoch': 2}
{'type': 'loss', 'content': 0.1595609486103058, 'timestamp': '2025-10-02 00:38:58.645736', 'step': 15798, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:58.719020', 'step': 15798, 'epoch': 2}
{'type': 'loss', 'content': 0.04000559076666832, 'timestamp': '2025-10-02 00:38:58.734388', 'step': 15799, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:38:58.808527', 'step': 15799, 'epoch': 2}
{'type': 'loss', 'content': 0.034218624234199524, 'timestamp': '2025-10-02 00:38:58.821712', 'step': 15800, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:38:58.889869', 'step': 15800, 'epoch': 2}
{'type': 'loss', 'content': 0.050583548843860626, 'timestamp': '2025-10-02 00:38:58.893750', 'step': 15801, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:58.963374', 'step': 15801, 'epoch': 2}
{'type': 'loss', 'content': 0.01943051815032959, 'timestamp': '2025-10-02 00:38:58.972926', 'step': 15802, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:59.055535', 'step': 15802, 'epoch': 2}
{'type': 'loss', 'content': 0.09663115441799164, 'timestamp': '2025-10-02 00:38:59.059293', 'step': 15803, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:59.148787', 'step': 15803, 'epoch': 2}
{'type': 'loss', 'content': 0.016081443056464195, 'timestamp': '2025-10-02 00:38:59.156150', 'step': 15804, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:59.228297', 'step': 15804, 'epoch': 2}
{'type': 'loss', 'content': 0.04014447703957558, 'timestamp': '2025-10-02 00:38:59.233085', 'step': 15805, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:38:59.292646', 'step': 15805, 'epoch': 2}
{'type': 'loss', 'content': 0.1176503375172615, 'timestamp': '2025-10-02 00:38:59.296907', 'step': 15806, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:59.369157', 'step': 15806, 'epoch': 2}
{'type': 'loss', 'content': 0.032448600977659225, 'timestamp': '2025-10-02 00:38:59.374311', 'step': 15807, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:38:59.435682', 'step': 15807, 'epoch': 2}
{'type': 'loss', 'content': 0.03560122847557068, 'timestamp': '2025-10-02 00:38:59.444001', 'step': 15808, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:38:59.530875', 'step': 15808, 'epoch': 2}
{'type': 'loss', 'content': 0.06040485203266144, 'timestamp': '2025-10-02 00:38:59.541017', 'step': 15809, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:38:59.648191', 'step': 15809, 'epoch': 2}
{'type': 'loss', 'content': 0.030282514169812202, 'timestamp': '2025-10-02 00:38:59.651950', 'step': 15810, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:59.712550', 'step': 15810, 'epoch': 2}
{'type': 'loss', 'content': 0.044345755130052567, 'timestamp': '2025-10-02 00:38:59.717747', 'step': 15811, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:38:59.790003', 'step': 15811, 'epoch': 2}
{'type': 'loss', 'content': 0.09351006895303726, 'timestamp': '2025-10-02 00:38:59.797538', 'step': 15812, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:38:59.885059', 'step': 15812, 'epoch': 2}
{'type': 'loss', 'content': 0.0038924715481698513, 'timestamp': '2025-10-02 00:38:59.895360', 'step': 15813, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:38:59.955878', 'step': 15813, 'epoch': 2}
{'type': 'loss', 'content': 0.13907533884048462, 'timestamp': '2025-10-02 00:38:59.960598', 'step': 15814, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:00.019339', 'step': 15814, 'epoch': 2}
{'type': 'loss', 'content': 0.06722237914800644, 'timestamp': '2025-10-02 00:39:00.024643', 'step': 15815, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:00.092852', 'step': 15815, 'epoch': 2}
{'type': 'loss', 'content': 0.04756302759051323, 'timestamp': '2025-10-02 00:39:00.100947', 'step': 15816, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:00.161233', 'step': 15816, 'epoch': 2}
{'type': 'loss', 'content': 0.07859602570533752, 'timestamp': '2025-10-02 00:39:00.168235', 'step': 15817, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:00.230669', 'step': 15817, 'epoch': 2}
{'type': 'loss', 'content': 0.1384935975074768, 'timestamp': '2025-10-02 00:39:00.234945', 'step': 15818, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:39:00.292874', 'step': 15818, 'epoch': 2}
{'type': 'loss', 'content': 0.10966064035892487, 'timestamp': '2025-10-02 00:39:00.296962', 'step': 15819, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:00.388420', 'step': 15819, 'epoch': 2}
{'type': 'loss', 'content': 0.14177891612052917, 'timestamp': '2025-10-02 00:39:00.405619', 'step': 15820, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:00.483611', 'step': 15820, 'epoch': 2}
{'type': 'loss', 'content': 0.05325384810566902, 'timestamp': '2025-10-02 00:39:00.488966', 'step': 15821, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:00.547787', 'step': 15821, 'epoch': 2}
{'type': 'loss', 'content': 0.1037813276052475, 'timestamp': '2025-10-02 00:39:00.553131', 'step': 15822, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:00.623056', 'step': 15822, 'epoch': 2}
{'type': 'loss', 'content': 0.08457136154174805, 'timestamp': '2025-10-02 00:39:00.631842', 'step': 15823, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:00.690012', 'step': 15823, 'epoch': 2}
{'type': 'loss', 'content': 0.0079255485907197, 'timestamp': '2025-10-02 00:39:00.697777', 'step': 15824, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:00.755506', 'step': 15824, 'epoch': 2}
{'type': 'loss', 'content': 0.2066923975944519, 'timestamp': '2025-10-02 00:39:00.759119', 'step': 15825, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:00.831824', 'step': 15825, 'epoch': 2}
{'type': 'loss', 'content': 0.016157763078808784, 'timestamp': '2025-10-02 00:39:00.846107', 'step': 15826, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:00.934387', 'step': 15826, 'epoch': 2}
{'type': 'loss', 'content': 0.08183754235506058, 'timestamp': '2025-10-02 00:39:00.938231', 'step': 15827, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:00.995737', 'step': 15827, 'epoch': 2}
{'type': 'loss', 'content': 0.0581616535782814, 'timestamp': '2025-10-02 00:39:01.003245', 'step': 15828, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:01.065204', 'step': 15828, 'epoch': 2}
{'type': 'loss', 'content': 0.08229146897792816, 'timestamp': '2025-10-02 00:39:01.068873', 'step': 15829, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:39:01.138857', 'step': 15829, 'epoch': 2}
{'type': 'loss', 'content': 0.1476290225982666, 'timestamp': '2025-10-02 00:39:01.146240', 'step': 15830, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:39:01.207613', 'step': 15830, 'epoch': 2}
{'type': 'loss', 'content': 0.13500884175300598, 'timestamp': '2025-10-02 00:39:01.222133', 'step': 15831, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:39:01.282071', 'step': 15831, 'epoch': 2}
{'type': 'loss', 'content': 0.10567457228899002, 'timestamp': '2025-10-02 00:39:01.289446', 'step': 15832, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:39:01.352415', 'step': 15832, 'epoch': 2}
{'type': 'loss', 'content': 0.03138316050171852, 'timestamp': '2025-10-02 00:39:01.363387', 'step': 15833, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:01.421681', 'step': 15833, 'epoch': 2}
{'type': 'loss', 'content': 0.03609624132514, 'timestamp': '2025-10-02 00:39:01.428574', 'step': 15834, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:39:01.494886', 'step': 15834, 'epoch': 2}
{'type': 'loss', 'content': 0.14486199617385864, 'timestamp': '2025-10-02 00:39:01.497979', 'step': 15835, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:01.554428', 'step': 15835, 'epoch': 2}
{'type': 'loss', 'content': 0.047729503363370895, 'timestamp': '2025-10-02 00:39:01.569476', 'step': 15836, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:39:01.645126', 'step': 15836, 'epoch': 2}
{'type': 'loss', 'content': 0.0273753572255373, 'timestamp': '2025-10-02 00:39:01.658176', 'step': 15837, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:01.747884', 'step': 15837, 'epoch': 2}
{'type': 'loss', 'content': 0.05707439035177231, 'timestamp': '2025-10-02 00:39:01.751875', 'step': 15838, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:39:01.833817', 'step': 15838, 'epoch': 2}
{'type': 'loss', 'content': 0.009015488438308239, 'timestamp': '2025-10-02 00:39:01.844613', 'step': 15839, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:01.923569', 'step': 15839, 'epoch': 2}
{'type': 'loss', 'content': 0.0391925610601902, 'timestamp': '2025-10-02 00:39:01.936417', 'step': 15840, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:02.001447', 'step': 15840, 'epoch': 2}
{'type': 'loss', 'content': 0.011960453353822231, 'timestamp': '2025-10-02 00:39:02.006015', 'step': 15841, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:02.080831', 'step': 15841, 'epoch': 2}
{'type': 'loss', 'content': 0.023022927343845367, 'timestamp': '2025-10-02 00:39:02.087669', 'step': 15842, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:39:02.165772', 'step': 15842, 'epoch': 2}
{'type': 'loss', 'content': 0.07902996242046356, 'timestamp': '2025-10-02 00:39:02.168866', 'step': 15843, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:39:02.226956', 'step': 15843, 'epoch': 2}
{'type': 'loss', 'content': 0.15414267778396606, 'timestamp': '2025-10-02 00:39:02.239786', 'step': 15844, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:39:02.313039', 'step': 15844, 'epoch': 2}
{'type': 'loss', 'content': 0.10981129854917526, 'timestamp': '2025-10-02 00:39:02.326078', 'step': 15845, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:02.407524', 'step': 15845, 'epoch': 2}
{'type': 'loss', 'content': 0.054236263036727905, 'timestamp': '2025-10-02 00:39:02.417024', 'step': 15846, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:02.496636', 'step': 15846, 'epoch': 2}
{'type': 'loss', 'content': 0.05547702684998512, 'timestamp': '2025-10-02 00:39:02.505107', 'step': 15847, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:39:02.573799', 'step': 15847, 'epoch': 2}
{'type': 'loss', 'content': 0.011710643768310547, 'timestamp': '2025-10-02 00:39:02.589923', 'step': 15848, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:02.649672', 'step': 15848, 'epoch': 2}
{'type': 'loss', 'content': 0.01485361997038126, 'timestamp': '2025-10-02 00:39:02.654409', 'step': 15849, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:02.734041', 'step': 15849, 'epoch': 2}
{'type': 'loss', 'content': 0.11766975373029709, 'timestamp': '2025-10-02 00:39:02.745183', 'step': 15850, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:02.823379', 'step': 15850, 'epoch': 2}
{'type': 'loss', 'content': 0.10229212045669556, 'timestamp': '2025-10-02 00:39:02.832814', 'step': 15851, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:02.908103', 'step': 15851, 'epoch': 2}
{'type': 'loss', 'content': 0.07589508593082428, 'timestamp': '2025-10-02 00:39:02.922398', 'step': 15852, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:39:02.998927', 'step': 15852, 'epoch': 2}
{'type': 'loss', 'content': 0.021292980760335922, 'timestamp': '2025-10-02 00:39:03.009033', 'step': 15853, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:03.077000', 'step': 15853, 'epoch': 2}
{'type': 'loss', 'content': 0.024948380887508392, 'timestamp': '2025-10-02 00:39:03.086006', 'step': 15854, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:03.160522', 'step': 15854, 'epoch': 2}
{'type': 'loss', 'content': 0.09091842174530029, 'timestamp': '2025-10-02 00:39:03.169649', 'step': 15855, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:03.250941', 'step': 15855, 'epoch': 2}
{'type': 'loss', 'content': 0.18655206263065338, 'timestamp': '2025-10-02 00:39:03.264995', 'step': 15856, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:03.344084', 'step': 15856, 'epoch': 2}
{'type': 'loss', 'content': 0.07572086155414581, 'timestamp': '2025-10-02 00:39:03.349080', 'step': 15857, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:03.428334', 'step': 15857, 'epoch': 2}
{'type': 'loss', 'content': 0.03260551020503044, 'timestamp': '2025-10-02 00:39:03.437306', 'step': 15858, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:03.495731', 'step': 15858, 'epoch': 2}
{'type': 'loss', 'content': 0.013007177971303463, 'timestamp': '2025-10-02 00:39:03.502504', 'step': 15859, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:03.567055', 'step': 15859, 'epoch': 2}
{'type': 'loss', 'content': 0.027416987344622612, 'timestamp': '2025-10-02 00:39:03.578310', 'step': 15860, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:03.637642', 'step': 15860, 'epoch': 2}
{'type': 'loss', 'content': 0.030820094048976898, 'timestamp': '2025-10-02 00:39:03.641512', 'step': 15861, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:03.699442', 'step': 15861, 'epoch': 2}
{'type': 'loss', 'content': 0.027258001267910004, 'timestamp': '2025-10-02 00:39:03.706275', 'step': 15862, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:03.762647', 'step': 15862, 'epoch': 2}
{'type': 'loss', 'content': 0.09554755687713623, 'timestamp': '2025-10-02 00:39:03.767764', 'step': 15863, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:39:03.830397', 'step': 15863, 'epoch': 2}
{'type': 'loss', 'content': 0.04715343564748764, 'timestamp': '2025-10-02 00:39:03.840718', 'step': 15864, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:39:03.896185', 'step': 15864, 'epoch': 2}
{'type': 'loss', 'content': 0.039469216018915176, 'timestamp': '2025-10-02 00:39:03.899801', 'step': 15865, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:39:03.957303', 'step': 15865, 'epoch': 2}
{'type': 'loss', 'content': 0.12496701627969742, 'timestamp': '2025-10-02 00:39:03.960212', 'step': 15866, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:39:04.025517', 'step': 15866, 'epoch': 2}
{'type': 'loss', 'content': 0.10059589147567749, 'timestamp': '2025-10-02 00:39:04.035111', 'step': 15867, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:04.107731', 'step': 15867, 'epoch': 2}
{'type': 'loss', 'content': 0.13839171826839447, 'timestamp': '2025-10-02 00:39:04.119035', 'step': 15868, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:04.184878', 'step': 15868, 'epoch': 2}
{'type': 'loss', 'content': 0.07910372316837311, 'timestamp': '2025-10-02 00:39:04.198672', 'step': 15869, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:39:04.304528', 'step': 15869, 'epoch': 2}
{'type': 'loss', 'content': 0.04014154151082039, 'timestamp': '2025-10-02 00:39:04.317942', 'step': 15870, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:39:04.377942', 'step': 15870, 'epoch': 2}
{'type': 'loss', 'content': 0.06167159974575043, 'timestamp': '2025-10-02 00:39:04.384026', 'step': 15871, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:39:04.463653', 'step': 15871, 'epoch': 2}
{'type': 'loss', 'content': 0.07954426854848862, 'timestamp': '2025-10-02 00:39:04.478429', 'step': 15872, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:39:04.567286', 'step': 15872, 'epoch': 2}
{'type': 'loss', 'content': 0.0469832643866539, 'timestamp': '2025-10-02 00:39:04.578195', 'step': 15873, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:04.659115', 'step': 15873, 'epoch': 2}
{'type': 'loss', 'content': 0.11308367550373077, 'timestamp': '2025-10-02 00:39:04.673839', 'step': 15874, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:04.754995', 'step': 15874, 'epoch': 2}
{'type': 'loss', 'content': 0.031393636018037796, 'timestamp': '2025-10-02 00:39:04.764020', 'step': 15875, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:39:04.852558', 'step': 15875, 'epoch': 2}
{'type': 'loss', 'content': 0.012544011697173119, 'timestamp': '2025-10-02 00:39:04.864924', 'step': 15876, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:04.923854', 'step': 15876, 'epoch': 2}
{'type': 'loss', 'content': 0.02722790278494358, 'timestamp': '2025-10-02 00:39:04.927403', 'step': 15877, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:04.985191', 'step': 15877, 'epoch': 2}
{'type': 'loss', 'content': 0.04771906137466431, 'timestamp': '2025-10-02 00:39:04.991512', 'step': 15878, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:39:05.051491', 'step': 15878, 'epoch': 2}
{'type': 'loss', 'content': 0.09839783608913422, 'timestamp': '2025-10-02 00:39:05.055395', 'step': 15879, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:05.124204', 'step': 15879, 'epoch': 2}
{'type': 'loss', 'content': 0.05136977136135101, 'timestamp': '2025-10-02 00:39:05.135550', 'step': 15880, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:05.205265', 'step': 15880, 'epoch': 2}
{'type': 'loss', 'content': 0.0700114443898201, 'timestamp': '2025-10-02 00:39:05.214683', 'step': 15881, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:05.288071', 'step': 15881, 'epoch': 2}
{'type': 'loss', 'content': 0.1235431358218193, 'timestamp': '2025-10-02 00:39:05.297632', 'step': 15882, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:05.378139', 'step': 15882, 'epoch': 2}
{'type': 'loss', 'content': 0.09046042710542679, 'timestamp': '2025-10-02 00:39:05.397306', 'step': 15883, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:05.459959', 'step': 15883, 'epoch': 2}
{'type': 'loss', 'content': 0.07179241627454758, 'timestamp': '2025-10-02 00:39:05.469176', 'step': 15884, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:05.561993', 'step': 15884, 'epoch': 2}
{'type': 'loss', 'content': 0.04013427346944809, 'timestamp': '2025-10-02 00:39:05.581488', 'step': 15885, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:05.689992', 'step': 15885, 'epoch': 2}
{'type': 'loss', 'content': 0.06694876402616501, 'timestamp': '2025-10-02 00:39:05.695115', 'step': 15886, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:05.756054', 'step': 15886, 'epoch': 2}
{'type': 'loss', 'content': 0.07652211934328079, 'timestamp': '2025-10-02 00:39:05.759171', 'step': 15887, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:39:05.829150', 'step': 15887, 'epoch': 2}
{'type': 'loss', 'content': 0.03973696008324623, 'timestamp': '2025-10-02 00:39:05.840062', 'step': 15888, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:05.904637', 'step': 15888, 'epoch': 2}
{'type': 'loss', 'content': 0.022678421810269356, 'timestamp': '2025-10-02 00:39:05.915938', 'step': 15889, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:05.973679', 'step': 15889, 'epoch': 2}
{'type': 'loss', 'content': 0.03334628418087959, 'timestamp': '2025-10-02 00:39:05.980440', 'step': 15890, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:39:06.044142', 'step': 15890, 'epoch': 2}
{'type': 'loss', 'content': 0.01286264043301344, 'timestamp': '2025-10-02 00:39:06.053673', 'step': 15891, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:06.120460', 'step': 15891, 'epoch': 2}
{'type': 'loss', 'content': 0.05349085479974747, 'timestamp': '2025-10-02 00:39:06.147763', 'step': 15892, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:39:06.211013', 'step': 15892, 'epoch': 2}
{'type': 'loss', 'content': 0.04382482171058655, 'timestamp': '2025-10-02 00:39:06.220943', 'step': 15893, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:06.290984', 'step': 15893, 'epoch': 2}
{'type': 'loss', 'content': 0.1068892627954483, 'timestamp': '2025-10-02 00:39:06.295814', 'step': 15894, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:06.353221', 'step': 15894, 'epoch': 2}
{'type': 'loss', 'content': 0.03535166382789612, 'timestamp': '2025-10-02 00:39:06.355992', 'step': 15895, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:06.413135', 'step': 15895, 'epoch': 2}
{'type': 'loss', 'content': 0.025538671761751175, 'timestamp': '2025-10-02 00:39:06.420689', 'step': 15896, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:06.478667', 'step': 15896, 'epoch': 2}
{'type': 'loss', 'content': 0.17414577305316925, 'timestamp': '2025-10-02 00:39:06.483051', 'step': 15897, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:06.546891', 'step': 15897, 'epoch': 2}
{'type': 'loss', 'content': 0.007866921834647655, 'timestamp': '2025-10-02 00:39:06.555811', 'step': 15898, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:39:06.612334', 'step': 15898, 'epoch': 2}
{'type': 'loss', 'content': 0.017996838316321373, 'timestamp': '2025-10-02 00:39:06.621870', 'step': 15899, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:06.679050', 'step': 15899, 'epoch': 2}
{'type': 'loss', 'content': 0.05631779506802559, 'timestamp': '2025-10-02 00:39:06.685506', 'step': 15900, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:06.744301', 'step': 15900, 'epoch': 2}
{'type': 'loss', 'content': 0.14430099725723267, 'timestamp': '2025-10-02 00:39:06.754426', 'step': 15901, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:06.827409', 'step': 15901, 'epoch': 2}
{'type': 'loss', 'content': 0.08423656225204468, 'timestamp': '2025-10-02 00:39:06.839705', 'step': 15902, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:39:06.927400', 'step': 15902, 'epoch': 2}
{'type': 'loss', 'content': 0.024423765018582344, 'timestamp': '2025-10-02 00:39:06.936939', 'step': 15903, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:07.011064', 'step': 15903, 'epoch': 2}
{'type': 'loss', 'content': 0.03712518885731697, 'timestamp': '2025-10-02 00:39:07.035285', 'step': 15904, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:39:07.144385', 'step': 15904, 'epoch': 2}
{'type': 'loss', 'content': 0.06276291608810425, 'timestamp': '2025-10-02 00:39:07.155335', 'step': 15905, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:39:07.230643', 'step': 15905, 'epoch': 2}
{'type': 'loss', 'content': 0.05711424723267555, 'timestamp': '2025-10-02 00:39:07.242932', 'step': 15906, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:07.316729', 'step': 15906, 'epoch': 2}
{'type': 'loss', 'content': 0.10820740461349487, 'timestamp': '2025-10-02 00:39:07.331061', 'step': 15907, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:07.387768', 'step': 15907, 'epoch': 2}
{'type': 'loss', 'content': 0.06886481493711472, 'timestamp': '2025-10-02 00:39:07.398625', 'step': 15908, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:39:07.469764', 'step': 15908, 'epoch': 2}
{'type': 'loss', 'content': 0.013925221748650074, 'timestamp': '2025-10-02 00:39:07.482749', 'step': 15909, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:07.552429', 'step': 15909, 'epoch': 2}
{'type': 'loss', 'content': 0.011904284358024597, 'timestamp': '2025-10-02 00:39:07.555781', 'step': 15910, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:07.620233', 'step': 15910, 'epoch': 2}
{'type': 'loss', 'content': 0.005090491846203804, 'timestamp': '2025-10-02 00:39:07.629118', 'step': 15911, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:07.687685', 'step': 15911, 'epoch': 2}
{'type': 'loss', 'content': 0.01757047139108181, 'timestamp': '2025-10-02 00:39:07.697516', 'step': 15912, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:07.757330', 'step': 15912, 'epoch': 2}
{'type': 'loss', 'content': 0.10753818601369858, 'timestamp': '2025-10-02 00:39:07.762684', 'step': 15913, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:07.821647', 'step': 15913, 'epoch': 2}
{'type': 'loss', 'content': 0.11881109327077866, 'timestamp': '2025-10-02 00:39:07.827233', 'step': 15914, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:07.888627', 'step': 15914, 'epoch': 2}
{'type': 'loss', 'content': 0.06057858467102051, 'timestamp': '2025-10-02 00:39:07.891853', 'step': 15915, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:07.954635', 'step': 15915, 'epoch': 2}
{'type': 'loss', 'content': 0.02771030180156231, 'timestamp': '2025-10-02 00:39:07.964398', 'step': 15916, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:08.023977', 'step': 15916, 'epoch': 2}
{'type': 'loss', 'content': 0.08879709988832474, 'timestamp': '2025-10-02 00:39:08.028819', 'step': 15917, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:08.089467', 'step': 15917, 'epoch': 2}
{'type': 'loss', 'content': 0.09014308452606201, 'timestamp': '2025-10-02 00:39:08.092250', 'step': 15918, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:08.150979', 'step': 15918, 'epoch': 2}
{'type': 'loss', 'content': 0.08168348670005798, 'timestamp': '2025-10-02 00:39:08.167795', 'step': 15919, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:08.247760', 'step': 15919, 'epoch': 2}
{'type': 'loss', 'content': 0.0231307502835989, 'timestamp': '2025-10-02 00:39:08.258281', 'step': 15920, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:08.318282', 'step': 15920, 'epoch': 2}
{'type': 'loss', 'content': 0.025728432461619377, 'timestamp': '2025-10-02 00:39:08.324680', 'step': 15921, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:08.396458', 'step': 15921, 'epoch': 2}
{'type': 'loss', 'content': 0.0947398841381073, 'timestamp': '2025-10-02 00:39:08.403511', 'step': 15922, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:39:08.472525', 'step': 15922, 'epoch': 2}
{'type': 'loss', 'content': 0.05638791248202324, 'timestamp': '2025-10-02 00:39:08.480581', 'step': 15923, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:08.552303', 'step': 15923, 'epoch': 2}
{'type': 'loss', 'content': 0.15905135869979858, 'timestamp': '2025-10-02 00:39:08.567877', 'step': 15924, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:39:08.647449', 'step': 15924, 'epoch': 2}
{'type': 'loss', 'content': 0.07253442704677582, 'timestamp': '2025-10-02 00:39:08.660463', 'step': 15925, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:39:08.735286', 'step': 15925, 'epoch': 2}
{'type': 'loss', 'content': 0.035190608352422714, 'timestamp': '2025-10-02 00:39:08.748071', 'step': 15926, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:39:08.829165', 'step': 15926, 'epoch': 2}
{'type': 'loss', 'content': 0.015046816319227219, 'timestamp': '2025-10-02 00:39:08.839574', 'step': 15927, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:08.913151', 'step': 15927, 'epoch': 2}
{'type': 'loss', 'content': 0.0632760152220726, 'timestamp': '2025-10-02 00:39:08.927009', 'step': 15928, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:08.998794', 'step': 15928, 'epoch': 2}
{'type': 'loss', 'content': 0.04186023771762848, 'timestamp': '2025-10-02 00:39:09.002238', 'step': 15929, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:09.064252', 'step': 15929, 'epoch': 2}
{'type': 'loss', 'content': 0.018106376752257347, 'timestamp': '2025-10-02 00:39:09.073539', 'step': 15930, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:39:09.135666', 'step': 15930, 'epoch': 2}
{'type': 'loss', 'content': 0.08496436476707458, 'timestamp': '2025-10-02 00:39:09.139571', 'step': 15931, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:09.196989', 'step': 15931, 'epoch': 2}
{'type': 'loss', 'content': 0.04558734968304634, 'timestamp': '2025-10-02 00:39:09.210528', 'step': 15932, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:09.276596', 'step': 15932, 'epoch': 2}
{'type': 'loss', 'content': 0.025338562205433846, 'timestamp': '2025-10-02 00:39:09.285677', 'step': 15933, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:09.356536', 'step': 15933, 'epoch': 2}
{'type': 'loss', 'content': 0.01284173782914877, 'timestamp': '2025-10-02 00:39:09.366974', 'step': 15934, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:09.431732', 'step': 15934, 'epoch': 2}
{'type': 'loss', 'content': 0.04625197872519493, 'timestamp': '2025-10-02 00:39:09.434726', 'step': 15935, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:09.501767', 'step': 15935, 'epoch': 2}
{'type': 'loss', 'content': 0.12437833100557327, 'timestamp': '2025-10-02 00:39:09.513403', 'step': 15936, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:39:09.577992', 'step': 15936, 'epoch': 2}
{'type': 'loss', 'content': 0.10431491583585739, 'timestamp': '2025-10-02 00:39:09.581792', 'step': 15937, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:39:09.656899', 'step': 15937, 'epoch': 2}
{'type': 'loss', 'content': 0.07035654783248901, 'timestamp': '2025-10-02 00:39:09.667017', 'step': 15938, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:39:09.723348', 'step': 15938, 'epoch': 2}
{'type': 'loss', 'content': 0.04351217299699783, 'timestamp': '2025-10-02 00:39:09.726481', 'step': 15939, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:09.782876', 'step': 15939, 'epoch': 2}
{'type': 'loss', 'content': 0.04957231506705284, 'timestamp': '2025-10-02 00:39:09.789174', 'step': 15940, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:39:09.858295', 'step': 15940, 'epoch': 2}
{'type': 'loss', 'content': 0.039158694446086884, 'timestamp': '2025-10-02 00:39:09.870010', 'step': 15941, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:39:09.930608', 'step': 15941, 'epoch': 2}
{'type': 'loss', 'content': 0.01516772247850895, 'timestamp': '2025-10-02 00:39:09.940773', 'step': 15942, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:10.005383', 'step': 15942, 'epoch': 2}
{'type': 'loss', 'content': 0.07079605013132095, 'timestamp': '2025-10-02 00:39:10.010721', 'step': 15943, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:10.079502', 'step': 15943, 'epoch': 2}
{'type': 'loss', 'content': 0.05700716748833656, 'timestamp': '2025-10-02 00:39:10.086469', 'step': 15944, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:10.156193', 'step': 15944, 'epoch': 2}
{'type': 'loss', 'content': 0.08156885951757431, 'timestamp': '2025-10-02 00:39:10.162801', 'step': 15945, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:10.220353', 'step': 15945, 'epoch': 2}
{'type': 'loss', 'content': 0.1262706220149994, 'timestamp': '2025-10-02 00:39:10.226235', 'step': 15946, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:10.285012', 'step': 15946, 'epoch': 2}
{'type': 'loss', 'content': 0.058840565383434296, 'timestamp': '2025-10-02 00:39:10.288509', 'step': 15947, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:10.347425', 'step': 15947, 'epoch': 2}
{'type': 'loss', 'content': 0.07141251116991043, 'timestamp': '2025-10-02 00:39:10.354626', 'step': 15948, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:10.413572', 'step': 15948, 'epoch': 2}
{'type': 'loss', 'content': 0.06059274077415466, 'timestamp': '2025-10-02 00:39:10.422511', 'step': 15949, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:10.483641', 'step': 15949, 'epoch': 2}
{'type': 'loss', 'content': 0.06750762462615967, 'timestamp': '2025-10-02 00:39:10.486443', 'step': 15950, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:39:10.547258', 'step': 15950, 'epoch': 2}
{'type': 'loss', 'content': 0.015453952364623547, 'timestamp': '2025-10-02 00:39:10.556805', 'step': 15951, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:10.613553', 'step': 15951, 'epoch': 2}
{'type': 'loss', 'content': 0.05444389581680298, 'timestamp': '2025-10-02 00:39:10.620287', 'step': 15952, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:10.676939', 'step': 15952, 'epoch': 2}
{'type': 'loss', 'content': 0.043204184621572495, 'timestamp': '2025-10-02 00:39:10.683039', 'step': 15953, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:10.739846', 'step': 15953, 'epoch': 2}
{'type': 'loss', 'content': 0.04633133485913277, 'timestamp': '2025-10-02 00:39:10.744903', 'step': 15954, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:10.799857', 'step': 15954, 'epoch': 2}
{'type': 'loss', 'content': 0.027891850098967552, 'timestamp': '2025-10-02 00:39:10.802457', 'step': 15955, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:39:10.857265', 'step': 15955, 'epoch': 2}
{'type': 'loss', 'content': 0.06215222179889679, 'timestamp': '2025-10-02 00:39:10.863393', 'step': 15956, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:39:10.922611', 'step': 15956, 'epoch': 2}
{'type': 'loss', 'content': 0.041854213923215866, 'timestamp': '2025-10-02 00:39:10.933535', 'step': 15957, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:10.990532', 'step': 15957, 'epoch': 2}
{'type': 'loss', 'content': 0.1168164312839508, 'timestamp': '2025-10-02 00:39:10.992759', 'step': 15958, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:39:11.063228', 'step': 15958, 'epoch': 2}
{'type': 'loss', 'content': 0.023100027814507484, 'timestamp': '2025-10-02 00:39:11.075825', 'step': 15959, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:11.142210', 'step': 15959, 'epoch': 2}
{'type': 'loss', 'content': 0.018252553418278694, 'timestamp': '2025-10-02 00:39:11.151688', 'step': 15960, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:11.231403', 'step': 15960, 'epoch': 2}
{'type': 'loss', 'content': 0.06656697392463684, 'timestamp': '2025-10-02 00:39:11.244747', 'step': 15961, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:11.314111', 'step': 15961, 'epoch': 2}
{'type': 'loss', 'content': 0.04144923388957977, 'timestamp': '2025-10-02 00:39:11.317970', 'step': 15962, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:39:11.381634', 'step': 15962, 'epoch': 2}
{'type': 'loss', 'content': 0.05854259058833122, 'timestamp': '2025-10-02 00:39:11.385753', 'step': 15963, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:11.447800', 'step': 15963, 'epoch': 2}
{'type': 'loss', 'content': 0.03157402575016022, 'timestamp': '2025-10-02 00:39:11.453749', 'step': 15964, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:11.508695', 'step': 15964, 'epoch': 2}
{'type': 'loss', 'content': 0.16137656569480896, 'timestamp': '2025-10-02 00:39:11.513486', 'step': 15965, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:11.569626', 'step': 15965, 'epoch': 2}
{'type': 'loss', 'content': 0.031656280159950256, 'timestamp': '2025-10-02 00:39:11.576328', 'step': 15966, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:11.636964', 'step': 15966, 'epoch': 2}
{'type': 'loss', 'content': 0.03141908720135689, 'timestamp': '2025-10-02 00:39:11.642146', 'step': 15967, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:39:11.709688', 'step': 15967, 'epoch': 2}
{'type': 'loss', 'content': 0.03530602529644966, 'timestamp': '2025-10-02 00:39:11.719946', 'step': 15968, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:11.776944', 'step': 15968, 'epoch': 2}
{'type': 'loss', 'content': 0.05113601312041283, 'timestamp': '2025-10-02 00:39:11.779313', 'step': 15969, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:11.841174', 'step': 15969, 'epoch': 2}
{'type': 'loss', 'content': 0.05506989359855652, 'timestamp': '2025-10-02 00:39:11.851629', 'step': 15970, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:11.910176', 'step': 15970, 'epoch': 2}
{'type': 'loss', 'content': 0.004761228337883949, 'timestamp': '2025-10-02 00:39:11.916994', 'step': 15971, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:11.976010', 'step': 15971, 'epoch': 2}
{'type': 'loss', 'content': 0.037588100880384445, 'timestamp': '2025-10-02 00:39:11.982368', 'step': 15972, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:39:12.037734', 'step': 15972, 'epoch': 2}
{'type': 'loss', 'content': 0.12284450978040695, 'timestamp': '2025-10-02 00:39:12.041242', 'step': 15973, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:12.098713', 'step': 15973, 'epoch': 2}
{'type': 'loss', 'content': 0.11182001978158951, 'timestamp': '2025-10-02 00:39:12.101150', 'step': 15974, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:12.156263', 'step': 15974, 'epoch': 2}
{'type': 'loss', 'content': 0.07024931907653809, 'timestamp': '2025-10-02 00:39:12.160339', 'step': 15975, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:12.219041', 'step': 15975, 'epoch': 2}
{'type': 'loss', 'content': 0.033059101551771164, 'timestamp': '2025-10-02 00:39:12.227939', 'step': 15976, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:39:12.291765', 'step': 15976, 'epoch': 2}
{'type': 'loss', 'content': 0.038004033267498016, 'timestamp': '2025-10-02 00:39:12.303475', 'step': 15977, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:12.360544', 'step': 15977, 'epoch': 2}
{'type': 'loss', 'content': 0.011521347798407078, 'timestamp': '2025-10-02 00:39:12.365989', 'step': 15978, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:39:12.427700', 'step': 15978, 'epoch': 2}
{'type': 'loss', 'content': 0.02355981431901455, 'timestamp': '2025-10-02 00:39:12.437806', 'step': 15979, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:12.506361', 'step': 15979, 'epoch': 2}
{'type': 'loss', 'content': 0.08935292065143585, 'timestamp': '2025-10-02 00:39:12.514016', 'step': 15980, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:12.578582', 'step': 15980, 'epoch': 2}
{'type': 'loss', 'content': 0.01806487701833248, 'timestamp': '2025-10-02 00:39:12.585360', 'step': 15981, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:12.651945', 'step': 15981, 'epoch': 2}
{'type': 'loss', 'content': 0.02208712324500084, 'timestamp': '2025-10-02 00:39:12.658031', 'step': 15982, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:12.722913', 'step': 15982, 'epoch': 2}
{'type': 'loss', 'content': 0.0823696106672287, 'timestamp': '2025-10-02 00:39:12.731709', 'step': 15983, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:39:12.791748', 'step': 15983, 'epoch': 2}
{'type': 'loss', 'content': 0.01766905002295971, 'timestamp': '2025-10-02 00:39:12.802072', 'step': 15984, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:12.858645', 'step': 15984, 'epoch': 2}
{'type': 'loss', 'content': 0.11613178253173828, 'timestamp': '2025-10-02 00:39:12.864388', 'step': 15985, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:12.928378', 'step': 15985, 'epoch': 2}
{'type': 'loss', 'content': 0.05036100000143051, 'timestamp': '2025-10-02 00:39:12.934232', 'step': 15986, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:13.000133', 'step': 15986, 'epoch': 2}
{'type': 'loss', 'content': 0.19063808023929596, 'timestamp': '2025-10-02 00:39:13.003598', 'step': 15987, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:13.072826', 'step': 15987, 'epoch': 2}
{'type': 'loss', 'content': 0.0912233516573906, 'timestamp': '2025-10-02 00:39:13.080353', 'step': 15988, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:13.144327', 'step': 15988, 'epoch': 2}
{'type': 'loss', 'content': 0.010673146694898605, 'timestamp': '2025-10-02 00:39:13.152987', 'step': 15989, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:13.220633', 'step': 15989, 'epoch': 2}
{'type': 'loss', 'content': 0.005317324306815863, 'timestamp': '2025-10-02 00:39:13.229427', 'step': 15990, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:13.297099', 'step': 15990, 'epoch': 2}
{'type': 'loss', 'content': 0.03493548557162285, 'timestamp': '2025-10-02 00:39:13.302830', 'step': 15991, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:13.368844', 'step': 15991, 'epoch': 2}
{'type': 'loss', 'content': 0.02907515875995159, 'timestamp': '2025-10-02 00:39:13.380499', 'step': 15992, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:13.447609', 'step': 15992, 'epoch': 2}
{'type': 'loss', 'content': 0.029634617269039154, 'timestamp': '2025-10-02 00:39:13.451891', 'step': 15993, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:13.519802', 'step': 15993, 'epoch': 2}
{'type': 'loss', 'content': 0.03859110549092293, 'timestamp': '2025-10-02 00:39:13.526847', 'step': 15994, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:13.588385', 'step': 15994, 'epoch': 2}
{'type': 'loss', 'content': 0.032354533672332764, 'timestamp': '2025-10-02 00:39:13.593410', 'step': 15995, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:13.655950', 'step': 15995, 'epoch': 2}
{'type': 'loss', 'content': 0.0164373517036438, 'timestamp': '2025-10-02 00:39:13.665283', 'step': 15996, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:13.729356', 'step': 15996, 'epoch': 2}
{'type': 'loss', 'content': 0.0687263235449791, 'timestamp': '2025-10-02 00:39:13.740591', 'step': 15997, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:13.798922', 'step': 15997, 'epoch': 2}
{'type': 'loss', 'content': 0.11553467065095901, 'timestamp': '2025-10-02 00:39:13.804364', 'step': 15998, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:13.870200', 'step': 15998, 'epoch': 2}
{'type': 'loss', 'content': 0.07132408022880554, 'timestamp': '2025-10-02 00:39:13.872955', 'step': 15999, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:13.929676', 'step': 15999, 'epoch': 2}
{'type': 'loss', 'content': 0.11012588441371918, 'timestamp': '2025-10-02 00:39:13.936934', 'step': 16000, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 16000', 'timestamp': '2025-10-02 00:39:14.362056', 'step': 16000, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:14.426584', 'step': 16000, 'epoch': 2}
{'type': 'loss', 'content': 0.03451315686106682, 'timestamp': '2025-10-02 00:39:14.434733', 'step': 16001, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:14.494784', 'step': 16001, 'epoch': 2}
{'type': 'loss', 'content': 0.034830402582883835, 'timestamp': '2025-10-02 00:39:14.503296', 'step': 16002, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:14.563635', 'step': 16002, 'epoch': 2}
{'type': 'loss', 'content': 0.05255519971251488, 'timestamp': '2025-10-02 00:39:14.566672', 'step': 16003, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:14.626119', 'step': 16003, 'epoch': 2}
{'type': 'loss', 'content': 0.10496579110622406, 'timestamp': '2025-10-02 00:39:14.641155', 'step': 16004, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:14.711655', 'step': 16004, 'epoch': 2}
{'type': 'loss', 'content': 0.05346737802028656, 'timestamp': '2025-10-02 00:39:14.720443', 'step': 16005, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:14.780172', 'step': 16005, 'epoch': 2}
{'type': 'loss', 'content': 0.03827226907014847, 'timestamp': '2025-10-02 00:39:14.784392', 'step': 16006, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:14.844385', 'step': 16006, 'epoch': 2}
{'type': 'loss', 'content': 0.010040242224931717, 'timestamp': '2025-10-02 00:39:14.849649', 'step': 16007, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:39:14.923739', 'step': 16007, 'epoch': 2}
{'type': 'loss', 'content': 0.01953739859163761, 'timestamp': '2025-10-02 00:39:14.938986', 'step': 16008, 'epoch': 2}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:39:43.262889', 'step': 16008, 'epoch': 2}
{'type': 'pplx', 'content': 102.5225605042916, 'timestamp': '2025-10-02 00:39:43.266860', 'step': 16008, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:43.331501', 'step': 16008, 'epoch': 2}
{'type': 'loss', 'content': 0.25219276547431946, 'timestamp': '2025-10-02 00:39:43.341304', 'step': 16009, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:43.411504', 'step': 16009, 'epoch': 2}
{'type': 'loss', 'content': 0.07121001183986664, 'timestamp': '2025-10-02 00:39:43.415915', 'step': 16010, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:43.476628', 'step': 16010, 'epoch': 2}
{'type': 'loss', 'content': 0.1361542046070099, 'timestamp': '2025-10-02 00:39:43.484457', 'step': 16011, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:43.557095', 'step': 16011, 'epoch': 2}
{'type': 'loss', 'content': 0.10542813688516617, 'timestamp': '2025-10-02 00:39:43.564603', 'step': 16012, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:43.630611', 'step': 16012, 'epoch': 2}
{'type': 'loss', 'content': 0.09033817797899246, 'timestamp': '2025-10-02 00:39:43.639403', 'step': 16013, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:43.718474', 'step': 16013, 'epoch': 2}
{'type': 'loss', 'content': 0.0766223594546318, 'timestamp': '2025-10-02 00:39:43.723955', 'step': 16014, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:43.781264', 'step': 16014, 'epoch': 2}
{'type': 'loss', 'content': 0.10927721112966537, 'timestamp': '2025-10-02 00:39:43.785263', 'step': 16015, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:43.843993', 'step': 16015, 'epoch': 2}
{'type': 'loss', 'content': 0.10901060700416565, 'timestamp': '2025-10-02 00:39:43.853636', 'step': 16016, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:39:43.907690', 'step': 16016, 'epoch': 2}
{'type': 'loss', 'content': 0.04328104853630066, 'timestamp': '2025-10-02 00:39:43.910277', 'step': 16017, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:43.964977', 'step': 16017, 'epoch': 2}
{'type': 'loss', 'content': 0.016243772581219673, 'timestamp': '2025-10-02 00:39:43.967424', 'step': 16018, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:39:44.040701', 'step': 16018, 'epoch': 2}
{'type': 'loss', 'content': 0.016469469293951988, 'timestamp': '2025-10-02 00:39:44.054150', 'step': 16019, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:44.108829', 'step': 16019, 'epoch': 2}
{'type': 'loss', 'content': 0.059852179139852524, 'timestamp': '2025-10-02 00:39:44.115772', 'step': 16020, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:44.176666', 'step': 16020, 'epoch': 2}
{'type': 'loss', 'content': 0.054598841816186905, 'timestamp': '2025-10-02 00:39:44.188219', 'step': 16021, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:44.243384', 'step': 16021, 'epoch': 2}
{'type': 'loss', 'content': 0.0594368539750576, 'timestamp': '2025-10-02 00:39:44.248758', 'step': 16022, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:44.306134', 'step': 16022, 'epoch': 2}
{'type': 'loss', 'content': 0.06948177516460419, 'timestamp': '2025-10-02 00:39:44.308517', 'step': 16023, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:44.363488', 'step': 16023, 'epoch': 2}
{'type': 'loss', 'content': 0.12659460306167603, 'timestamp': '2025-10-02 00:39:44.371301', 'step': 16024, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:39:44.425492', 'step': 16024, 'epoch': 2}
{'type': 'loss', 'content': 0.05333499237895012, 'timestamp': '2025-10-02 00:39:44.427854', 'step': 16025, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:44.482862', 'step': 16025, 'epoch': 2}
{'type': 'loss', 'content': 0.04373358562588692, 'timestamp': '2025-10-02 00:39:44.485025', 'step': 16026, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:39:44.539460', 'step': 16026, 'epoch': 2}
{'type': 'loss', 'content': 0.07468489557504654, 'timestamp': '2025-10-02 00:39:44.542227', 'step': 16027, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:44.597910', 'step': 16027, 'epoch': 2}
{'type': 'loss', 'content': 0.04633840173482895, 'timestamp': '2025-10-02 00:39:44.604305', 'step': 16028, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:44.659483', 'step': 16028, 'epoch': 2}
{'type': 'loss', 'content': 0.07635971903800964, 'timestamp': '2025-10-02 00:39:44.668525', 'step': 16029, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:44.724255', 'step': 16029, 'epoch': 2}
{'type': 'loss', 'content': 0.00937030278146267, 'timestamp': '2025-10-02 00:39:44.731050', 'step': 16030, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:44.793258', 'step': 16030, 'epoch': 2}
{'type': 'loss', 'content': 0.004214917775243521, 'timestamp': '2025-10-02 00:39:44.803978', 'step': 16031, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:44.859480', 'step': 16031, 'epoch': 2}
{'type': 'loss', 'content': 0.057828519493341446, 'timestamp': '2025-10-02 00:39:44.865304', 'step': 16032, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:44.920952', 'step': 16032, 'epoch': 2}
{'type': 'loss', 'content': 0.031179945915937424, 'timestamp': '2025-10-02 00:39:44.923587', 'step': 16033, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:39:44.978687', 'step': 16033, 'epoch': 2}
{'type': 'loss', 'content': 0.09292875230312347, 'timestamp': '2025-10-02 00:39:44.982589', 'step': 16034, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:39:45.037562', 'step': 16034, 'epoch': 2}
{'type': 'loss', 'content': 0.09620846807956696, 'timestamp': '2025-10-02 00:39:45.040178', 'step': 16035, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:45.095527', 'step': 16035, 'epoch': 2}
{'type': 'loss', 'content': 0.053578659892082214, 'timestamp': '2025-10-02 00:39:45.103457', 'step': 16036, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:39:45.158681', 'step': 16036, 'epoch': 2}
{'type': 'loss', 'content': 0.05243013799190521, 'timestamp': '2025-10-02 00:39:45.162891', 'step': 16037, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:45.217955', 'step': 16037, 'epoch': 2}
{'type': 'loss', 'content': 0.09773387759923935, 'timestamp': '2025-10-02 00:39:45.224460', 'step': 16038, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:39:45.293788', 'step': 16038, 'epoch': 2}
{'type': 'loss', 'content': 0.04106244072318077, 'timestamp': '2025-10-02 00:39:45.305955', 'step': 16039, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:45.361976', 'step': 16039, 'epoch': 2}
{'type': 'loss', 'content': 0.059109125286340714, 'timestamp': '2025-10-02 00:39:45.368089', 'step': 16040, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:45.422033', 'step': 16040, 'epoch': 2}
{'type': 'loss', 'content': 0.06017327681183815, 'timestamp': '2025-10-02 00:39:45.428979', 'step': 16041, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:45.484592', 'step': 16041, 'epoch': 2}
{'type': 'loss', 'content': 0.05770120769739151, 'timestamp': '2025-10-02 00:39:45.487453', 'step': 16042, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:39:45.542873', 'step': 16042, 'epoch': 2}
{'type': 'loss', 'content': 0.044508423656225204, 'timestamp': '2025-10-02 00:39:45.552640', 'step': 16043, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:45.607094', 'step': 16043, 'epoch': 2}
{'type': 'loss', 'content': 0.06677141040563583, 'timestamp': '2025-10-02 00:39:45.613775', 'step': 16044, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:45.668714', 'step': 16044, 'epoch': 2}
{'type': 'loss', 'content': 0.003235062351450324, 'timestamp': '2025-10-02 00:39:45.677731', 'step': 16045, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:45.732169', 'step': 16045, 'epoch': 2}
{'type': 'loss', 'content': 0.10803250223398209, 'timestamp': '2025-10-02 00:39:45.734401', 'step': 16046, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:39:45.789035', 'step': 16046, 'epoch': 2}
{'type': 'loss', 'content': 0.16346944868564606, 'timestamp': '2025-10-02 00:39:45.794372', 'step': 16047, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:45.849054', 'step': 16047, 'epoch': 2}
{'type': 'loss', 'content': 0.07718684524297714, 'timestamp': '2025-10-02 00:39:45.855046', 'step': 16048, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:45.909299', 'step': 16048, 'epoch': 2}
{'type': 'loss', 'content': 0.04914160072803497, 'timestamp': '2025-10-02 00:39:45.914810', 'step': 16049, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:39:45.971147', 'step': 16049, 'epoch': 2}
{'type': 'loss', 'content': 0.055243637412786484, 'timestamp': '2025-10-02 00:39:45.980888', 'step': 16050, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:46.036463', 'step': 16050, 'epoch': 2}
{'type': 'loss', 'content': 0.020595001056790352, 'timestamp': '2025-10-02 00:39:46.042031', 'step': 16051, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:46.097614', 'step': 16051, 'epoch': 2}
{'type': 'loss', 'content': 0.05058413743972778, 'timestamp': '2025-10-02 00:39:46.104106', 'step': 16052, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:46.164607', 'step': 16052, 'epoch': 2}
{'type': 'loss', 'content': 0.03766854852437973, 'timestamp': '2025-10-02 00:39:46.167685', 'step': 16053, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:46.223716', 'step': 16053, 'epoch': 2}
{'type': 'loss', 'content': 0.0955120250582695, 'timestamp': '2025-10-02 00:39:46.226367', 'step': 16054, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:46.281927', 'step': 16054, 'epoch': 2}
{'type': 'loss', 'content': 0.008049672469496727, 'timestamp': '2025-10-02 00:39:46.287583', 'step': 16055, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:46.342027', 'step': 16055, 'epoch': 2}
{'type': 'loss', 'content': 0.19683696329593658, 'timestamp': '2025-10-02 00:39:46.349119', 'step': 16056, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:39:46.404447', 'step': 16056, 'epoch': 2}
{'type': 'loss', 'content': 0.1343706250190735, 'timestamp': '2025-10-02 00:39:46.407446', 'step': 16057, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:46.463702', 'step': 16057, 'epoch': 2}
{'type': 'loss', 'content': 0.06822305172681808, 'timestamp': '2025-10-02 00:39:46.470929', 'step': 16058, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:46.525507', 'step': 16058, 'epoch': 2}
{'type': 'loss', 'content': 0.03506125509738922, 'timestamp': '2025-10-02 00:39:46.528528', 'step': 16059, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:46.584995', 'step': 16059, 'epoch': 2}
{'type': 'loss', 'content': 0.03829151391983032, 'timestamp': '2025-10-02 00:39:46.591311', 'step': 16060, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:39:46.650023', 'step': 16060, 'epoch': 2}
{'type': 'loss', 'content': 0.009152954444289207, 'timestamp': '2025-10-02 00:39:46.661248', 'step': 16061, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:46.716845', 'step': 16061, 'epoch': 2}
{'type': 'loss', 'content': 0.20118606090545654, 'timestamp': '2025-10-02 00:39:46.719521', 'step': 16062, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:46.775183', 'step': 16062, 'epoch': 2}
{'type': 'loss', 'content': 0.011569165624678135, 'timestamp': '2025-10-02 00:39:46.778049', 'step': 16063, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:39:46.840943', 'step': 16063, 'epoch': 2}
{'type': 'loss', 'content': 0.0317007414996624, 'timestamp': '2025-10-02 00:39:46.852833', 'step': 16064, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:39:46.907589', 'step': 16064, 'epoch': 2}
{'type': 'loss', 'content': 0.19534768164157867, 'timestamp': '2025-10-02 00:39:46.910259', 'step': 16065, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:46.965289', 'step': 16065, 'epoch': 2}
{'type': 'loss', 'content': 0.04255640506744385, 'timestamp': '2025-10-02 00:39:46.968057', 'step': 16066, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:47.023631', 'step': 16066, 'epoch': 2}
{'type': 'loss', 'content': 0.018392151221632957, 'timestamp': '2025-10-02 00:39:47.026821', 'step': 16067, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:47.082262', 'step': 16067, 'epoch': 2}
{'type': 'loss', 'content': 0.03748837485909462, 'timestamp': '2025-10-02 00:39:47.088360', 'step': 16068, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:47.142620', 'step': 16068, 'epoch': 2}
{'type': 'loss', 'content': 0.06936000287532806, 'timestamp': '2025-10-02 00:39:47.145382', 'step': 16069, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:47.200264', 'step': 16069, 'epoch': 2}
{'type': 'loss', 'content': 0.055922504514455795, 'timestamp': '2025-10-02 00:39:47.202829', 'step': 16070, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:47.257141', 'step': 16070, 'epoch': 2}
{'type': 'loss', 'content': 0.05645183473825455, 'timestamp': '2025-10-02 00:39:47.259953', 'step': 16071, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:39:47.315774', 'step': 16071, 'epoch': 2}
{'type': 'loss', 'content': 0.08231809735298157, 'timestamp': '2025-10-02 00:39:47.322860', 'step': 16072, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:47.377250', 'step': 16072, 'epoch': 2}
{'type': 'loss', 'content': 0.03468531742691994, 'timestamp': '2025-10-02 00:39:47.386523', 'step': 16073, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:47.441901', 'step': 16073, 'epoch': 2}
{'type': 'loss', 'content': 0.1301661878824234, 'timestamp': '2025-10-02 00:39:47.445335', 'step': 16074, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:47.502084', 'step': 16074, 'epoch': 2}
{'type': 'loss', 'content': 0.07401157915592194, 'timestamp': '2025-10-02 00:39:47.507201', 'step': 16075, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:47.564370', 'step': 16075, 'epoch': 2}
{'type': 'loss', 'content': 0.04063158854842186, 'timestamp': '2025-10-02 00:39:47.571940', 'step': 16076, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:47.627709', 'step': 16076, 'epoch': 2}
{'type': 'loss', 'content': 0.08890435099601746, 'timestamp': '2025-10-02 00:39:47.631233', 'step': 16077, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:47.686748', 'step': 16077, 'epoch': 2}
{'type': 'loss', 'content': 0.10357716679573059, 'timestamp': '2025-10-02 00:39:47.689455', 'step': 16078, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:47.744311', 'step': 16078, 'epoch': 2}
{'type': 'loss', 'content': 0.08979789912700653, 'timestamp': '2025-10-02 00:39:47.747178', 'step': 16079, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:47.805172', 'step': 16079, 'epoch': 2}
{'type': 'loss', 'content': 0.025974968448281288, 'timestamp': '2025-10-02 00:39:47.811938', 'step': 16080, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:47.867911', 'step': 16080, 'epoch': 2}
{'type': 'loss', 'content': 0.058799464255571365, 'timestamp': '2025-10-02 00:39:47.871121', 'step': 16081, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:39:47.928630', 'step': 16081, 'epoch': 2}
{'type': 'loss', 'content': 0.1366281658411026, 'timestamp': '2025-10-02 00:39:47.931898', 'step': 16082, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:47.988433', 'step': 16082, 'epoch': 2}
{'type': 'loss', 'content': 0.0536511056125164, 'timestamp': '2025-10-02 00:39:47.998046', 'step': 16083, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:39:48.061108', 'step': 16083, 'epoch': 2}
{'type': 'loss', 'content': 0.006753480993211269, 'timestamp': '2025-10-02 00:39:48.072792', 'step': 16084, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:39:48.134945', 'step': 16084, 'epoch': 2}
{'type': 'loss', 'content': 0.03614508733153343, 'timestamp': '2025-10-02 00:39:48.146721', 'step': 16085, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:48.204787', 'step': 16085, 'epoch': 2}
{'type': 'loss', 'content': 0.09624072164297104, 'timestamp': '2025-10-02 00:39:48.208313', 'step': 16086, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:48.266007', 'step': 16086, 'epoch': 2}
{'type': 'loss', 'content': 0.08720923215150833, 'timestamp': '2025-10-02 00:39:48.273640', 'step': 16087, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:39:48.345215', 'step': 16087, 'epoch': 2}
{'type': 'loss', 'content': 0.026481520384550095, 'timestamp': '2025-10-02 00:39:48.355785', 'step': 16088, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:48.413513', 'step': 16088, 'epoch': 2}
{'type': 'loss', 'content': 0.05489075556397438, 'timestamp': '2025-10-02 00:39:48.419424', 'step': 16089, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:48.475388', 'step': 16089, 'epoch': 2}
{'type': 'loss', 'content': 0.04984251409769058, 'timestamp': '2025-10-02 00:39:48.478498', 'step': 16090, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:48.535110', 'step': 16090, 'epoch': 2}
{'type': 'loss', 'content': 0.030086802318692207, 'timestamp': '2025-10-02 00:39:48.540992', 'step': 16091, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:48.598840', 'step': 16091, 'epoch': 2}
{'type': 'loss', 'content': 0.01929030567407608, 'timestamp': '2025-10-02 00:39:48.605533', 'step': 16092, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:48.661065', 'step': 16092, 'epoch': 2}
{'type': 'loss', 'content': 0.06894395500421524, 'timestamp': '2025-10-02 00:39:48.670395', 'step': 16093, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:48.725845', 'step': 16093, 'epoch': 2}
{'type': 'loss', 'content': 0.04641424119472504, 'timestamp': '2025-10-02 00:39:48.731953', 'step': 16094, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:48.787168', 'step': 16094, 'epoch': 2}
{'type': 'loss', 'content': 0.08941111713647842, 'timestamp': '2025-10-02 00:39:48.790560', 'step': 16095, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:48.847014', 'step': 16095, 'epoch': 2}
{'type': 'loss', 'content': 0.1303817629814148, 'timestamp': '2025-10-02 00:39:48.854054', 'step': 16096, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:39:48.914257', 'step': 16096, 'epoch': 2}
{'type': 'loss', 'content': 0.053197603672742844, 'timestamp': '2025-10-02 00:39:48.924722', 'step': 16097, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:39:48.980895', 'step': 16097, 'epoch': 2}
{'type': 'loss', 'content': 0.12093973159790039, 'timestamp': '2025-10-02 00:39:48.983937', 'step': 16098, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:49.042811', 'step': 16098, 'epoch': 2}
{'type': 'loss', 'content': 0.02494528703391552, 'timestamp': '2025-10-02 00:39:49.052256', 'step': 16099, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:49.106547', 'step': 16099, 'epoch': 2}
{'type': 'loss', 'content': 0.03841837868094444, 'timestamp': '2025-10-02 00:39:49.115023', 'step': 16100, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:49.171885', 'step': 16100, 'epoch': 2}
{'type': 'loss', 'content': 0.039651256054639816, 'timestamp': '2025-10-02 00:39:49.176389', 'step': 16101, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:49.230440', 'step': 16101, 'epoch': 2}
{'type': 'loss', 'content': 0.12675032019615173, 'timestamp': '2025-10-02 00:39:49.232949', 'step': 16102, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:39:49.287903', 'step': 16102, 'epoch': 2}
{'type': 'loss', 'content': 0.04998523369431496, 'timestamp': '2025-10-02 00:39:49.297688', 'step': 16103, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:39:49.352727', 'step': 16103, 'epoch': 2}
{'type': 'loss', 'content': 0.07504767924547195, 'timestamp': '2025-10-02 00:39:49.358981', 'step': 16104, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:49.412175', 'step': 16104, 'epoch': 2}
{'type': 'loss', 'content': 0.12868374586105347, 'timestamp': '2025-10-02 00:39:49.414931', 'step': 16105, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:49.468995', 'step': 16105, 'epoch': 2}
{'type': 'loss', 'content': 0.05691402032971382, 'timestamp': '2025-10-02 00:39:49.471590', 'step': 16106, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:49.526074', 'step': 16106, 'epoch': 2}
{'type': 'loss', 'content': 0.02576041966676712, 'timestamp': '2025-10-02 00:39:49.532129', 'step': 16107, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:39:49.587847', 'step': 16107, 'epoch': 2}
{'type': 'loss', 'content': 0.1724541336297989, 'timestamp': '2025-10-02 00:39:49.594038', 'step': 16108, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:49.648363', 'step': 16108, 'epoch': 2}
{'type': 'loss', 'content': 0.10814784467220306, 'timestamp': '2025-10-02 00:39:49.651040', 'step': 16109, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:49.707082', 'step': 16109, 'epoch': 2}
{'type': 'loss', 'content': 0.01599038764834404, 'timestamp': '2025-10-02 00:39:49.713078', 'step': 16110, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:49.767373', 'step': 16110, 'epoch': 2}
{'type': 'loss', 'content': 0.12515632808208466, 'timestamp': '2025-10-02 00:39:49.770539', 'step': 16111, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:39:49.824668', 'step': 16111, 'epoch': 2}
{'type': 'loss', 'content': 0.051702357828617096, 'timestamp': '2025-10-02 00:39:49.831064', 'step': 16112, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:49.884623', 'step': 16112, 'epoch': 2}
{'type': 'loss', 'content': 0.0781862884759903, 'timestamp': '2025-10-02 00:39:49.892211', 'step': 16113, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:39:49.955363', 'step': 16113, 'epoch': 2}
{'type': 'loss', 'content': 0.05210963264107704, 'timestamp': '2025-10-02 00:39:49.966438', 'step': 16114, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:50.027601', 'step': 16114, 'epoch': 2}
{'type': 'loss', 'content': 0.06042422354221344, 'timestamp': '2025-10-02 00:39:50.038332', 'step': 16115, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:50.093418', 'step': 16115, 'epoch': 2}
{'type': 'loss', 'content': 0.0634281262755394, 'timestamp': '2025-10-02 00:39:50.100178', 'step': 16116, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:50.153452', 'step': 16116, 'epoch': 2}
{'type': 'loss', 'content': 0.09506796300411224, 'timestamp': '2025-10-02 00:39:50.159431', 'step': 16117, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:50.214599', 'step': 16117, 'epoch': 2}
{'type': 'loss', 'content': 0.024887200444936752, 'timestamp': '2025-10-02 00:39:50.217137', 'step': 16118, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:50.271275', 'step': 16118, 'epoch': 2}
{'type': 'loss', 'content': 0.024693898856639862, 'timestamp': '2025-10-02 00:39:50.273913', 'step': 16119, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:50.328189', 'step': 16119, 'epoch': 2}
{'type': 'loss', 'content': 0.01964690536260605, 'timestamp': '2025-10-02 00:39:50.336522', 'step': 16120, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:50.390304', 'step': 16120, 'epoch': 2}
{'type': 'loss', 'content': 0.02069607377052307, 'timestamp': '2025-10-02 00:39:50.399914', 'step': 16121, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:39:50.453343', 'step': 16121, 'epoch': 2}
{'type': 'loss', 'content': 0.14991135895252228, 'timestamp': '2025-10-02 00:39:50.456166', 'step': 16122, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:50.512136', 'step': 16122, 'epoch': 2}
{'type': 'loss', 'content': 0.07570307701826096, 'timestamp': '2025-10-02 00:39:50.517875', 'step': 16123, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:50.572487', 'step': 16123, 'epoch': 2}
{'type': 'loss', 'content': 0.042797766625881195, 'timestamp': '2025-10-02 00:39:50.580707', 'step': 16124, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:39:50.641583', 'step': 16124, 'epoch': 2}
{'type': 'loss', 'content': 0.032877709716558456, 'timestamp': '2025-10-02 00:39:50.653579', 'step': 16125, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:50.715043', 'step': 16125, 'epoch': 2}
{'type': 'loss', 'content': 0.07458945363759995, 'timestamp': '2025-10-02 00:39:50.725703', 'step': 16126, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:39:50.780679', 'step': 16126, 'epoch': 2}
{'type': 'loss', 'content': 0.11777105927467346, 'timestamp': '2025-10-02 00:39:50.783680', 'step': 16127, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:50.838161', 'step': 16127, 'epoch': 2}
{'type': 'loss', 'content': 0.027174750342965126, 'timestamp': '2025-10-02 00:39:50.848286', 'step': 16128, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:50.902126', 'step': 16128, 'epoch': 2}
{'type': 'loss', 'content': 0.014917310327291489, 'timestamp': '2025-10-02 00:39:50.905253', 'step': 16129, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:50.960746', 'step': 16129, 'epoch': 2}
{'type': 'loss', 'content': 0.03428803011775017, 'timestamp': '2025-10-02 00:39:50.963613', 'step': 16130, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:51.018267', 'step': 16130, 'epoch': 2}
{'type': 'loss', 'content': 0.027529729530215263, 'timestamp': '2025-10-02 00:39:51.027887', 'step': 16131, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:39:51.095268', 'step': 16131, 'epoch': 2}
{'type': 'loss', 'content': 0.042160339653491974, 'timestamp': '2025-10-02 00:39:51.108294', 'step': 16132, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:51.163179', 'step': 16132, 'epoch': 2}
{'type': 'loss', 'content': 0.01096892636269331, 'timestamp': '2025-10-02 00:39:51.165775', 'step': 16133, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:51.219508', 'step': 16133, 'epoch': 2}
{'type': 'loss', 'content': 0.038500744849443436, 'timestamp': '2025-10-02 00:39:51.222261', 'step': 16134, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:51.277021', 'step': 16134, 'epoch': 2}
{'type': 'loss', 'content': 0.08790113776922226, 'timestamp': '2025-10-02 00:39:51.284461', 'step': 16135, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:39:51.338539', 'step': 16135, 'epoch': 2}
{'type': 'loss', 'content': 0.06045857071876526, 'timestamp': '2025-10-02 00:39:51.344748', 'step': 16136, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:51.404776', 'step': 16136, 'epoch': 2}
{'type': 'loss', 'content': 0.012822381220757961, 'timestamp': '2025-10-02 00:39:51.416383', 'step': 16137, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:51.470553', 'step': 16137, 'epoch': 2}
{'type': 'loss', 'content': 0.12194881588220596, 'timestamp': '2025-10-02 00:39:51.473120', 'step': 16138, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:51.528097', 'step': 16138, 'epoch': 2}
{'type': 'loss', 'content': 0.015765463933348656, 'timestamp': '2025-10-02 00:39:51.530950', 'step': 16139, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:51.585436', 'step': 16139, 'epoch': 2}
{'type': 'loss', 'content': 0.08007136732339859, 'timestamp': '2025-10-02 00:39:51.591571', 'step': 16140, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:39:51.645715', 'step': 16140, 'epoch': 2}
{'type': 'loss', 'content': 0.06321191042661667, 'timestamp': '2025-10-02 00:39:51.648369', 'step': 16141, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:39:51.703017', 'step': 16141, 'epoch': 2}
{'type': 'loss', 'content': 0.03756536543369293, 'timestamp': '2025-10-02 00:39:51.705749', 'step': 16142, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:51.760709', 'step': 16142, 'epoch': 2}
{'type': 'loss', 'content': 0.05418479070067406, 'timestamp': '2025-10-02 00:39:51.763665', 'step': 16143, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:39:51.818264', 'step': 16143, 'epoch': 2}
{'type': 'loss', 'content': 0.09718822687864304, 'timestamp': '2025-10-02 00:39:51.825334', 'step': 16144, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:51.885718', 'step': 16144, 'epoch': 2}
{'type': 'loss', 'content': 0.0213481355458498, 'timestamp': '2025-10-02 00:39:51.897249', 'step': 16145, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:51.952089', 'step': 16145, 'epoch': 2}
{'type': 'loss', 'content': 0.017582839354872704, 'timestamp': '2025-10-02 00:39:51.955339', 'step': 16146, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:52.009895', 'step': 16146, 'epoch': 2}
{'type': 'loss', 'content': 0.04264247044920921, 'timestamp': '2025-10-02 00:39:52.012578', 'step': 16147, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:52.067688', 'step': 16147, 'epoch': 2}
{'type': 'loss', 'content': 0.010323435068130493, 'timestamp': '2025-10-02 00:39:52.078059', 'step': 16148, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:52.133507', 'step': 16148, 'epoch': 2}
{'type': 'loss', 'content': 0.05717696249485016, 'timestamp': '2025-10-02 00:39:52.135923', 'step': 16149, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:52.190474', 'step': 16149, 'epoch': 2}
{'type': 'loss', 'content': 0.07367435097694397, 'timestamp': '2025-10-02 00:39:52.193139', 'step': 16150, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:52.247611', 'step': 16150, 'epoch': 2}
{'type': 'loss', 'content': 0.04290773719549179, 'timestamp': '2025-10-02 00:39:52.250128', 'step': 16151, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:39:52.305317', 'step': 16151, 'epoch': 2}
{'type': 'loss', 'content': 0.03915149345993996, 'timestamp': '2025-10-02 00:39:52.311366', 'step': 16152, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:39:52.365983', 'step': 16152, 'epoch': 2}
{'type': 'loss', 'content': 0.035118069499731064, 'timestamp': '2025-10-02 00:39:52.376439', 'step': 16153, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:52.438756', 'step': 16153, 'epoch': 2}
{'type': 'loss', 'content': 0.0030622598715126514, 'timestamp': '2025-10-02 00:39:52.449426', 'step': 16154, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:52.503938', 'step': 16154, 'epoch': 2}
{'type': 'loss', 'content': 0.02812127210199833, 'timestamp': '2025-10-02 00:39:52.510050', 'step': 16155, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:52.566157', 'step': 16155, 'epoch': 2}
{'type': 'loss', 'content': 0.07190209627151489, 'timestamp': '2025-10-02 00:39:52.576482', 'step': 16156, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:39:52.629666', 'step': 16156, 'epoch': 2}
{'type': 'loss', 'content': 0.06529439985752106, 'timestamp': '2025-10-02 00:39:52.632497', 'step': 16157, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:52.686706', 'step': 16157, 'epoch': 2}
{'type': 'loss', 'content': 0.10404829680919647, 'timestamp': '2025-10-02 00:39:52.689564', 'step': 16158, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:39:52.743771', 'step': 16158, 'epoch': 2}
{'type': 'loss', 'content': 0.040326885879039764, 'timestamp': '2025-10-02 00:39:52.746855', 'step': 16159, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:39:52.801057', 'step': 16159, 'epoch': 2}
{'type': 'loss', 'content': 0.0850471556186676, 'timestamp': '2025-10-02 00:39:52.807202', 'step': 16160, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:52.863446', 'step': 16160, 'epoch': 2}
{'type': 'loss', 'content': 0.01989266462624073, 'timestamp': '2025-10-02 00:39:52.866121', 'step': 16161, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:52.921858', 'step': 16161, 'epoch': 2}
{'type': 'loss', 'content': 0.004374931566417217, 'timestamp': '2025-10-02 00:39:52.924720', 'step': 16162, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:39:52.979364', 'step': 16162, 'epoch': 2}
{'type': 'loss', 'content': 0.058745451271533966, 'timestamp': '2025-10-02 00:39:52.981743', 'step': 16163, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:53.035956', 'step': 16163, 'epoch': 2}
{'type': 'loss', 'content': 0.05504175275564194, 'timestamp': '2025-10-02 00:39:53.042310', 'step': 16164, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:39:53.096273', 'step': 16164, 'epoch': 2}
{'type': 'loss', 'content': 0.033470477908849716, 'timestamp': '2025-10-02 00:39:53.098778', 'step': 16165, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:53.154267', 'step': 16165, 'epoch': 2}
{'type': 'loss', 'content': 0.059444088488817215, 'timestamp': '2025-10-02 00:39:53.160180', 'step': 16166, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:53.215714', 'step': 16166, 'epoch': 2}
{'type': 'loss', 'content': 0.00474247382953763, 'timestamp': '2025-10-02 00:39:53.225070', 'step': 16167, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:39:53.281035', 'step': 16167, 'epoch': 2}
{'type': 'loss', 'content': 0.09415420889854431, 'timestamp': '2025-10-02 00:39:53.287428', 'step': 16168, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:39:53.341694', 'step': 16168, 'epoch': 2}
{'type': 'loss', 'content': 0.03544623777270317, 'timestamp': '2025-10-02 00:39:53.344430', 'step': 16169, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:53.398451', 'step': 16169, 'epoch': 2}
{'type': 'loss', 'content': 0.09959309548139572, 'timestamp': '2025-10-02 00:39:53.400809', 'step': 16170, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:39:53.460049', 'step': 16170, 'epoch': 2}
{'type': 'loss', 'content': 0.05447126179933548, 'timestamp': '2025-10-02 00:39:53.470479', 'step': 16171, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:53.524608', 'step': 16171, 'epoch': 2}
{'type': 'loss', 'content': 0.05931730568408966, 'timestamp': '2025-10-02 00:39:53.531215', 'step': 16172, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:53.585808', 'step': 16172, 'epoch': 2}
{'type': 'loss', 'content': 0.09255754947662354, 'timestamp': '2025-10-02 00:39:53.588492', 'step': 16173, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:53.643361', 'step': 16173, 'epoch': 2}
{'type': 'loss', 'content': 0.04470308870077133, 'timestamp': '2025-10-02 00:39:53.646113', 'step': 16174, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:53.700013', 'step': 16174, 'epoch': 2}
{'type': 'loss', 'content': 0.10776273906230927, 'timestamp': '2025-10-02 00:39:53.709410', 'step': 16175, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:53.764654', 'step': 16175, 'epoch': 2}
{'type': 'loss', 'content': 0.009241648949682713, 'timestamp': '2025-10-02 00:39:53.775065', 'step': 16176, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:39:53.828864', 'step': 16176, 'epoch': 2}
{'type': 'loss', 'content': 0.03294156864285469, 'timestamp': '2025-10-02 00:39:53.839357', 'step': 16177, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:53.893645', 'step': 16177, 'epoch': 2}
{'type': 'loss', 'content': 0.09053360670804977, 'timestamp': '2025-10-02 00:39:53.899512', 'step': 16178, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:39:53.954120', 'step': 16178, 'epoch': 2}
{'type': 'loss', 'content': 0.06050945445895195, 'timestamp': '2025-10-02 00:39:53.956503', 'step': 16179, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:39:54.010731', 'step': 16179, 'epoch': 2}
{'type': 'loss', 'content': 0.2293892502784729, 'timestamp': '2025-10-02 00:39:54.017243', 'step': 16180, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:54.078284', 'step': 16180, 'epoch': 2}
{'type': 'loss', 'content': 0.019948644563555717, 'timestamp': '2025-10-02 00:39:54.089868', 'step': 16181, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:54.148014', 'step': 16181, 'epoch': 2}
{'type': 'loss', 'content': 0.0037989190313965082, 'timestamp': '2025-10-02 00:39:54.157581', 'step': 16182, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:54.212452', 'step': 16182, 'epoch': 2}
{'type': 'loss', 'content': 0.10245008766651154, 'timestamp': '2025-10-02 00:39:54.215071', 'step': 16183, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:54.269842', 'step': 16183, 'epoch': 2}
{'type': 'loss', 'content': 0.10328798741102219, 'timestamp': '2025-10-02 00:39:54.276090', 'step': 16184, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:54.329424', 'step': 16184, 'epoch': 2}
{'type': 'loss', 'content': 0.11893170326948166, 'timestamp': '2025-10-02 00:39:54.335270', 'step': 16185, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:54.389861', 'step': 16185, 'epoch': 2}
{'type': 'loss', 'content': 0.0249301940202713, 'timestamp': '2025-10-02 00:39:54.397767', 'step': 16186, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:54.459435', 'step': 16186, 'epoch': 2}
{'type': 'loss', 'content': 0.046118371188640594, 'timestamp': '2025-10-02 00:39:54.470191', 'step': 16187, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:39:54.524515', 'step': 16187, 'epoch': 2}
{'type': 'loss', 'content': 0.03141019120812416, 'timestamp': '2025-10-02 00:39:54.530934', 'step': 16188, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:54.584282', 'step': 16188, 'epoch': 2}
{'type': 'loss', 'content': 0.02967114746570587, 'timestamp': '2025-10-02 00:39:54.590465', 'step': 16189, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:54.644773', 'step': 16189, 'epoch': 2}
{'type': 'loss', 'content': 0.04119381308555603, 'timestamp': '2025-10-02 00:39:54.647305', 'step': 16190, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:54.701991', 'step': 16190, 'epoch': 2}
{'type': 'loss', 'content': 0.08221568167209625, 'timestamp': '2025-10-02 00:39:54.704722', 'step': 16191, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:39:54.758356', 'step': 16191, 'epoch': 2}
{'type': 'loss', 'content': 0.07729647308588028, 'timestamp': '2025-10-02 00:39:54.764591', 'step': 16192, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:39:54.824779', 'step': 16192, 'epoch': 2}
{'type': 'loss', 'content': 0.008699431084096432, 'timestamp': '2025-10-02 00:39:54.836038', 'step': 16193, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:39:54.891115', 'step': 16193, 'epoch': 2}
{'type': 'loss', 'content': 0.13479603826999664, 'timestamp': '2025-10-02 00:39:54.895304', 'step': 16194, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:39:54.949543', 'step': 16194, 'epoch': 2}
{'type': 'loss', 'content': 0.1153242215514183, 'timestamp': '2025-10-02 00:39:54.954044', 'step': 16195, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:39:55.008254', 'step': 16195, 'epoch': 2}
{'type': 'loss', 'content': 0.06774353235960007, 'timestamp': '2025-10-02 00:39:55.014468', 'step': 16196, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:55.068592', 'step': 16196, 'epoch': 2}
{'type': 'loss', 'content': 0.02001439593732357, 'timestamp': '2025-10-02 00:39:55.078268', 'step': 16197, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:55.132423', 'step': 16197, 'epoch': 2}
{'type': 'loss', 'content': 0.03259458765387535, 'timestamp': '2025-10-02 00:39:55.138624', 'step': 16198, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:39:55.192704', 'step': 16198, 'epoch': 2}
{'type': 'loss', 'content': 0.12935085594654083, 'timestamp': '2025-10-02 00:39:55.197038', 'step': 16199, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:55.251172', 'step': 16199, 'epoch': 2}
{'type': 'loss', 'content': 0.06687995791435242, 'timestamp': '2025-10-02 00:39:55.257737', 'step': 16200, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:39:55.324254', 'step': 16200, 'epoch': 2}
{'type': 'loss', 'content': 0.03589855507016182, 'timestamp': '2025-10-02 00:39:55.337904', 'step': 16201, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:55.392265', 'step': 16201, 'epoch': 2}
{'type': 'loss', 'content': 0.09208472073078156, 'timestamp': '2025-10-02 00:39:55.394723', 'step': 16202, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:55.456258', 'step': 16202, 'epoch': 2}
{'type': 'loss', 'content': 0.029470006003975868, 'timestamp': '2025-10-02 00:39:55.466918', 'step': 16203, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:55.521667', 'step': 16203, 'epoch': 2}
{'type': 'loss', 'content': 0.05784828960895538, 'timestamp': '2025-10-02 00:39:55.528652', 'step': 16204, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:55.582145', 'step': 16204, 'epoch': 2}
{'type': 'loss', 'content': 0.09972015768289566, 'timestamp': '2025-10-02 00:39:55.584770', 'step': 16205, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:55.638944', 'step': 16205, 'epoch': 2}
{'type': 'loss', 'content': 0.09698328375816345, 'timestamp': '2025-10-02 00:39:55.641609', 'step': 16206, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:39:55.698260', 'step': 16206, 'epoch': 2}
{'type': 'loss', 'content': 0.055028825998306274, 'timestamp': '2025-10-02 00:39:55.700915', 'step': 16207, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:55.755747', 'step': 16207, 'epoch': 2}
{'type': 'loss', 'content': 0.08793652802705765, 'timestamp': '2025-10-02 00:39:55.761918', 'step': 16208, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:55.816020', 'step': 16208, 'epoch': 2}
{'type': 'loss', 'content': 0.0675646960735321, 'timestamp': '2025-10-02 00:39:55.818855', 'step': 16209, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:39:55.876943', 'step': 16209, 'epoch': 2}
{'type': 'loss', 'content': 0.11945026367902756, 'timestamp': '2025-10-02 00:39:55.879366', 'step': 16210, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:55.940354', 'step': 16210, 'epoch': 2}
{'type': 'loss', 'content': 0.0316770114004612, 'timestamp': '2025-10-02 00:39:55.944560', 'step': 16211, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:39:56.020762', 'step': 16211, 'epoch': 2}
{'type': 'loss', 'content': 0.09027750045061111, 'timestamp': '2025-10-02 00:39:56.031852', 'step': 16212, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:56.103101', 'step': 16212, 'epoch': 2}
{'type': 'loss', 'content': 0.03254419192671776, 'timestamp': '2025-10-02 00:39:56.114506', 'step': 16213, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:56.180089', 'step': 16213, 'epoch': 2}
{'type': 'loss', 'content': 0.07619521766901016, 'timestamp': '2025-10-02 00:39:56.184246', 'step': 16214, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:56.244274', 'step': 16214, 'epoch': 2}
{'type': 'loss', 'content': 0.04237471893429756, 'timestamp': '2025-10-02 00:39:56.250566', 'step': 16215, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:39:56.321875', 'step': 16215, 'epoch': 2}
{'type': 'loss', 'content': 0.021282115951180458, 'timestamp': '2025-10-02 00:39:56.333093', 'step': 16216, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:56.390355', 'step': 16216, 'epoch': 2}
{'type': 'loss', 'content': 0.023854708299040794, 'timestamp': '2025-10-02 00:39:56.398900', 'step': 16217, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:56.454378', 'step': 16217, 'epoch': 2}
{'type': 'loss', 'content': 0.20022200047969818, 'timestamp': '2025-10-02 00:39:56.460770', 'step': 16218, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:56.517801', 'step': 16218, 'epoch': 2}
{'type': 'loss', 'content': 0.03763050585985184, 'timestamp': '2025-10-02 00:39:56.524119', 'step': 16219, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:56.588934', 'step': 16219, 'epoch': 2}
{'type': 'loss', 'content': 0.05554560571908951, 'timestamp': '2025-10-02 00:39:56.595177', 'step': 16220, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:56.654422', 'step': 16220, 'epoch': 2}
{'type': 'loss', 'content': 0.07612688094377518, 'timestamp': '2025-10-02 00:39:56.657636', 'step': 16221, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:56.720994', 'step': 16221, 'epoch': 2}
{'type': 'loss', 'content': 0.027491124346852303, 'timestamp': '2025-10-02 00:39:56.723676', 'step': 16222, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:39:56.792498', 'step': 16222, 'epoch': 2}
{'type': 'loss', 'content': 0.04076271876692772, 'timestamp': '2025-10-02 00:39:56.805126', 'step': 16223, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:39:56.861056', 'step': 16223, 'epoch': 2}
{'type': 'loss', 'content': 0.07440919429063797, 'timestamp': '2025-10-02 00:39:56.867653', 'step': 16224, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:56.926985', 'step': 16224, 'epoch': 2}
{'type': 'loss', 'content': 0.06396400183439255, 'timestamp': '2025-10-02 00:39:56.953795', 'step': 16225, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:57.016673', 'step': 16225, 'epoch': 2}
{'type': 'loss', 'content': 0.04667755588889122, 'timestamp': '2025-10-02 00:39:57.032660', 'step': 16226, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:57.101174', 'step': 16226, 'epoch': 2}
{'type': 'loss', 'content': 0.054894473403692245, 'timestamp': '2025-10-02 00:39:57.104111', 'step': 16227, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:39:57.158026', 'step': 16227, 'epoch': 2}
{'type': 'loss', 'content': 0.1776237189769745, 'timestamp': '2025-10-02 00:39:57.164229', 'step': 16228, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:39:57.222049', 'step': 16228, 'epoch': 2}
{'type': 'loss', 'content': 0.08315328508615494, 'timestamp': '2025-10-02 00:39:57.224656', 'step': 16229, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:39:57.278835', 'step': 16229, 'epoch': 2}
{'type': 'loss', 'content': 0.06732668727636337, 'timestamp': '2025-10-02 00:39:57.281628', 'step': 16230, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:57.336358', 'step': 16230, 'epoch': 2}
{'type': 'loss', 'content': 0.03555261343717575, 'timestamp': '2025-10-02 00:39:57.339103', 'step': 16231, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:57.392561', 'step': 16231, 'epoch': 2}
{'type': 'loss', 'content': 0.17486809194087982, 'timestamp': '2025-10-02 00:39:57.398424', 'step': 16232, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:39:57.452502', 'step': 16232, 'epoch': 2}
{'type': 'loss', 'content': 0.11177579313516617, 'timestamp': '2025-10-02 00:39:57.454885', 'step': 16233, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:39:57.509443', 'step': 16233, 'epoch': 2}
{'type': 'loss', 'content': 0.05090609937906265, 'timestamp': '2025-10-02 00:39:57.515561', 'step': 16234, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:39:57.569659', 'step': 16234, 'epoch': 2}
{'type': 'loss', 'content': 0.17532974481582642, 'timestamp': '2025-10-02 00:39:57.572501', 'step': 16235, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:57.627611', 'step': 16235, 'epoch': 2}
{'type': 'loss', 'content': 0.06261730939149857, 'timestamp': '2025-10-02 00:39:57.636256', 'step': 16236, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:57.690263', 'step': 16236, 'epoch': 2}
{'type': 'loss', 'content': 0.04275674372911453, 'timestamp': '2025-10-02 00:39:57.692627', 'step': 16237, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:57.745874', 'step': 16237, 'epoch': 2}
{'type': 'loss', 'content': 0.10982933640480042, 'timestamp': '2025-10-02 00:39:57.748398', 'step': 16238, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:39:57.802814', 'step': 16238, 'epoch': 2}
{'type': 'loss', 'content': 0.022319607436656952, 'timestamp': '2025-10-02 00:39:57.812408', 'step': 16239, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:39:57.866551', 'step': 16239, 'epoch': 2}
{'type': 'loss', 'content': 0.06928405165672302, 'timestamp': '2025-10-02 00:39:57.873256', 'step': 16240, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:57.927304', 'step': 16240, 'epoch': 2}
{'type': 'loss', 'content': 0.04751206189393997, 'timestamp': '2025-10-02 00:39:57.929908', 'step': 16241, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:39:57.984033', 'step': 16241, 'epoch': 2}
{'type': 'loss', 'content': 0.06130913272500038, 'timestamp': '2025-10-02 00:39:57.991880', 'step': 16242, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:58.045993', 'step': 16242, 'epoch': 2}
{'type': 'loss', 'content': 0.10555889457464218, 'timestamp': '2025-10-02 00:39:58.048645', 'step': 16243, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:58.102626', 'step': 16243, 'epoch': 2}
{'type': 'loss', 'content': 0.10646787285804749, 'timestamp': '2025-10-02 00:39:58.109046', 'step': 16244, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:58.163271', 'step': 16244, 'epoch': 2}
{'type': 'loss', 'content': 0.05693768337368965, 'timestamp': '2025-10-02 00:39:58.165830', 'step': 16245, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:39:58.219497', 'step': 16245, 'epoch': 2}
{'type': 'loss', 'content': 0.08356510102748871, 'timestamp': '2025-10-02 00:39:58.222237', 'step': 16246, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:39:58.275986', 'step': 16246, 'epoch': 2}
{'type': 'loss', 'content': 0.11807627230882645, 'timestamp': '2025-10-02 00:39:58.278491', 'step': 16247, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:58.332811', 'step': 16247, 'epoch': 2}
{'type': 'loss', 'content': 0.07909510284662247, 'timestamp': '2025-10-02 00:39:58.338939', 'step': 16248, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:58.399145', 'step': 16248, 'epoch': 2}
{'type': 'loss', 'content': 0.0216488279402256, 'timestamp': '2025-10-02 00:39:58.410630', 'step': 16249, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:39:58.465214', 'step': 16249, 'epoch': 2}
{'type': 'loss', 'content': 0.19251997768878937, 'timestamp': '2025-10-02 00:39:58.468149', 'step': 16250, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:58.522101', 'step': 16250, 'epoch': 2}
{'type': 'loss', 'content': 0.20529043674468994, 'timestamp': '2025-10-02 00:39:58.524983', 'step': 16251, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:39:58.579475', 'step': 16251, 'epoch': 2}
{'type': 'loss', 'content': 0.11226318776607513, 'timestamp': '2025-10-02 00:39:58.585988', 'step': 16252, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:39:58.639024', 'step': 16252, 'epoch': 2}
{'type': 'loss', 'content': 0.0973939374089241, 'timestamp': '2025-10-02 00:39:58.642037', 'step': 16253, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:58.704675', 'step': 16253, 'epoch': 2}
{'type': 'loss', 'content': 0.039706550538539886, 'timestamp': '2025-10-02 00:39:58.715456', 'step': 16254, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:39:58.770092', 'step': 16254, 'epoch': 2}
{'type': 'loss', 'content': 0.05333683267235756, 'timestamp': '2025-10-02 00:39:58.772879', 'step': 16255, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:39:58.827486', 'step': 16255, 'epoch': 2}
{'type': 'loss', 'content': 0.06474253535270691, 'timestamp': '2025-10-02 00:39:58.833710', 'step': 16256, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:39:58.894180', 'step': 16256, 'epoch': 2}
{'type': 'loss', 'content': 0.026839913800358772, 'timestamp': '2025-10-02 00:39:58.905774', 'step': 16257, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:39:58.964731', 'step': 16257, 'epoch': 2}
{'type': 'loss', 'content': 0.012484954670071602, 'timestamp': '2025-10-02 00:39:58.974542', 'step': 16258, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:39:59.029534', 'step': 16258, 'epoch': 2}
{'type': 'loss', 'content': 0.14770278334617615, 'timestamp': '2025-10-02 00:39:59.032386', 'step': 16259, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:59.088268', 'step': 16259, 'epoch': 2}
{'type': 'loss', 'content': 0.05367748439311981, 'timestamp': '2025-10-02 00:39:59.094853', 'step': 16260, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:39:59.152723', 'step': 16260, 'epoch': 2}
{'type': 'loss', 'content': 0.036740366369485855, 'timestamp': '2025-10-02 00:39:59.163950', 'step': 16261, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:39:59.221671', 'step': 16261, 'epoch': 2}
{'type': 'loss', 'content': 0.05093495547771454, 'timestamp': '2025-10-02 00:39:59.231492', 'step': 16262, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:39:59.287890', 'step': 16262, 'epoch': 2}
{'type': 'loss', 'content': 0.07914558798074722, 'timestamp': '2025-10-02 00:39:59.290824', 'step': 16263, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:39:59.344932', 'step': 16263, 'epoch': 2}
{'type': 'loss', 'content': 0.07687952369451523, 'timestamp': '2025-10-02 00:39:59.351416', 'step': 16264, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:39:59.404798', 'step': 16264, 'epoch': 2}
{'type': 'loss', 'content': 0.20091506838798523, 'timestamp': '2025-10-02 00:39:59.407725', 'step': 16265, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:39:59.467402', 'step': 16265, 'epoch': 2}
{'type': 'loss', 'content': 0.002630780450999737, 'timestamp': '2025-10-02 00:39:59.477879', 'step': 16266, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:39:59.537602', 'step': 16266, 'epoch': 2}
{'type': 'loss', 'content': 0.014509215019643307, 'timestamp': '2025-10-02 00:39:59.548048', 'step': 16267, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:39:59.605027', 'step': 16267, 'epoch': 2}
{'type': 'loss', 'content': 0.045874837785959244, 'timestamp': '2025-10-02 00:39:59.611449', 'step': 16268, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:39:59.687708', 'step': 16268, 'epoch': 2}
{'type': 'loss', 'content': 0.03264511749148369, 'timestamp': '2025-10-02 00:39:59.703079', 'step': 16269, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:59.757632', 'step': 16269, 'epoch': 2}
{'type': 'loss', 'content': 0.05334208160638809, 'timestamp': '2025-10-02 00:39:59.760312', 'step': 16270, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:39:59.815256', 'step': 16270, 'epoch': 2}
{'type': 'loss', 'content': 0.07090897858142853, 'timestamp': '2025-10-02 00:39:59.817665', 'step': 16271, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:39:59.872389', 'step': 16271, 'epoch': 2}
{'type': 'loss', 'content': 0.052455492317676544, 'timestamp': '2025-10-02 00:39:59.880259', 'step': 16272, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:39:59.935193', 'step': 16272, 'epoch': 2}
{'type': 'loss', 'content': 0.042345479130744934, 'timestamp': '2025-10-02 00:39:59.938091', 'step': 16273, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:39:59.993138', 'step': 16273, 'epoch': 2}
{'type': 'loss', 'content': 0.019365891814231873, 'timestamp': '2025-10-02 00:39:59.996872', 'step': 16274, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:00.054135', 'step': 16274, 'epoch': 2}
{'type': 'loss', 'content': 0.021648988127708435, 'timestamp': '2025-10-02 00:40:00.060339', 'step': 16275, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:40:00.115684', 'step': 16275, 'epoch': 2}
{'type': 'loss', 'content': 0.07697127014398575, 'timestamp': '2025-10-02 00:40:00.121937', 'step': 16276, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:00.179606', 'step': 16276, 'epoch': 2}
{'type': 'loss', 'content': 0.029645808041095734, 'timestamp': '2025-10-02 00:40:00.189429', 'step': 16277, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:00.244120', 'step': 16277, 'epoch': 2}
{'type': 'loss', 'content': 0.08780437707901001, 'timestamp': '2025-10-02 00:40:00.247501', 'step': 16278, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:00.305074', 'step': 16278, 'epoch': 2}
{'type': 'loss', 'content': 0.07602334767580032, 'timestamp': '2025-10-02 00:40:00.310643', 'step': 16279, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:00.367648', 'step': 16279, 'epoch': 2}
{'type': 'loss', 'content': 0.09887953847646713, 'timestamp': '2025-10-02 00:40:00.376388', 'step': 16280, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:00.434489', 'step': 16280, 'epoch': 2}
{'type': 'loss', 'content': 0.060327935963869095, 'timestamp': '2025-10-02 00:40:00.444464', 'step': 16281, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:40:00.509162', 'step': 16281, 'epoch': 2}
{'type': 'loss', 'content': 0.04439229518175125, 'timestamp': '2025-10-02 00:40:00.520079', 'step': 16282, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:00.575423', 'step': 16282, 'epoch': 2}
{'type': 'loss', 'content': 0.16735601425170898, 'timestamp': '2025-10-02 00:40:00.578141', 'step': 16283, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:00.636247', 'step': 16283, 'epoch': 2}
{'type': 'loss', 'content': 0.046649347990751266, 'timestamp': '2025-10-02 00:40:00.646521', 'step': 16284, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:00.702391', 'step': 16284, 'epoch': 2}
{'type': 'loss', 'content': 0.05382484570145607, 'timestamp': '2025-10-02 00:40:00.705708', 'step': 16285, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:00.763776', 'step': 16285, 'epoch': 2}
{'type': 'loss', 'content': 0.07888209819793701, 'timestamp': '2025-10-02 00:40:00.766495', 'step': 16286, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:00.821831', 'step': 16286, 'epoch': 2}
{'type': 'loss', 'content': 0.0546288900077343, 'timestamp': '2025-10-02 00:40:00.831616', 'step': 16287, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:00.889329', 'step': 16287, 'epoch': 2}
{'type': 'loss', 'content': 0.08534111082553864, 'timestamp': '2025-10-02 00:40:00.896129', 'step': 16288, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:00.951870', 'step': 16288, 'epoch': 2}
{'type': 'loss', 'content': 0.04494267702102661, 'timestamp': '2025-10-02 00:40:00.955589', 'step': 16289, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:40:01.020072', 'step': 16289, 'epoch': 2}
{'type': 'loss', 'content': 0.07105638831853867, 'timestamp': '2025-10-02 00:40:01.031137', 'step': 16290, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:01.090241', 'step': 16290, 'epoch': 2}
{'type': 'loss', 'content': 0.06946605443954468, 'timestamp': '2025-10-02 00:40:01.093424', 'step': 16291, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:01.149258', 'step': 16291, 'epoch': 2}
{'type': 'loss', 'content': 0.04618160426616669, 'timestamp': '2025-10-02 00:40:01.156198', 'step': 16292, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:01.211873', 'step': 16292, 'epoch': 2}
{'type': 'loss', 'content': 0.09122897684574127, 'timestamp': '2025-10-02 00:40:01.220311', 'step': 16293, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:01.276149', 'step': 16293, 'epoch': 2}
{'type': 'loss', 'content': 0.060526035726070404, 'timestamp': '2025-10-02 00:40:01.285620', 'step': 16294, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:40:01.342769', 'step': 16294, 'epoch': 2}
{'type': 'loss', 'content': 0.047038860619068146, 'timestamp': '2025-10-02 00:40:01.345741', 'step': 16295, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:01.401365', 'step': 16295, 'epoch': 2}
{'type': 'loss', 'content': 0.015653390437364578, 'timestamp': '2025-10-02 00:40:01.408642', 'step': 16296, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:01.465753', 'step': 16296, 'epoch': 2}
{'type': 'loss', 'content': 0.030799251049757004, 'timestamp': '2025-10-02 00:40:01.473425', 'step': 16297, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:01.529541', 'step': 16297, 'epoch': 2}
{'type': 'loss', 'content': 0.027884213253855705, 'timestamp': '2025-10-02 00:40:01.532395', 'step': 16298, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:01.587495', 'step': 16298, 'epoch': 2}
{'type': 'loss', 'content': 0.03415915369987488, 'timestamp': '2025-10-02 00:40:01.590748', 'step': 16299, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:01.651428', 'step': 16299, 'epoch': 2}
{'type': 'loss', 'content': 0.11743851006031036, 'timestamp': '2025-10-02 00:40:01.658228', 'step': 16300, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:01.711788', 'step': 16300, 'epoch': 2}
{'type': 'loss', 'content': 0.053580451756715775, 'timestamp': '2025-10-02 00:40:01.714703', 'step': 16301, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:40:01.777719', 'step': 16301, 'epoch': 2}
{'type': 'loss', 'content': 0.03583759069442749, 'timestamp': '2025-10-02 00:40:01.788636', 'step': 16302, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:40:01.843414', 'step': 16302, 'epoch': 2}
{'type': 'loss', 'content': 0.04950693994760513, 'timestamp': '2025-10-02 00:40:01.846216', 'step': 16303, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:01.901251', 'step': 16303, 'epoch': 2}
{'type': 'loss', 'content': 0.060147035866975784, 'timestamp': '2025-10-02 00:40:01.907211', 'step': 16304, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:01.961006', 'step': 16304, 'epoch': 2}
{'type': 'loss', 'content': 0.023302141577005386, 'timestamp': '2025-10-02 00:40:01.968573', 'step': 16305, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:02.024872', 'step': 16305, 'epoch': 2}
{'type': 'loss', 'content': 0.011386170983314514, 'timestamp': '2025-10-02 00:40:02.027335', 'step': 16306, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:02.083348', 'step': 16306, 'epoch': 2}
{'type': 'loss', 'content': 0.05580563470721245, 'timestamp': '2025-10-02 00:40:02.086516', 'step': 16307, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:40:02.141373', 'step': 16307, 'epoch': 2}
{'type': 'loss', 'content': 0.046650271862745285, 'timestamp': '2025-10-02 00:40:02.147857', 'step': 16308, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:02.202627', 'step': 16308, 'epoch': 2}
{'type': 'loss', 'content': 0.034160103648900986, 'timestamp': '2025-10-02 00:40:02.210179', 'step': 16309, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:40:02.266515', 'step': 16309, 'epoch': 2}
{'type': 'loss', 'content': 0.1350536048412323, 'timestamp': '2025-10-02 00:40:02.269641', 'step': 16310, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:02.324418', 'step': 16310, 'epoch': 2}
{'type': 'loss', 'content': 0.02598094753921032, 'timestamp': '2025-10-02 00:40:02.332048', 'step': 16311, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:02.387367', 'step': 16311, 'epoch': 2}
{'type': 'loss', 'content': 0.06323467195034027, 'timestamp': '2025-10-02 00:40:02.393652', 'step': 16312, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:02.448022', 'step': 16312, 'epoch': 2}
{'type': 'loss', 'content': 0.07889369130134583, 'timestamp': '2025-10-02 00:40:02.451415', 'step': 16313, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:40:02.514869', 'step': 16313, 'epoch': 2}
{'type': 'loss', 'content': 0.04689379036426544, 'timestamp': '2025-10-02 00:40:02.525772', 'step': 16314, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:02.583038', 'step': 16314, 'epoch': 2}
{'type': 'loss', 'content': 0.034211110323667526, 'timestamp': '2025-10-02 00:40:02.585824', 'step': 16315, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:02.642133', 'step': 16315, 'epoch': 2}
{'type': 'loss', 'content': 0.038138728588819504, 'timestamp': '2025-10-02 00:40:02.648992', 'step': 16316, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:02.703738', 'step': 16316, 'epoch': 2}
{'type': 'loss', 'content': 0.05502123013138771, 'timestamp': '2025-10-02 00:40:02.714127', 'step': 16317, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:02.769452', 'step': 16317, 'epoch': 2}
{'type': 'loss', 'content': 0.0634642094373703, 'timestamp': '2025-10-02 00:40:02.772115', 'step': 16318, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:02.829193', 'step': 16318, 'epoch': 2}
{'type': 'loss', 'content': 0.01009612251073122, 'timestamp': '2025-10-02 00:40:02.836612', 'step': 16319, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:02.892576', 'step': 16319, 'epoch': 2}
{'type': 'loss', 'content': 0.014113066717982292, 'timestamp': '2025-10-02 00:40:02.900755', 'step': 16320, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:02.954393', 'step': 16320, 'epoch': 2}
{'type': 'loss', 'content': 0.0979747548699379, 'timestamp': '2025-10-02 00:40:02.956858', 'step': 16321, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:03.011260', 'step': 16321, 'epoch': 2}
{'type': 'loss', 'content': 0.09300686419010162, 'timestamp': '2025-10-02 00:40:03.014273', 'step': 16322, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:03.069040', 'step': 16322, 'epoch': 2}
{'type': 'loss', 'content': 0.009304521605372429, 'timestamp': '2025-10-02 00:40:03.071531', 'step': 16323, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:03.125977', 'step': 16323, 'epoch': 2}
{'type': 'loss', 'content': 0.02286827191710472, 'timestamp': '2025-10-02 00:40:03.134121', 'step': 16324, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:03.188046', 'step': 16324, 'epoch': 2}
{'type': 'loss', 'content': 0.06808583438396454, 'timestamp': '2025-10-02 00:40:03.190551', 'step': 16325, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:03.245222', 'step': 16325, 'epoch': 2}
{'type': 'loss', 'content': 0.055964574217796326, 'timestamp': '2025-10-02 00:40:03.247903', 'step': 16326, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:03.303496', 'step': 16326, 'epoch': 2}
{'type': 'loss', 'content': 0.06872741132974625, 'timestamp': '2025-10-02 00:40:03.306121', 'step': 16327, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:03.360422', 'step': 16327, 'epoch': 2}
{'type': 'loss', 'content': 0.08473710715770721, 'timestamp': '2025-10-02 00:40:03.366921', 'step': 16328, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:40:03.422420', 'step': 16328, 'epoch': 2}
{'type': 'loss', 'content': 0.037321023643016815, 'timestamp': '2025-10-02 00:40:03.425460', 'step': 16329, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:40:03.479915', 'step': 16329, 'epoch': 2}
{'type': 'loss', 'content': 0.06061975285410881, 'timestamp': '2025-10-02 00:40:03.483238', 'step': 16330, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:03.538092', 'step': 16330, 'epoch': 2}
{'type': 'loss', 'content': 0.09379880875349045, 'timestamp': '2025-10-02 00:40:03.540782', 'step': 16331, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:03.594828', 'step': 16331, 'epoch': 2}
{'type': 'loss', 'content': 0.20888447761535645, 'timestamp': '2025-10-02 00:40:03.601006', 'step': 16332, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:03.654861', 'step': 16332, 'epoch': 2}
{'type': 'loss', 'content': 0.02863394469022751, 'timestamp': '2025-10-02 00:40:03.662455', 'step': 16333, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:40:03.720060', 'step': 16333, 'epoch': 2}
{'type': 'loss', 'content': 0.0456104502081871, 'timestamp': '2025-10-02 00:40:03.722763', 'step': 16334, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:03.777913', 'step': 16334, 'epoch': 2}
{'type': 'loss', 'content': 0.11217845231294632, 'timestamp': '2025-10-02 00:40:03.780798', 'step': 16335, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:03.835838', 'step': 16335, 'epoch': 2}
{'type': 'loss', 'content': 0.061646878719329834, 'timestamp': '2025-10-02 00:40:03.842144', 'step': 16336, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:03.896679', 'step': 16336, 'epoch': 2}
{'type': 'loss', 'content': 0.04132251441478729, 'timestamp': '2025-10-02 00:40:03.902598', 'step': 16337, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:03.957294', 'step': 16337, 'epoch': 2}
{'type': 'loss', 'content': 0.04779241606593132, 'timestamp': '2025-10-02 00:40:03.959791', 'step': 16338, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:04.014128', 'step': 16338, 'epoch': 2}
{'type': 'loss', 'content': 0.13846099376678467, 'timestamp': '2025-10-02 00:40:04.016840', 'step': 16339, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:04.072153', 'step': 16339, 'epoch': 2}
{'type': 'loss', 'content': 0.051613517105579376, 'timestamp': '2025-10-02 00:40:04.080423', 'step': 16340, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:04.135147', 'step': 16340, 'epoch': 2}
{'type': 'loss', 'content': 0.10910329222679138, 'timestamp': '2025-10-02 00:40:04.137579', 'step': 16341, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:04.192375', 'step': 16341, 'epoch': 2}
{'type': 'loss', 'content': 0.06339580565690994, 'timestamp': '2025-10-02 00:40:04.199979', 'step': 16342, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:04.254381', 'step': 16342, 'epoch': 2}
{'type': 'loss', 'content': 0.06168847903609276, 'timestamp': '2025-10-02 00:40:04.263801', 'step': 16343, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:04.318618', 'step': 16343, 'epoch': 2}
{'type': 'loss', 'content': 0.057564251124858856, 'timestamp': '2025-10-02 00:40:04.328690', 'step': 16344, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:04.383025', 'step': 16344, 'epoch': 2}
{'type': 'loss', 'content': 0.08458707481622696, 'timestamp': '2025-10-02 00:40:04.385368', 'step': 16345, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:04.439905', 'step': 16345, 'epoch': 2}
{'type': 'loss', 'content': 0.024289321154356003, 'timestamp': '2025-10-02 00:40:04.442513', 'step': 16346, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:04.497583', 'step': 16346, 'epoch': 2}
{'type': 'loss', 'content': 0.06969136744737625, 'timestamp': '2025-10-02 00:40:04.499885', 'step': 16347, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:04.554552', 'step': 16347, 'epoch': 2}
{'type': 'loss', 'content': 0.06482924520969391, 'timestamp': '2025-10-02 00:40:04.564850', 'step': 16348, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:04.618711', 'step': 16348, 'epoch': 2}
{'type': 'loss', 'content': 0.028004882857203484, 'timestamp': '2025-10-02 00:40:04.627117', 'step': 16349, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:04.686885', 'step': 16349, 'epoch': 2}
{'type': 'loss', 'content': 0.039913360029459, 'timestamp': '2025-10-02 00:40:04.689815', 'step': 16350, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:04.744521', 'step': 16350, 'epoch': 2}
{'type': 'loss', 'content': 0.04771531745791435, 'timestamp': '2025-10-02 00:40:04.750547', 'step': 16351, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:04.813841', 'step': 16351, 'epoch': 2}
{'type': 'loss', 'content': 0.10697005689144135, 'timestamp': '2025-10-02 00:40:04.822063', 'step': 16352, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:04.876278', 'step': 16352, 'epoch': 2}
{'type': 'loss', 'content': 0.0012169515248388052, 'timestamp': '2025-10-02 00:40:04.881060', 'step': 16353, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:40:04.957211', 'step': 16353, 'epoch': 2}
{'type': 'loss', 'content': 0.006180710159242153, 'timestamp': '2025-10-02 00:40:04.969806', 'step': 16354, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:05.026425', 'step': 16354, 'epoch': 2}
{'type': 'loss', 'content': 0.0782601460814476, 'timestamp': '2025-10-02 00:40:05.029039', 'step': 16355, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:40:05.090197', 'step': 16355, 'epoch': 2}
{'type': 'loss', 'content': 0.02028854750096798, 'timestamp': '2025-10-02 00:40:05.101623', 'step': 16356, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:40:05.161203', 'step': 16356, 'epoch': 2}
{'type': 'loss', 'content': 0.014189056120812893, 'timestamp': '2025-10-02 00:40:05.172714', 'step': 16357, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:05.227992', 'step': 16357, 'epoch': 2}
{'type': 'loss', 'content': 0.0655108317732811, 'timestamp': '2025-10-02 00:40:05.231500', 'step': 16358, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:05.287423', 'step': 16358, 'epoch': 2}
{'type': 'loss', 'content': 0.1265125572681427, 'timestamp': '2025-10-02 00:40:05.290129', 'step': 16359, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:40:05.358595', 'step': 16359, 'epoch': 2}
{'type': 'loss', 'content': 0.015965212136507034, 'timestamp': '2025-10-02 00:40:05.371910', 'step': 16360, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:05.426511', 'step': 16360, 'epoch': 2}
{'type': 'loss', 'content': 0.11692410707473755, 'timestamp': '2025-10-02 00:40:05.429389', 'step': 16361, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:40:05.484812', 'step': 16361, 'epoch': 2}
{'type': 'loss', 'content': 0.06179971620440483, 'timestamp': '2025-10-02 00:40:05.487343', 'step': 16362, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:05.543780', 'step': 16362, 'epoch': 2}
{'type': 'loss', 'content': 0.09043343365192413, 'timestamp': '2025-10-02 00:40:05.546516', 'step': 16363, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:05.601925', 'step': 16363, 'epoch': 2}
{'type': 'loss', 'content': 0.1820332407951355, 'timestamp': '2025-10-02 00:40:05.608109', 'step': 16364, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:05.661692', 'step': 16364, 'epoch': 2}
{'type': 'loss', 'content': 0.06699078530073166, 'timestamp': '2025-10-02 00:40:05.672160', 'step': 16365, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:05.727682', 'step': 16365, 'epoch': 2}
{'type': 'loss', 'content': 0.1603444367647171, 'timestamp': '2025-10-02 00:40:05.730229', 'step': 16366, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:05.785432', 'step': 16366, 'epoch': 2}
{'type': 'loss', 'content': 0.02292984165251255, 'timestamp': '2025-10-02 00:40:05.794958', 'step': 16367, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:05.849365', 'step': 16367, 'epoch': 2}
{'type': 'loss', 'content': 0.032758403569459915, 'timestamp': '2025-10-02 00:40:05.855421', 'step': 16368, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:05.908998', 'step': 16368, 'epoch': 2}
{'type': 'loss', 'content': 0.03721551224589348, 'timestamp': '2025-10-02 00:40:05.914779', 'step': 16369, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:40:05.973587', 'step': 16369, 'epoch': 2}
{'type': 'loss', 'content': 0.05439668148756027, 'timestamp': '2025-10-02 00:40:05.984033', 'step': 16370, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:06.039958', 'step': 16370, 'epoch': 2}
{'type': 'loss', 'content': 0.0553024485707283, 'timestamp': '2025-10-02 00:40:06.042646', 'step': 16371, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:06.098143', 'step': 16371, 'epoch': 2}
{'type': 'loss', 'content': 0.02714265137910843, 'timestamp': '2025-10-02 00:40:06.104836', 'step': 16372, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:06.159611', 'step': 16372, 'epoch': 2}
{'type': 'loss', 'content': 0.03326922655105591, 'timestamp': '2025-10-02 00:40:06.162158', 'step': 16373, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:06.216427', 'step': 16373, 'epoch': 2}
{'type': 'loss', 'content': 0.05385170876979828, 'timestamp': '2025-10-02 00:40:06.219246', 'step': 16374, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:06.274383', 'step': 16374, 'epoch': 2}
{'type': 'loss', 'content': 0.04703732952475548, 'timestamp': '2025-10-02 00:40:06.276945', 'step': 16375, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:06.332702', 'step': 16375, 'epoch': 2}
{'type': 'loss', 'content': 0.09363369643688202, 'timestamp': '2025-10-02 00:40:06.339363', 'step': 16376, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:06.395593', 'step': 16376, 'epoch': 2}
{'type': 'loss', 'content': 0.07899344712495804, 'timestamp': '2025-10-02 00:40:06.403283', 'step': 16377, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:06.458421', 'step': 16377, 'epoch': 2}
{'type': 'loss', 'content': 0.07168340682983398, 'timestamp': '2025-10-02 00:40:06.461323', 'step': 16378, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:06.516927', 'step': 16378, 'epoch': 2}
{'type': 'loss', 'content': 0.04475191608071327, 'timestamp': '2025-10-02 00:40:06.519515', 'step': 16379, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:40:06.580963', 'step': 16379, 'epoch': 2}
{'type': 'loss', 'content': 0.020383819937705994, 'timestamp': '2025-10-02 00:40:06.592502', 'step': 16380, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:06.646474', 'step': 16380, 'epoch': 2}
{'type': 'loss', 'content': 0.027552319690585136, 'timestamp': '2025-10-02 00:40:06.655839', 'step': 16381, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:06.711605', 'step': 16381, 'epoch': 2}
{'type': 'loss', 'content': 0.03062576986849308, 'timestamp': '2025-10-02 00:40:06.717665', 'step': 16382, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:06.772415', 'step': 16382, 'epoch': 2}
{'type': 'loss', 'content': 0.15433812141418457, 'timestamp': '2025-10-02 00:40:06.775057', 'step': 16383, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:06.829602', 'step': 16383, 'epoch': 2}
{'type': 'loss', 'content': 0.11538135260343552, 'timestamp': '2025-10-02 00:40:06.835865', 'step': 16384, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:06.890372', 'step': 16384, 'epoch': 2}
{'type': 'loss', 'content': 0.04529166594147682, 'timestamp': '2025-10-02 00:40:06.896268', 'step': 16385, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:40:06.955142', 'step': 16385, 'epoch': 2}
{'type': 'loss', 'content': 0.022920764982700348, 'timestamp': '2025-10-02 00:40:06.965554', 'step': 16386, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:07.020745', 'step': 16386, 'epoch': 2}
{'type': 'loss', 'content': 0.037521835416555405, 'timestamp': '2025-10-02 00:40:07.023761', 'step': 16387, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:07.077757', 'step': 16387, 'epoch': 2}
{'type': 'loss', 'content': 0.14509741961956024, 'timestamp': '2025-10-02 00:40:07.083832', 'step': 16388, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:07.137991', 'step': 16388, 'epoch': 2}
{'type': 'loss', 'content': 0.09602327644824982, 'timestamp': '2025-10-02 00:40:07.140508', 'step': 16389, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:07.194543', 'step': 16389, 'epoch': 2}
{'type': 'loss', 'content': 0.22798505425453186, 'timestamp': '2025-10-02 00:40:07.197040', 'step': 16390, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:07.254334', 'step': 16390, 'epoch': 2}
{'type': 'loss', 'content': 0.06471969932317734, 'timestamp': '2025-10-02 00:40:07.264062', 'step': 16391, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:07.318367', 'step': 16391, 'epoch': 2}
{'type': 'loss', 'content': 0.11330097913742065, 'timestamp': '2025-10-02 00:40:07.324687', 'step': 16392, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:07.378788', 'step': 16392, 'epoch': 2}
{'type': 'loss', 'content': 0.047810912132263184, 'timestamp': '2025-10-02 00:40:07.381570', 'step': 16393, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:40:07.441411', 'step': 16393, 'epoch': 2}
{'type': 'loss', 'content': 0.003490985371172428, 'timestamp': '2025-10-02 00:40:07.451845', 'step': 16394, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:40:07.511109', 'step': 16394, 'epoch': 2}
{'type': 'loss', 'content': 0.029003383591771126, 'timestamp': '2025-10-02 00:40:07.521549', 'step': 16395, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:07.577416', 'step': 16395, 'epoch': 2}
{'type': 'loss', 'content': 0.01347280852496624, 'timestamp': '2025-10-02 00:40:07.587624', 'step': 16396, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:07.641289', 'step': 16396, 'epoch': 2}
{'type': 'loss', 'content': 0.11142996698617935, 'timestamp': '2025-10-02 00:40:07.643802', 'step': 16397, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:07.698505', 'step': 16397, 'epoch': 2}
{'type': 'loss', 'content': 0.10208724439144135, 'timestamp': '2025-10-02 00:40:07.704363', 'step': 16398, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:40:07.759600', 'step': 16398, 'epoch': 2}
{'type': 'loss', 'content': 0.07786300778388977, 'timestamp': '2025-10-02 00:40:07.762300', 'step': 16399, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:40:07.816077', 'step': 16399, 'epoch': 2}
{'type': 'loss', 'content': 0.05233198031783104, 'timestamp': '2025-10-02 00:40:07.822607', 'step': 16400, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:40:07.876348', 'step': 16400, 'epoch': 2}
{'type': 'loss', 'content': 0.04287730157375336, 'timestamp': '2025-10-02 00:40:07.879218', 'step': 16401, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:07.933574', 'step': 16401, 'epoch': 2}
{'type': 'loss', 'content': 0.07282959669828415, 'timestamp': '2025-10-02 00:40:07.936499', 'step': 16402, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:07.991614', 'step': 16402, 'epoch': 2}
{'type': 'loss', 'content': 0.025302274152636528, 'timestamp': '2025-10-02 00:40:07.997485', 'step': 16403, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:40:08.051696', 'step': 16403, 'epoch': 2}
{'type': 'loss', 'content': 0.12645652890205383, 'timestamp': '2025-10-02 00:40:08.058583', 'step': 16404, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:08.112863', 'step': 16404, 'epoch': 2}
{'type': 'loss', 'content': 0.19489648938179016, 'timestamp': '2025-10-02 00:40:08.115467', 'step': 16405, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:08.169261', 'step': 16405, 'epoch': 2}
{'type': 'loss', 'content': 0.0723377987742424, 'timestamp': '2025-10-02 00:40:08.172305', 'step': 16406, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:40:08.226578', 'step': 16406, 'epoch': 2}
{'type': 'loss', 'content': 0.11979871243238449, 'timestamp': '2025-10-02 00:40:08.229387', 'step': 16407, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:08.283929', 'step': 16407, 'epoch': 2}
{'type': 'loss', 'content': 0.03505545109510422, 'timestamp': '2025-10-02 00:40:08.290203', 'step': 16408, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:40:08.352276', 'step': 16408, 'epoch': 2}
{'type': 'loss', 'content': 0.014717173762619495, 'timestamp': '2025-10-02 00:40:08.363802', 'step': 16409, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:40:08.418716', 'step': 16409, 'epoch': 2}
{'type': 'loss', 'content': 0.11651334911584854, 'timestamp': '2025-10-02 00:40:08.421127', 'step': 16410, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:40:08.475648', 'step': 16410, 'epoch': 2}
{'type': 'loss', 'content': 0.0814172551035881, 'timestamp': '2025-10-02 00:40:08.478802', 'step': 16411, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:08.536572', 'step': 16411, 'epoch': 2}
{'type': 'loss', 'content': 0.07432916760444641, 'timestamp': '2025-10-02 00:40:08.543465', 'step': 16412, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:40:08.616884', 'step': 16412, 'epoch': 2}
{'type': 'loss', 'content': 0.027624130249023438, 'timestamp': '2025-10-02 00:40:08.631522', 'step': 16413, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:08.688671', 'step': 16413, 'epoch': 2}
{'type': 'loss', 'content': 0.07916226983070374, 'timestamp': '2025-10-02 00:40:08.696159', 'step': 16414, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:40:08.768394', 'step': 16414, 'epoch': 2}
{'type': 'loss', 'content': 0.09211298078298569, 'timestamp': '2025-10-02 00:40:08.778746', 'step': 16415, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:08.834610', 'step': 16415, 'epoch': 2}
{'type': 'loss', 'content': 0.08858802914619446, 'timestamp': '2025-10-02 00:40:08.845183', 'step': 16416, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:40:08.901626', 'step': 16416, 'epoch': 2}
{'type': 'loss', 'content': 0.043182309716939926, 'timestamp': '2025-10-02 00:40:08.906439', 'step': 16417, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:40:08.961932', 'step': 16417, 'epoch': 2}
{'type': 'loss', 'content': 0.07516701519489288, 'timestamp': '2025-10-02 00:40:08.964454', 'step': 16418, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:09.021163', 'step': 16418, 'epoch': 2}
{'type': 'loss', 'content': 0.04219719395041466, 'timestamp': '2025-10-02 00:40:09.024422', 'step': 16419, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:40:09.080800', 'step': 16419, 'epoch': 2}
{'type': 'loss', 'content': 0.10101388394832611, 'timestamp': '2025-10-02 00:40:09.087766', 'step': 16420, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:40:09.142919', 'step': 16420, 'epoch': 2}
{'type': 'loss', 'content': 0.04002334177494049, 'timestamp': '2025-10-02 00:40:09.146140', 'step': 16421, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:09.201941', 'step': 16421, 'epoch': 2}
{'type': 'loss', 'content': 0.015788143500685692, 'timestamp': '2025-10-02 00:40:09.209505', 'step': 16422, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:09.264440', 'step': 16422, 'epoch': 2}
{'type': 'loss', 'content': 0.10574449598789215, 'timestamp': '2025-10-02 00:40:09.266855', 'step': 16423, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:09.324134', 'step': 16423, 'epoch': 2}
{'type': 'loss', 'content': 0.053460102528333664, 'timestamp': '2025-10-02 00:40:09.331051', 'step': 16424, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:09.387206', 'step': 16424, 'epoch': 2}
{'type': 'loss', 'content': 0.05078207701444626, 'timestamp': '2025-10-02 00:40:09.390417', 'step': 16425, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:09.446450', 'step': 16425, 'epoch': 2}
{'type': 'loss', 'content': 0.07431664317846298, 'timestamp': '2025-10-02 00:40:09.449912', 'step': 16426, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:09.510422', 'step': 16426, 'epoch': 2}
{'type': 'loss', 'content': 0.07230056822299957, 'timestamp': '2025-10-02 00:40:09.512955', 'step': 16427, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:09.569730', 'step': 16427, 'epoch': 2}
{'type': 'loss', 'content': 0.09747990220785141, 'timestamp': '2025-10-02 00:40:09.576108', 'step': 16428, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:09.632422', 'step': 16428, 'epoch': 2}
{'type': 'loss', 'content': 0.03451164439320564, 'timestamp': '2025-10-02 00:40:09.636096', 'step': 16429, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:09.692916', 'step': 16429, 'epoch': 2}
{'type': 'loss', 'content': 0.02181980386376381, 'timestamp': '2025-10-02 00:40:09.700485', 'step': 16430, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:09.757833', 'step': 16430, 'epoch': 2}
{'type': 'loss', 'content': 0.02337523363530636, 'timestamp': '2025-10-02 00:40:09.763526', 'step': 16431, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:09.820013', 'step': 16431, 'epoch': 2}
{'type': 'loss', 'content': 0.10034399479627609, 'timestamp': '2025-10-02 00:40:09.828337', 'step': 16432, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:09.883835', 'step': 16432, 'epoch': 2}
{'type': 'loss', 'content': 0.03523467853665352, 'timestamp': '2025-10-02 00:40:09.887380', 'step': 16433, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:09.944063', 'step': 16433, 'epoch': 2}
{'type': 'loss', 'content': 0.08163468539714813, 'timestamp': '2025-10-02 00:40:09.947292', 'step': 16434, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:10.004020', 'step': 16434, 'epoch': 2}
{'type': 'loss', 'content': 0.0771956741809845, 'timestamp': '2025-10-02 00:40:10.009803', 'step': 16435, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:10.066779', 'step': 16435, 'epoch': 2}
{'type': 'loss', 'content': 0.1130550429224968, 'timestamp': '2025-10-02 00:40:10.073703', 'step': 16436, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:10.130165', 'step': 16436, 'epoch': 2}
{'type': 'loss', 'content': 0.060345735400915146, 'timestamp': '2025-10-02 00:40:10.133028', 'step': 16437, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:10.188401', 'step': 16437, 'epoch': 2}
{'type': 'loss', 'content': 0.1908649355173111, 'timestamp': '2025-10-02 00:40:10.191098', 'step': 16438, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:10.246802', 'step': 16438, 'epoch': 2}
{'type': 'loss', 'content': 0.19488543272018433, 'timestamp': '2025-10-02 00:40:10.256535', 'step': 16439, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:10.312005', 'step': 16439, 'epoch': 2}
{'type': 'loss', 'content': 0.051838718354701996, 'timestamp': '2025-10-02 00:40:10.320395', 'step': 16440, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:10.375951', 'step': 16440, 'epoch': 2}
{'type': 'loss', 'content': 0.08120904117822647, 'timestamp': '2025-10-02 00:40:10.381844', 'step': 16441, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:10.436340', 'step': 16441, 'epoch': 2}
{'type': 'loss', 'content': 0.06058017164468765, 'timestamp': '2025-10-02 00:40:10.443893', 'step': 16442, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:40:10.502865', 'step': 16442, 'epoch': 2}
{'type': 'loss', 'content': 0.04409383237361908, 'timestamp': '2025-10-02 00:40:10.513328', 'step': 16443, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:10.567501', 'step': 16443, 'epoch': 2}
{'type': 'loss', 'content': 0.06081576272845268, 'timestamp': '2025-10-02 00:40:10.574066', 'step': 16444, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:10.628149', 'step': 16444, 'epoch': 2}
{'type': 'loss', 'content': 0.09741393476724625, 'timestamp': '2025-10-02 00:40:10.630825', 'step': 16445, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:10.686152', 'step': 16445, 'epoch': 2}
{'type': 'loss', 'content': 0.03810752183198929, 'timestamp': '2025-10-02 00:40:10.693745', 'step': 16446, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:40:10.754591', 'step': 16446, 'epoch': 2}
{'type': 'loss', 'content': 0.0074480571784079075, 'timestamp': '2025-10-02 00:40:10.765035', 'step': 16447, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:10.820011', 'step': 16447, 'epoch': 2}
{'type': 'loss', 'content': 0.08294505625963211, 'timestamp': '2025-10-02 00:40:10.826332', 'step': 16448, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:40:10.894244', 'step': 16448, 'epoch': 2}
{'type': 'loss', 'content': 0.0314866378903389, 'timestamp': '2025-10-02 00:40:10.908028', 'step': 16449, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:40:10.962540', 'step': 16449, 'epoch': 2}
{'type': 'loss', 'content': 0.036308277398347855, 'timestamp': '2025-10-02 00:40:10.965069', 'step': 16450, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:11.019674', 'step': 16450, 'epoch': 2}
{'type': 'loss', 'content': 0.10446204245090485, 'timestamp': '2025-10-02 00:40:11.022043', 'step': 16451, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:11.077546', 'step': 16451, 'epoch': 2}
{'type': 'loss', 'content': 0.04035617783665657, 'timestamp': '2025-10-02 00:40:11.084078', 'step': 16452, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:11.138754', 'step': 16452, 'epoch': 2}
{'type': 'loss', 'content': 0.028399717062711716, 'timestamp': '2025-10-02 00:40:11.141689', 'step': 16453, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:40:11.195994', 'step': 16453, 'epoch': 2}
{'type': 'loss', 'content': 0.06998660415410995, 'timestamp': '2025-10-02 00:40:11.198569', 'step': 16454, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:11.254365', 'step': 16454, 'epoch': 2}
{'type': 'loss', 'content': 0.06398391723632812, 'timestamp': '2025-10-02 00:40:11.257315', 'step': 16455, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:11.311930', 'step': 16455, 'epoch': 2}
{'type': 'loss', 'content': 0.08418472111225128, 'timestamp': '2025-10-02 00:40:11.320803', 'step': 16456, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:40:11.384526', 'step': 16456, 'epoch': 2}
{'type': 'loss', 'content': 0.1332026869058609, 'timestamp': '2025-10-02 00:40:11.387655', 'step': 16457, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:11.442643', 'step': 16457, 'epoch': 2}
{'type': 'loss', 'content': 0.018949853256344795, 'timestamp': '2025-10-02 00:40:11.445608', 'step': 16458, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:11.499725', 'step': 16458, 'epoch': 2}
{'type': 'loss', 'content': 0.10930489003658295, 'timestamp': '2025-10-02 00:40:11.502427', 'step': 16459, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:11.558368', 'step': 16459, 'epoch': 2}
{'type': 'loss', 'content': 0.1073974221944809, 'timestamp': '2025-10-02 00:40:11.568877', 'step': 16460, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:11.623661', 'step': 16460, 'epoch': 2}
{'type': 'loss', 'content': 0.018724171444773674, 'timestamp': '2025-10-02 00:40:11.634175', 'step': 16461, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:11.690327', 'step': 16461, 'epoch': 2}
{'type': 'loss', 'content': 0.017133234068751335, 'timestamp': '2025-10-02 00:40:11.692944', 'step': 16462, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:11.747309', 'step': 16462, 'epoch': 2}
{'type': 'loss', 'content': 0.08043774217367172, 'timestamp': '2025-10-02 00:40:11.750662', 'step': 16463, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:40:11.812819', 'step': 16463, 'epoch': 2}
{'type': 'loss', 'content': 0.030091188848018646, 'timestamp': '2025-10-02 00:40:11.824501', 'step': 16464, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:11.885955', 'step': 16464, 'epoch': 2}
{'type': 'loss', 'content': 0.13640525937080383, 'timestamp': '2025-10-02 00:40:11.889856', 'step': 16465, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:11.945519', 'step': 16465, 'epoch': 2}
{'type': 'loss', 'content': 0.04209292680025101, 'timestamp': '2025-10-02 00:40:11.952749', 'step': 16466, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:12.009739', 'step': 16466, 'epoch': 2}
{'type': 'loss', 'content': 0.027896635234355927, 'timestamp': '2025-10-02 00:40:12.015296', 'step': 16467, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:12.076702', 'step': 16467, 'epoch': 2}
{'type': 'loss', 'content': 0.09755079448223114, 'timestamp': '2025-10-02 00:40:12.086296', 'step': 16468, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:40:12.153532', 'step': 16468, 'epoch': 2}
{'type': 'loss', 'content': 0.027484068647027016, 'timestamp': '2025-10-02 00:40:12.165942', 'step': 16469, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:12.226524', 'step': 16469, 'epoch': 2}
{'type': 'loss', 'content': 0.08691456913948059, 'timestamp': '2025-10-02 00:40:12.229304', 'step': 16470, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:12.284541', 'step': 16470, 'epoch': 2}
{'type': 'loss', 'content': 0.03261306509375572, 'timestamp': '2025-10-02 00:40:12.287321', 'step': 16471, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:12.341268', 'step': 16471, 'epoch': 2}
{'type': 'loss', 'content': 0.1411169320344925, 'timestamp': '2025-10-02 00:40:12.348151', 'step': 16472, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:40:12.402777', 'step': 16472, 'epoch': 2}
{'type': 'loss', 'content': 0.07626059651374817, 'timestamp': '2025-10-02 00:40:12.405755', 'step': 16473, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:40:12.464703', 'step': 16473, 'epoch': 2}
{'type': 'loss', 'content': 0.0651848241686821, 'timestamp': '2025-10-02 00:40:12.475141', 'step': 16474, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:12.530828', 'step': 16474, 'epoch': 2}
{'type': 'loss', 'content': 0.07345729321241379, 'timestamp': '2025-10-02 00:40:12.533792', 'step': 16475, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:12.588437', 'step': 16475, 'epoch': 2}
{'type': 'loss', 'content': 0.06068270653486252, 'timestamp': '2025-10-02 00:40:12.595274', 'step': 16476, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:40:12.651054', 'step': 16476, 'epoch': 2}
{'type': 'loss', 'content': 0.052479375153779984, 'timestamp': '2025-10-02 00:40:12.654047', 'step': 16477, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:12.708066', 'step': 16477, 'epoch': 2}
{'type': 'loss', 'content': 0.08640935271978378, 'timestamp': '2025-10-02 00:40:12.710614', 'step': 16478, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:12.765285', 'step': 16478, 'epoch': 2}
{'type': 'loss', 'content': 0.05586695298552513, 'timestamp': '2025-10-02 00:40:12.767963', 'step': 16479, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:12.823942', 'step': 16479, 'epoch': 2}
{'type': 'loss', 'content': 0.19698750972747803, 'timestamp': '2025-10-02 00:40:12.830718', 'step': 16480, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:12.885358', 'step': 16480, 'epoch': 2}
{'type': 'loss', 'content': 0.06844443082809448, 'timestamp': '2025-10-02 00:40:12.895816', 'step': 16481, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:12.950943', 'step': 16481, 'epoch': 2}
{'type': 'loss', 'content': 0.09978778660297394, 'timestamp': '2025-10-02 00:40:12.953485', 'step': 16482, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:13.008263', 'step': 16482, 'epoch': 2}
{'type': 'loss', 'content': 0.036109838634729385, 'timestamp': '2025-10-02 00:40:13.011144', 'step': 16483, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:13.064552', 'step': 16483, 'epoch': 2}
{'type': 'loss', 'content': 0.053988322615623474, 'timestamp': '2025-10-02 00:40:13.071460', 'step': 16484, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:13.126041', 'step': 16484, 'epoch': 2}
{'type': 'loss', 'content': 0.1540716141462326, 'timestamp': '2025-10-02 00:40:13.128883', 'step': 16485, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:13.184381', 'step': 16485, 'epoch': 2}
{'type': 'loss', 'content': 0.1226896345615387, 'timestamp': '2025-10-02 00:40:13.187004', 'step': 16486, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:40:13.246413', 'step': 16486, 'epoch': 2}
{'type': 'loss', 'content': 0.09073484688997269, 'timestamp': '2025-10-02 00:40:13.256837', 'step': 16487, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:13.313223', 'step': 16487, 'epoch': 2}
{'type': 'loss', 'content': 0.20553262531757355, 'timestamp': '2025-10-02 00:40:13.319892', 'step': 16488, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:13.373863', 'step': 16488, 'epoch': 2}
{'type': 'loss', 'content': 0.053399696946144104, 'timestamp': '2025-10-02 00:40:13.379672', 'step': 16489, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:13.434667', 'step': 16489, 'epoch': 2}
{'type': 'loss', 'content': 0.04339974373579025, 'timestamp': '2025-10-02 00:40:13.437409', 'step': 16490, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:13.493050', 'step': 16490, 'epoch': 2}
{'type': 'loss', 'content': 0.02983623370528221, 'timestamp': '2025-10-02 00:40:13.502862', 'step': 16491, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:40:13.561653', 'step': 16491, 'epoch': 2}
{'type': 'loss', 'content': 0.06024209037423134, 'timestamp': '2025-10-02 00:40:13.572874', 'step': 16492, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:13.627428', 'step': 16492, 'epoch': 2}
{'type': 'loss', 'content': 0.060294970870018005, 'timestamp': '2025-10-02 00:40:13.630155', 'step': 16493, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:13.684780', 'step': 16493, 'epoch': 2}
{'type': 'loss', 'content': 0.1346176117658615, 'timestamp': '2025-10-02 00:40:13.687409', 'step': 16494, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:13.742183', 'step': 16494, 'epoch': 2}
{'type': 'loss', 'content': 0.05137762054800987, 'timestamp': '2025-10-02 00:40:13.744829', 'step': 16495, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:40:13.814379', 'step': 16495, 'epoch': 2}
{'type': 'loss', 'content': 0.00746337091550231, 'timestamp': '2025-10-02 00:40:13.827864', 'step': 16496, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:13.881619', 'step': 16496, 'epoch': 2}
{'type': 'loss', 'content': 0.0560554638504982, 'timestamp': '2025-10-02 00:40:13.884552', 'step': 16497, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:40:13.939990', 'step': 16497, 'epoch': 2}
{'type': 'loss', 'content': 0.034936465322971344, 'timestamp': '2025-10-02 00:40:13.942884', 'step': 16498, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:40:14.001644', 'step': 16498, 'epoch': 2}
{'type': 'loss', 'content': 0.024022288620471954, 'timestamp': '2025-10-02 00:40:14.012011', 'step': 16499, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:14.067232', 'step': 16499, 'epoch': 2}
{'type': 'loss', 'content': 0.15050430595874786, 'timestamp': '2025-10-02 00:40:14.073581', 'step': 16500, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 16500', 'timestamp': '2025-10-02 00:40:14.793670', 'step': 16500, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:14.852722', 'step': 16500, 'epoch': 2}
{'type': 'loss', 'content': 0.15123704075813293, 'timestamp': '2025-10-02 00:40:14.856082', 'step': 16501, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:14.911923', 'step': 16501, 'epoch': 2}
{'type': 'loss', 'content': 0.0922626480460167, 'timestamp': '2025-10-02 00:40:14.918411', 'step': 16502, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:40:14.980379', 'step': 16502, 'epoch': 2}
{'type': 'loss', 'content': 0.016458652913570404, 'timestamp': '2025-10-02 00:40:14.991193', 'step': 16503, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:40:15.046209', 'step': 16503, 'epoch': 2}
{'type': 'loss', 'content': 0.10076224058866501, 'timestamp': '2025-10-02 00:40:15.053110', 'step': 16504, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:15.107255', 'step': 16504, 'epoch': 2}
{'type': 'loss', 'content': 0.027512501925230026, 'timestamp': '2025-10-02 00:40:15.114681', 'step': 16505, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:15.170904', 'step': 16505, 'epoch': 2}
{'type': 'loss', 'content': 0.08625227212905884, 'timestamp': '2025-10-02 00:40:15.174356', 'step': 16506, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:15.230408', 'step': 16506, 'epoch': 2}
{'type': 'loss', 'content': 0.05876292288303375, 'timestamp': '2025-10-02 00:40:15.237622', 'step': 16507, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:40:15.292375', 'step': 16507, 'epoch': 2}
{'type': 'loss', 'content': 0.12223012000322342, 'timestamp': '2025-10-02 00:40:15.299400', 'step': 16508, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:40:15.354862', 'step': 16508, 'epoch': 2}
{'type': 'loss', 'content': 0.13989347219467163, 'timestamp': '2025-10-02 00:40:15.357668', 'step': 16509, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:15.412207', 'step': 16509, 'epoch': 2}
{'type': 'loss', 'content': 0.08467938750982285, 'timestamp': '2025-10-02 00:40:15.417723', 'step': 16510, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:15.472143', 'step': 16510, 'epoch': 2}
{'type': 'loss', 'content': 0.056288570165634155, 'timestamp': '2025-10-02 00:40:15.477772', 'step': 16511, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:40:15.540466', 'step': 16511, 'epoch': 2}
{'type': 'loss', 'content': 0.011844360269606113, 'timestamp': '2025-10-02 00:40:15.552366', 'step': 16512, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:15.607294', 'step': 16512, 'epoch': 2}
{'type': 'loss', 'content': 0.02463502623140812, 'timestamp': '2025-10-02 00:40:15.612870', 'step': 16513, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:15.667440', 'step': 16513, 'epoch': 2}
{'type': 'loss', 'content': 0.02902040258049965, 'timestamp': '2025-10-02 00:40:15.670162', 'step': 16514, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:15.724657', 'step': 16514, 'epoch': 2}
{'type': 'loss', 'content': 0.059957996010780334, 'timestamp': '2025-10-02 00:40:15.727470', 'step': 16515, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:15.782263', 'step': 16515, 'epoch': 2}
{'type': 'loss', 'content': 0.023927142843604088, 'timestamp': '2025-10-02 00:40:15.792468', 'step': 16516, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:15.850492', 'step': 16516, 'epoch': 2}
{'type': 'loss', 'content': 0.0314866378903389, 'timestamp': '2025-10-02 00:40:15.860106', 'step': 16517, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:15.967884', 'step': 16517, 'epoch': 2}
{'type': 'loss', 'content': 0.05471033602952957, 'timestamp': '2025-10-02 00:40:16.001056', 'step': 16518, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:16.097429', 'step': 16518, 'epoch': 2}
{'type': 'loss', 'content': 0.04126005992293358, 'timestamp': '2025-10-02 00:40:16.110040', 'step': 16519, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:40:16.201069', 'step': 16519, 'epoch': 2}
{'type': 'loss', 'content': 0.018767908215522766, 'timestamp': '2025-10-02 00:40:16.214479', 'step': 16520, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:16.317922', 'step': 16520, 'epoch': 2}
{'type': 'loss', 'content': 0.052539657801389694, 'timestamp': '2025-10-02 00:40:16.326483', 'step': 16521, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:40:16.404516', 'step': 16521, 'epoch': 2}
{'type': 'loss', 'content': 0.022248443216085434, 'timestamp': '2025-10-02 00:40:16.409915', 'step': 16522, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:16.491043', 'step': 16522, 'epoch': 2}
{'type': 'loss', 'content': 0.02773774415254593, 'timestamp': '2025-10-02 00:40:16.504137', 'step': 16523, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:16.590819', 'step': 16523, 'epoch': 2}
{'type': 'loss', 'content': 0.0334482304751873, 'timestamp': '2025-10-02 00:40:16.603284', 'step': 16524, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:16.695859', 'step': 16524, 'epoch': 2}
{'type': 'loss', 'content': 0.043918024748563766, 'timestamp': '2025-10-02 00:40:16.721113', 'step': 16525, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:16.795857', 'step': 16525, 'epoch': 2}
{'type': 'loss', 'content': 0.04575210064649582, 'timestamp': '2025-10-02 00:40:16.808147', 'step': 16526, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:16.896578', 'step': 16526, 'epoch': 2}
{'type': 'loss', 'content': 0.031577177345752716, 'timestamp': '2025-10-02 00:40:16.909344', 'step': 16527, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:17.029734', 'step': 16527, 'epoch': 2}
{'type': 'loss', 'content': 0.018310468643903732, 'timestamp': '2025-10-02 00:40:17.040295', 'step': 16528, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:17.134143', 'step': 16528, 'epoch': 2}
{'type': 'loss', 'content': 0.032996516674757004, 'timestamp': '2025-10-02 00:40:17.149793', 'step': 16529, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:40:17.245435', 'step': 16529, 'epoch': 2}
{'type': 'loss', 'content': 0.021266017109155655, 'timestamp': '2025-10-02 00:40:17.258091', 'step': 16530, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:17.312703', 'step': 16530, 'epoch': 2}
{'type': 'loss', 'content': 0.1722642183303833, 'timestamp': '2025-10-02 00:40:17.315555', 'step': 16531, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:17.371621', 'step': 16531, 'epoch': 2}
{'type': 'loss', 'content': 0.04787971451878548, 'timestamp': '2025-10-02 00:40:17.378346', 'step': 16532, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:17.433259', 'step': 16532, 'epoch': 2}
{'type': 'loss', 'content': 0.014803005382418633, 'timestamp': '2025-10-02 00:40:17.442678', 'step': 16533, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:17.497019', 'step': 16533, 'epoch': 2}
{'type': 'loss', 'content': 0.02881973795592785, 'timestamp': '2025-10-02 00:40:17.501153', 'step': 16534, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:17.557311', 'step': 16534, 'epoch': 2}
{'type': 'loss', 'content': 0.028240328654646873, 'timestamp': '2025-10-02 00:40:17.564521', 'step': 16535, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:17.619459', 'step': 16535, 'epoch': 2}
{'type': 'loss', 'content': 0.09631269425153732, 'timestamp': '2025-10-02 00:40:17.625603', 'step': 16536, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:17.680071', 'step': 16536, 'epoch': 2}
{'type': 'loss', 'content': 0.01548543106764555, 'timestamp': '2025-10-02 00:40:17.682659', 'step': 16537, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:17.737857', 'step': 16537, 'epoch': 2}
{'type': 'loss', 'content': 0.08579841256141663, 'timestamp': '2025-10-02 00:40:17.740842', 'step': 16538, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:17.795860', 'step': 16538, 'epoch': 2}
{'type': 'loss', 'content': 0.07193709909915924, 'timestamp': '2025-10-02 00:40:17.801538', 'step': 16539, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:17.857137', 'step': 16539, 'epoch': 2}
{'type': 'loss', 'content': 0.08166715502738953, 'timestamp': '2025-10-02 00:40:17.864570', 'step': 16540, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:17.921203', 'step': 16540, 'epoch': 2}
{'type': 'loss', 'content': 0.08931528776884079, 'timestamp': '2025-10-02 00:40:17.924467', 'step': 16541, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:17.979654', 'step': 16541, 'epoch': 2}
{'type': 'loss', 'content': 0.026952708140015602, 'timestamp': '2025-10-02 00:40:17.989194', 'step': 16542, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:18.058142', 'step': 16542, 'epoch': 2}
{'type': 'loss', 'content': 0.033240024000406265, 'timestamp': '2025-10-02 00:40:18.067939', 'step': 16543, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:18.124102', 'step': 16543, 'epoch': 2}
{'type': 'loss', 'content': 0.07632019370794296, 'timestamp': '2025-10-02 00:40:18.130640', 'step': 16544, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:18.185613', 'step': 16544, 'epoch': 2}
{'type': 'loss', 'content': 0.09216585010290146, 'timestamp': '2025-10-02 00:40:18.188865', 'step': 16545, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:18.246616', 'step': 16545, 'epoch': 2}
{'type': 'loss', 'content': 0.0572463758289814, 'timestamp': '2025-10-02 00:40:18.249720', 'step': 16546, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:40:18.305911', 'step': 16546, 'epoch': 2}
{'type': 'loss', 'content': 0.07425422221422195, 'timestamp': '2025-10-02 00:40:18.309855', 'step': 16547, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:18.367880', 'step': 16547, 'epoch': 2}
{'type': 'loss', 'content': 0.10870819538831711, 'timestamp': '2025-10-02 00:40:18.375132', 'step': 16548, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:18.431106', 'step': 16548, 'epoch': 2}
{'type': 'loss', 'content': 0.0791827067732811, 'timestamp': '2025-10-02 00:40:18.440615', 'step': 16549, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:18.499082', 'step': 16549, 'epoch': 2}
{'type': 'loss', 'content': 0.024792853742837906, 'timestamp': '2025-10-02 00:40:18.505154', 'step': 16550, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:18.562012', 'step': 16550, 'epoch': 2}
{'type': 'loss', 'content': 0.03450533747673035, 'timestamp': '2025-10-02 00:40:18.567918', 'step': 16551, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:40:18.629999', 'step': 16551, 'epoch': 2}
{'type': 'loss', 'content': 0.019259031862020493, 'timestamp': '2025-10-02 00:40:18.641086', 'step': 16552, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:18.696259', 'step': 16552, 'epoch': 2}
{'type': 'loss', 'content': 0.03934291750192642, 'timestamp': '2025-10-02 00:40:18.699512', 'step': 16553, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:18.755432', 'step': 16553, 'epoch': 2}
{'type': 'loss', 'content': 0.05881519988179207, 'timestamp': '2025-10-02 00:40:18.758712', 'step': 16554, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:18.816772', 'step': 16554, 'epoch': 2}
{'type': 'loss', 'content': 0.06420941650867462, 'timestamp': '2025-10-02 00:40:18.820046', 'step': 16555, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:40:18.877000', 'step': 16555, 'epoch': 2}
{'type': 'loss', 'content': 0.06825653463602066, 'timestamp': '2025-10-02 00:40:18.883914', 'step': 16556, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:18.939119', 'step': 16556, 'epoch': 2}
{'type': 'loss', 'content': 0.08982788026332855, 'timestamp': '2025-10-02 00:40:18.942425', 'step': 16557, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:19.003893', 'step': 16557, 'epoch': 2}
{'type': 'loss', 'content': 0.06821619719266891, 'timestamp': '2025-10-02 00:40:19.007737', 'step': 16558, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:19.064180', 'step': 16558, 'epoch': 2}
{'type': 'loss', 'content': 0.03197864070534706, 'timestamp': '2025-10-02 00:40:19.070125', 'step': 16559, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:40:19.133910', 'step': 16559, 'epoch': 2}
{'type': 'loss', 'content': 0.02218460477888584, 'timestamp': '2025-10-02 00:40:19.145603', 'step': 16560, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:40:19.209627', 'step': 16560, 'epoch': 2}
{'type': 'loss', 'content': 0.010181395336985588, 'timestamp': '2025-10-02 00:40:19.221646', 'step': 16561, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:19.277423', 'step': 16561, 'epoch': 2}
{'type': 'loss', 'content': 0.07224324345588684, 'timestamp': '2025-10-02 00:40:19.282020', 'step': 16562, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:19.338848', 'step': 16562, 'epoch': 2}
{'type': 'loss', 'content': 0.09507844597101212, 'timestamp': '2025-10-02 00:40:19.341343', 'step': 16563, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:19.398080', 'step': 16563, 'epoch': 2}
{'type': 'loss', 'content': 0.09546951949596405, 'timestamp': '2025-10-02 00:40:19.405098', 'step': 16564, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:19.462240', 'step': 16564, 'epoch': 2}
{'type': 'loss', 'content': 0.0310651957988739, 'timestamp': '2025-10-02 00:40:19.471748', 'step': 16565, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:40:19.528376', 'step': 16565, 'epoch': 2}
{'type': 'loss', 'content': 0.11390753835439682, 'timestamp': '2025-10-02 00:40:19.531892', 'step': 16566, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:40:19.587895', 'step': 16566, 'epoch': 2}
{'type': 'loss', 'content': 0.04626088589429855, 'timestamp': '2025-10-02 00:40:19.590488', 'step': 16567, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:19.647352', 'step': 16567, 'epoch': 2}
{'type': 'loss', 'content': 0.11221092194318771, 'timestamp': '2025-10-02 00:40:19.654521', 'step': 16568, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:19.708683', 'step': 16568, 'epoch': 2}
{'type': 'loss', 'content': 0.07569295167922974, 'timestamp': '2025-10-02 00:40:19.711398', 'step': 16569, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:40:19.766936', 'step': 16569, 'epoch': 2}
{'type': 'loss', 'content': 0.08175739645957947, 'timestamp': '2025-10-02 00:40:19.769768', 'step': 16570, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:19.827479', 'step': 16570, 'epoch': 2}
{'type': 'loss', 'content': 0.04259580746293068, 'timestamp': '2025-10-02 00:40:19.833350', 'step': 16571, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:40:19.896502', 'step': 16571, 'epoch': 2}
{'type': 'loss', 'content': 0.042231228202581406, 'timestamp': '2025-10-02 00:40:19.908027', 'step': 16572, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:19.962751', 'step': 16572, 'epoch': 2}
{'type': 'loss', 'content': 0.005023208446800709, 'timestamp': '2025-10-02 00:40:19.970318', 'step': 16573, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:40:20.031894', 'step': 16573, 'epoch': 2}
{'type': 'loss', 'content': 0.020792769268155098, 'timestamp': '2025-10-02 00:40:20.042640', 'step': 16574, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:20.097263', 'step': 16574, 'epoch': 2}
{'type': 'loss', 'content': 0.04387300834059715, 'timestamp': '2025-10-02 00:40:20.104611', 'step': 16575, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:20.159525', 'step': 16575, 'epoch': 2}
{'type': 'loss', 'content': 0.04660699516534805, 'timestamp': '2025-10-02 00:40:20.167890', 'step': 16576, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:40:20.222966', 'step': 16576, 'epoch': 2}
{'type': 'loss', 'content': 0.050653938204050064, 'timestamp': '2025-10-02 00:40:20.225619', 'step': 16577, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:40:20.299872', 'step': 16577, 'epoch': 2}
{'type': 'loss', 'content': 0.0354330912232399, 'timestamp': '2025-10-02 00:40:20.313563', 'step': 16578, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:40:20.375530', 'step': 16578, 'epoch': 2}
{'type': 'loss', 'content': 0.04023704305291176, 'timestamp': '2025-10-02 00:40:20.386204', 'step': 16579, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:20.441256', 'step': 16579, 'epoch': 2}
{'type': 'loss', 'content': 0.03701880946755409, 'timestamp': '2025-10-02 00:40:20.447586', 'step': 16580, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:20.501668', 'step': 16580, 'epoch': 2}
{'type': 'loss', 'content': 0.021812554448843002, 'timestamp': '2025-10-02 00:40:20.509231', 'step': 16581, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:40:20.568713', 'step': 16581, 'epoch': 2}
{'type': 'loss', 'content': 0.039635639637708664, 'timestamp': '2025-10-02 00:40:20.579102', 'step': 16582, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:20.634569', 'step': 16582, 'epoch': 2}
{'type': 'loss', 'content': 0.027936600148677826, 'timestamp': '2025-10-02 00:40:20.636906', 'step': 16583, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:20.691616', 'step': 16583, 'epoch': 2}
{'type': 'loss', 'content': 0.0568154975771904, 'timestamp': '2025-10-02 00:40:20.697956', 'step': 16584, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:20.752371', 'step': 16584, 'epoch': 2}
{'type': 'loss', 'content': 0.10833809524774551, 'timestamp': '2025-10-02 00:40:20.754803', 'step': 16585, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:20.809270', 'step': 16585, 'epoch': 2}
{'type': 'loss', 'content': 0.016789548099040985, 'timestamp': '2025-10-02 00:40:20.812118', 'step': 16586, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:20.869240', 'step': 16586, 'epoch': 2}
{'type': 'loss', 'content': 0.05571664497256279, 'timestamp': '2025-10-02 00:40:20.879013', 'step': 16587, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:20.934089', 'step': 16587, 'epoch': 2}
{'type': 'loss', 'content': 0.012253005988895893, 'timestamp': '2025-10-02 00:40:20.944351', 'step': 16588, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:20.999422', 'step': 16588, 'epoch': 2}
{'type': 'loss', 'content': 0.05427553877234459, 'timestamp': '2025-10-02 00:40:21.002282', 'step': 16589, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:40:21.071195', 'step': 16589, 'epoch': 2}
{'type': 'loss', 'content': 0.025895310565829277, 'timestamp': '2025-10-02 00:40:21.083745', 'step': 16590, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:21.140084', 'step': 16590, 'epoch': 2}
{'type': 'loss', 'content': 0.028259936720132828, 'timestamp': '2025-10-02 00:40:21.149890', 'step': 16591, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:21.206275', 'step': 16591, 'epoch': 2}
{'type': 'loss', 'content': 0.03470952436327934, 'timestamp': '2025-10-02 00:40:21.212825', 'step': 16592, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:21.266847', 'step': 16592, 'epoch': 2}
{'type': 'loss', 'content': 0.1984367072582245, 'timestamp': '2025-10-02 00:40:21.269730', 'step': 16593, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:40:21.324862', 'step': 16593, 'epoch': 2}
{'type': 'loss', 'content': 0.04764103144407272, 'timestamp': '2025-10-02 00:40:21.327664', 'step': 16594, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:21.382448', 'step': 16594, 'epoch': 2}
{'type': 'loss', 'content': 0.05745096877217293, 'timestamp': '2025-10-02 00:40:21.391987', 'step': 16595, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:40:21.446574', 'step': 16595, 'epoch': 2}
{'type': 'loss', 'content': 0.14495337009429932, 'timestamp': '2025-10-02 00:40:21.452899', 'step': 16596, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:21.507539', 'step': 16596, 'epoch': 2}
{'type': 'loss', 'content': 0.057240523397922516, 'timestamp': '2025-10-02 00:40:21.515055', 'step': 16597, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:21.570196', 'step': 16597, 'epoch': 2}
{'type': 'loss', 'content': 0.07736843824386597, 'timestamp': '2025-10-02 00:40:21.575975', 'step': 16598, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:21.631134', 'step': 16598, 'epoch': 2}
{'type': 'loss', 'content': 0.06888631731271744, 'timestamp': '2025-10-02 00:40:21.636989', 'step': 16599, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:21.691639', 'step': 16599, 'epoch': 2}
{'type': 'loss', 'content': 0.09314002841711044, 'timestamp': '2025-10-02 00:40:21.698262', 'step': 16600, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:21.751925', 'step': 16600, 'epoch': 2}
{'type': 'loss', 'content': 0.09708046913146973, 'timestamp': '2025-10-02 00:40:21.755001', 'step': 16601, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:21.810353', 'step': 16601, 'epoch': 2}
{'type': 'loss', 'content': 0.021794511005282402, 'timestamp': '2025-10-02 00:40:21.817830', 'step': 16602, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:21.873394', 'step': 16602, 'epoch': 2}
{'type': 'loss', 'content': 0.0821470394730568, 'timestamp': '2025-10-02 00:40:21.875864', 'step': 16603, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:21.931256', 'step': 16603, 'epoch': 2}
{'type': 'loss', 'content': 0.031965453177690506, 'timestamp': '2025-10-02 00:40:21.939511', 'step': 16604, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:40:22.007887', 'step': 16604, 'epoch': 2}
{'type': 'loss', 'content': 0.01986437663435936, 'timestamp': '2025-10-02 00:40:22.010713', 'step': 16605, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:22.065665', 'step': 16605, 'epoch': 2}
{'type': 'loss', 'content': 0.19142253696918488, 'timestamp': '2025-10-02 00:40:22.068390', 'step': 16606, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:22.124500', 'step': 16606, 'epoch': 2}
{'type': 'loss', 'content': 0.06587845832109451, 'timestamp': '2025-10-02 00:40:22.127590', 'step': 16607, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:22.182247', 'step': 16607, 'epoch': 2}
{'type': 'loss', 'content': 0.005494245793670416, 'timestamp': '2025-10-02 00:40:22.192822', 'step': 16608, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:22.246869', 'step': 16608, 'epoch': 2}
{'type': 'loss', 'content': 0.08231810480356216, 'timestamp': '2025-10-02 00:40:22.249620', 'step': 16609, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:40:22.309367', 'step': 16609, 'epoch': 2}
{'type': 'loss', 'content': 0.03624908998608589, 'timestamp': '2025-10-02 00:40:22.319728', 'step': 16610, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:22.378525', 'step': 16610, 'epoch': 2}
{'type': 'loss', 'content': 0.025531843304634094, 'timestamp': '2025-10-02 00:40:22.388303', 'step': 16611, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:22.442925', 'step': 16611, 'epoch': 2}
{'type': 'loss', 'content': 0.05709027871489525, 'timestamp': '2025-10-02 00:40:22.449651', 'step': 16612, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:40:22.504196', 'step': 16612, 'epoch': 2}
{'type': 'loss', 'content': 0.11132603138685226, 'timestamp': '2025-10-02 00:40:22.507640', 'step': 16613, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:22.563206', 'step': 16613, 'epoch': 2}
{'type': 'loss', 'content': 0.1421494036912918, 'timestamp': '2025-10-02 00:40:22.565756', 'step': 16614, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:22.621989', 'step': 16614, 'epoch': 2}
{'type': 'loss', 'content': 0.04833576828241348, 'timestamp': '2025-10-02 00:40:22.624850', 'step': 16615, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:22.681088', 'step': 16615, 'epoch': 2}
{'type': 'loss', 'content': 0.17686577141284943, 'timestamp': '2025-10-02 00:40:22.687750', 'step': 16616, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:22.743095', 'step': 16616, 'epoch': 2}
{'type': 'loss', 'content': 0.17692556977272034, 'timestamp': '2025-10-02 00:40:22.745959', 'step': 16617, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:22.802360', 'step': 16617, 'epoch': 2}
{'type': 'loss', 'content': 0.06897618621587753, 'timestamp': '2025-10-02 00:40:22.805013', 'step': 16618, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:22.859176', 'step': 16618, 'epoch': 2}
{'type': 'loss', 'content': 0.0230731088668108, 'timestamp': '2025-10-02 00:40:22.866698', 'step': 16619, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:40:22.922485', 'step': 16619, 'epoch': 2}
{'type': 'loss', 'content': 0.040780555456876755, 'timestamp': '2025-10-02 00:40:22.928898', 'step': 16620, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:40:22.987373', 'step': 16620, 'epoch': 2}
{'type': 'loss', 'content': 0.03665059059858322, 'timestamp': '2025-10-02 00:40:22.998571', 'step': 16621, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:23.054489', 'step': 16621, 'epoch': 2}
{'type': 'loss', 'content': 0.05087826028466225, 'timestamp': '2025-10-02 00:40:23.057788', 'step': 16622, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:23.116325', 'step': 16622, 'epoch': 2}
{'type': 'loss', 'content': 0.07296966016292572, 'timestamp': '2025-10-02 00:40:23.119257', 'step': 16623, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:23.174369', 'step': 16623, 'epoch': 2}
{'type': 'loss', 'content': 0.13813620805740356, 'timestamp': '2025-10-02 00:40:23.180948', 'step': 16624, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:23.234906', 'step': 16624, 'epoch': 2}
{'type': 'loss', 'content': 0.029680144041776657, 'timestamp': '2025-10-02 00:40:23.240761', 'step': 16625, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:23.294800', 'step': 16625, 'epoch': 2}
{'type': 'loss', 'content': 0.05248040705919266, 'timestamp': '2025-10-02 00:40:23.304268', 'step': 16626, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:23.359260', 'step': 16626, 'epoch': 2}
{'type': 'loss', 'content': 0.029382171109318733, 'timestamp': '2025-10-02 00:40:23.362175', 'step': 16627, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:23.417508', 'step': 16627, 'epoch': 2}
{'type': 'loss', 'content': 0.0857371985912323, 'timestamp': '2025-10-02 00:40:23.424053', 'step': 16628, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:23.477714', 'step': 16628, 'epoch': 2}
{'type': 'loss', 'content': 0.08449777215719223, 'timestamp': '2025-10-02 00:40:23.480761', 'step': 16629, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:40:23.539262', 'step': 16629, 'epoch': 2}
{'type': 'loss', 'content': 0.19389377534389496, 'timestamp': '2025-10-02 00:40:23.541988', 'step': 16630, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:23.596428', 'step': 16630, 'epoch': 2}
{'type': 'loss', 'content': 0.05255422368645668, 'timestamp': '2025-10-02 00:40:23.602293', 'step': 16631, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:23.657391', 'step': 16631, 'epoch': 2}
{'type': 'loss', 'content': 0.08476567268371582, 'timestamp': '2025-10-02 00:40:23.664059', 'step': 16632, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:23.717835', 'step': 16632, 'epoch': 2}
{'type': 'loss', 'content': 0.08879628032445908, 'timestamp': '2025-10-02 00:40:23.720535', 'step': 16633, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:40:23.782452', 'step': 16633, 'epoch': 2}
{'type': 'loss', 'content': 0.035221587866544724, 'timestamp': '2025-10-02 00:40:23.793263', 'step': 16634, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:40:23.848485', 'step': 16634, 'epoch': 2}
{'type': 'loss', 'content': 0.0936838909983635, 'timestamp': '2025-10-02 00:40:23.851229', 'step': 16635, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:23.906795', 'step': 16635, 'epoch': 2}
{'type': 'loss', 'content': 0.04908662661910057, 'timestamp': '2025-10-02 00:40:23.917375', 'step': 16636, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:23.971417', 'step': 16636, 'epoch': 2}
{'type': 'loss', 'content': 0.05126125365495682, 'timestamp': '2025-10-02 00:40:23.974149', 'step': 16637, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:24.029501', 'step': 16637, 'epoch': 2}
{'type': 'loss', 'content': 0.03942589834332466, 'timestamp': '2025-10-02 00:40:24.039230', 'step': 16638, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:24.093754', 'step': 16638, 'epoch': 2}
{'type': 'loss', 'content': 0.07809888571500778, 'timestamp': '2025-10-02 00:40:24.096392', 'step': 16639, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:24.151367', 'step': 16639, 'epoch': 2}
{'type': 'loss', 'content': 0.12517349421977997, 'timestamp': '2025-10-02 00:40:24.161485', 'step': 16640, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:24.215035', 'step': 16640, 'epoch': 2}
{'type': 'loss', 'content': 0.08944229036569595, 'timestamp': '2025-10-02 00:40:24.225484', 'step': 16641, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:24.280862', 'step': 16641, 'epoch': 2}
{'type': 'loss', 'content': 0.08225121349096298, 'timestamp': '2025-10-02 00:40:24.283562', 'step': 16642, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:24.338198', 'step': 16642, 'epoch': 2}
{'type': 'loss', 'content': 0.11033166199922562, 'timestamp': '2025-10-02 00:40:24.341113', 'step': 16643, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:24.396771', 'step': 16643, 'epoch': 2}
{'type': 'loss', 'content': 0.027688562870025635, 'timestamp': '2025-10-02 00:40:24.403001', 'step': 16644, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:40:24.456637', 'step': 16644, 'epoch': 2}
{'type': 'loss', 'content': 0.09187392145395279, 'timestamp': '2025-10-02 00:40:24.459915', 'step': 16645, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:24.517397', 'step': 16645, 'epoch': 2}
{'type': 'loss', 'content': 0.041170161217451096, 'timestamp': '2025-10-02 00:40:24.524927', 'step': 16646, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:24.580061', 'step': 16646, 'epoch': 2}
{'type': 'loss', 'content': 0.06648261100053787, 'timestamp': '2025-10-02 00:40:24.587339', 'step': 16647, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:24.642487', 'step': 16647, 'epoch': 2}
{'type': 'loss', 'content': 0.2022121399641037, 'timestamp': '2025-10-02 00:40:24.648679', 'step': 16648, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:24.703680', 'step': 16648, 'epoch': 2}
{'type': 'loss', 'content': 0.08967308700084686, 'timestamp': '2025-10-02 00:40:24.706628', 'step': 16649, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:24.760863', 'step': 16649, 'epoch': 2}
{'type': 'loss', 'content': 0.14385077357292175, 'timestamp': '2025-10-02 00:40:24.764052', 'step': 16650, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:24.818621', 'step': 16650, 'epoch': 2}
{'type': 'loss', 'content': 0.09857113659381866, 'timestamp': '2025-10-02 00:40:24.821643', 'step': 16651, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:24.877046', 'step': 16651, 'epoch': 2}
{'type': 'loss', 'content': 0.20036324858665466, 'timestamp': '2025-10-02 00:40:24.883438', 'step': 16652, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:24.937641', 'step': 16652, 'epoch': 2}
{'type': 'loss', 'content': 0.06507455557584763, 'timestamp': '2025-10-02 00:40:24.940457', 'step': 16653, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:24.994623', 'step': 16653, 'epoch': 2}
{'type': 'loss', 'content': 0.02129017375409603, 'timestamp': '2025-10-02 00:40:25.000623', 'step': 16654, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:25.056366', 'step': 16654, 'epoch': 2}
{'type': 'loss', 'content': 0.05771329998970032, 'timestamp': '2025-10-02 00:40:25.058787', 'step': 16655, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:25.113048', 'step': 16655, 'epoch': 2}
{'type': 'loss', 'content': 0.1011144369840622, 'timestamp': '2025-10-02 00:40:25.119909', 'step': 16656, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:25.173920', 'step': 16656, 'epoch': 2}
{'type': 'loss', 'content': 0.04815579950809479, 'timestamp': '2025-10-02 00:40:25.176435', 'step': 16657, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:40:25.230720', 'step': 16657, 'epoch': 2}
{'type': 'loss', 'content': 0.028213050216436386, 'timestamp': '2025-10-02 00:40:25.234081', 'step': 16658, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:25.289541', 'step': 16658, 'epoch': 2}
{'type': 'loss', 'content': 0.10796931385993958, 'timestamp': '2025-10-02 00:40:25.292362', 'step': 16659, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:25.348942', 'step': 16659, 'epoch': 2}
{'type': 'loss', 'content': 0.06087946891784668, 'timestamp': '2025-10-02 00:40:25.355702', 'step': 16660, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:25.411480', 'step': 16660, 'epoch': 2}
{'type': 'loss', 'content': 0.09293371438980103, 'timestamp': '2025-10-02 00:40:25.414872', 'step': 16661, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:25.471201', 'step': 16661, 'epoch': 2}
{'type': 'loss', 'content': 0.034685611724853516, 'timestamp': '2025-10-02 00:40:25.480965', 'step': 16662, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:25.536098', 'step': 16662, 'epoch': 2}
{'type': 'loss', 'content': 0.12902972102165222, 'timestamp': '2025-10-02 00:40:25.538706', 'step': 16663, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:25.593132', 'step': 16663, 'epoch': 2}
{'type': 'loss', 'content': 0.048252545297145844, 'timestamp': '2025-10-02 00:40:25.599609', 'step': 16664, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:25.653371', 'step': 16664, 'epoch': 2}
{'type': 'loss', 'content': 0.01153602171689272, 'timestamp': '2025-10-02 00:40:25.656076', 'step': 16665, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:25.710484', 'step': 16665, 'epoch': 2}
{'type': 'loss', 'content': 0.039914652705192566, 'timestamp': '2025-10-02 00:40:25.716402', 'step': 16666, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:40:25.787183', 'step': 16666, 'epoch': 2}
{'type': 'loss', 'content': 0.04840534180402756, 'timestamp': '2025-10-02 00:40:25.800027', 'step': 16667, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:25.855257', 'step': 16667, 'epoch': 2}
{'type': 'loss', 'content': 0.06248171627521515, 'timestamp': '2025-10-02 00:40:25.861552', 'step': 16668, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:25.915258', 'step': 16668, 'epoch': 2}
{'type': 'loss', 'content': 0.017878375947475433, 'timestamp': '2025-10-02 00:40:25.918483', 'step': 16669, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:25.974121', 'step': 16669, 'epoch': 2}
{'type': 'loss', 'content': 0.04951462522149086, 'timestamp': '2025-10-02 00:40:25.983933', 'step': 16670, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:40:26.046021', 'step': 16670, 'epoch': 2}
{'type': 'loss', 'content': 0.030350474640727043, 'timestamp': '2025-10-02 00:40:26.056724', 'step': 16671, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:26.111645', 'step': 16671, 'epoch': 2}
{'type': 'loss', 'content': 0.032851506024599075, 'timestamp': '2025-10-02 00:40:26.118267', 'step': 16672, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:40:26.172416', 'step': 16672, 'epoch': 2}
{'type': 'loss', 'content': 0.05057361349463463, 'timestamp': '2025-10-02 00:40:26.175245', 'step': 16673, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:26.230491', 'step': 16673, 'epoch': 2}
{'type': 'loss', 'content': 0.04801042005419731, 'timestamp': '2025-10-02 00:40:26.233302', 'step': 16674, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:40:26.292273', 'step': 16674, 'epoch': 2}
{'type': 'loss', 'content': 0.014763635583221912, 'timestamp': '2025-10-02 00:40:26.302638', 'step': 16675, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:26.358695', 'step': 16675, 'epoch': 2}
{'type': 'loss', 'content': 0.037108298391103745, 'timestamp': '2025-10-02 00:40:26.365436', 'step': 16676, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:40:26.419085', 'step': 16676, 'epoch': 2}
{'type': 'loss', 'content': 0.10993898659944534, 'timestamp': '2025-10-02 00:40:26.421949', 'step': 16677, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:26.476864', 'step': 16677, 'epoch': 2}
{'type': 'loss', 'content': 0.043069105595350266, 'timestamp': '2025-10-02 00:40:26.479820', 'step': 16678, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:40:26.534991', 'step': 16678, 'epoch': 2}
{'type': 'loss', 'content': 0.051167432218790054, 'timestamp': '2025-10-02 00:40:26.537957', 'step': 16679, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:40:26.592354', 'step': 16679, 'epoch': 2}
{'type': 'loss', 'content': 0.17157569527626038, 'timestamp': '2025-10-02 00:40:26.598439', 'step': 16680, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:26.653900', 'step': 16680, 'epoch': 2}
{'type': 'loss', 'content': 0.040370624512434006, 'timestamp': '2025-10-02 00:40:26.659583', 'step': 16681, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:26.714504', 'step': 16681, 'epoch': 2}
{'type': 'loss', 'content': 0.07371018826961517, 'timestamp': '2025-10-02 00:40:26.721822', 'step': 16682, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:26.778887', 'step': 16682, 'epoch': 2}
{'type': 'loss', 'content': 0.025698291137814522, 'timestamp': '2025-10-02 00:40:26.788273', 'step': 16683, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:26.843986', 'step': 16683, 'epoch': 2}
{'type': 'loss', 'content': 0.04783160984516144, 'timestamp': '2025-10-02 00:40:26.851099', 'step': 16684, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:26.906704', 'step': 16684, 'epoch': 2}
{'type': 'loss', 'content': 0.09102104604244232, 'timestamp': '2025-10-02 00:40:26.910663', 'step': 16685, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:26.967278', 'step': 16685, 'epoch': 2}
{'type': 'loss', 'content': 0.060278262943029404, 'timestamp': '2025-10-02 00:40:26.971627', 'step': 16686, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:27.031753', 'step': 16686, 'epoch': 2}
{'type': 'loss', 'content': 0.07947059720754623, 'timestamp': '2025-10-02 00:40:27.036488', 'step': 16687, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:27.091058', 'step': 16687, 'epoch': 2}
{'type': 'loss', 'content': 0.06542954593896866, 'timestamp': '2025-10-02 00:40:27.097455', 'step': 16688, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:27.153671', 'step': 16688, 'epoch': 2}
{'type': 'loss', 'content': 0.06930653750896454, 'timestamp': '2025-10-02 00:40:27.156450', 'step': 16689, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:27.211615', 'step': 16689, 'epoch': 2}
{'type': 'loss', 'content': 0.05661923810839653, 'timestamp': '2025-10-02 00:40:27.214303', 'step': 16690, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:27.269685', 'step': 16690, 'epoch': 2}
{'type': 'loss', 'content': 0.07727431505918503, 'timestamp': '2025-10-02 00:40:27.276909', 'step': 16691, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:27.332866', 'step': 16691, 'epoch': 2}
{'type': 'loss', 'content': 0.0773235633969307, 'timestamp': '2025-10-02 00:40:27.339246', 'step': 16692, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:40:27.393471', 'step': 16692, 'epoch': 2}
{'type': 'loss', 'content': 0.0366855226457119, 'timestamp': '2025-10-02 00:40:27.396368', 'step': 16693, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:27.452367', 'step': 16693, 'epoch': 2}
{'type': 'loss', 'content': 0.049433812499046326, 'timestamp': '2025-10-02 00:40:27.455266', 'step': 16694, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:40:27.513863', 'step': 16694, 'epoch': 2}
{'type': 'loss', 'content': 0.13773567974567413, 'timestamp': '2025-10-02 00:40:27.516818', 'step': 16695, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:27.572627', 'step': 16695, 'epoch': 2}
{'type': 'loss', 'content': 0.06213373318314552, 'timestamp': '2025-10-02 00:40:27.579069', 'step': 16696, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:27.633686', 'step': 16696, 'epoch': 2}
{'type': 'loss', 'content': 0.05639878287911415, 'timestamp': '2025-10-02 00:40:27.643202', 'step': 16697, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:27.698506', 'step': 16697, 'epoch': 2}
{'type': 'loss', 'content': 0.09720019996166229, 'timestamp': '2025-10-02 00:40:27.701157', 'step': 16698, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:27.757211', 'step': 16698, 'epoch': 2}
{'type': 'loss', 'content': 0.033292047679424286, 'timestamp': '2025-10-02 00:40:27.761230', 'step': 16699, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:27.820782', 'step': 16699, 'epoch': 2}
{'type': 'loss', 'content': 0.13829900324344635, 'timestamp': '2025-10-02 00:40:27.831346', 'step': 16700, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:27.889559', 'step': 16700, 'epoch': 2}
{'type': 'loss', 'content': 0.12808845937252045, 'timestamp': '2025-10-02 00:40:27.894749', 'step': 16701, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:27.954628', 'step': 16701, 'epoch': 2}
{'type': 'loss', 'content': 0.004618468694388866, 'timestamp': '2025-10-02 00:40:27.964207', 'step': 16702, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:40:28.028462', 'step': 16702, 'epoch': 2}
{'type': 'loss', 'content': 0.009977497160434723, 'timestamp': '2025-10-02 00:40:28.039362', 'step': 16703, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:28.097167', 'step': 16703, 'epoch': 2}
{'type': 'loss', 'content': 0.04739074781537056, 'timestamp': '2025-10-02 00:40:28.107758', 'step': 16704, 'epoch': 2}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:40:54.975955', 'step': 16704, 'epoch': 2}
{'type': 'pplx', 'content': 92.94354554586812, 'timestamp': '2025-10-02 00:40:54.980718', 'step': 16704, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:55.038041', 'step': 16704, 'epoch': 2}
{'type': 'loss', 'content': 0.13274893164634705, 'timestamp': '2025-10-02 00:40:55.041231', 'step': 16705, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:55.101111', 'step': 16705, 'epoch': 2}
{'type': 'loss', 'content': 0.0668463408946991, 'timestamp': '2025-10-02 00:40:55.103564', 'step': 16706, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:55.164796', 'step': 16706, 'epoch': 2}
{'type': 'loss', 'content': 0.07918045669794083, 'timestamp': '2025-10-02 00:40:55.167836', 'step': 16707, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:55.226106', 'step': 16707, 'epoch': 2}
{'type': 'loss', 'content': 0.09961327910423279, 'timestamp': '2025-10-02 00:40:55.233355', 'step': 16708, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:55.309119', 'step': 16708, 'epoch': 2}
{'type': 'loss', 'content': 0.17242954671382904, 'timestamp': '2025-10-02 00:40:55.319613', 'step': 16709, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:55.386258', 'step': 16709, 'epoch': 2}
{'type': 'loss', 'content': 0.011137070134282112, 'timestamp': '2025-10-02 00:40:55.390592', 'step': 16710, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:40:55.449382', 'step': 16710, 'epoch': 2}
{'type': 'loss', 'content': 0.045213956385850906, 'timestamp': '2025-10-02 00:40:55.451669', 'step': 16711, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:55.519260', 'step': 16711, 'epoch': 2}
{'type': 'loss', 'content': 0.10845718532800674, 'timestamp': '2025-10-02 00:40:55.526675', 'step': 16712, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:55.591596', 'step': 16712, 'epoch': 2}
{'type': 'loss', 'content': 0.010966256260871887, 'timestamp': '2025-10-02 00:40:55.599234', 'step': 16713, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:55.657613', 'step': 16713, 'epoch': 2}
{'type': 'loss', 'content': 0.07230353355407715, 'timestamp': '2025-10-02 00:40:55.666721', 'step': 16714, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:40:55.737911', 'step': 16714, 'epoch': 2}
{'type': 'loss', 'content': 0.09731428325176239, 'timestamp': '2025-10-02 00:40:55.741742', 'step': 16715, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:55.798661', 'step': 16715, 'epoch': 2}
{'type': 'loss', 'content': 0.03898964449763298, 'timestamp': '2025-10-02 00:40:55.807069', 'step': 16716, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:55.875392', 'step': 16716, 'epoch': 2}
{'type': 'loss', 'content': 0.056283362209796906, 'timestamp': '2025-10-02 00:40:55.885186', 'step': 16717, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:55.961817', 'step': 16717, 'epoch': 2}
{'type': 'loss', 'content': 0.12136447429656982, 'timestamp': '2025-10-02 00:40:55.967972', 'step': 16718, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:40:56.039923', 'step': 16718, 'epoch': 2}
{'type': 'loss', 'content': 0.03530276566743851, 'timestamp': '2025-10-02 00:40:56.044228', 'step': 16719, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:56.112621', 'step': 16719, 'epoch': 2}
{'type': 'loss', 'content': 0.04366455227136612, 'timestamp': '2025-10-02 00:40:56.125075', 'step': 16720, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:40:56.188232', 'step': 16720, 'epoch': 2}
{'type': 'loss', 'content': 0.06913360953330994, 'timestamp': '2025-10-02 00:40:56.199216', 'step': 16721, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:40:56.261299', 'step': 16721, 'epoch': 2}
{'type': 'loss', 'content': 0.15555442869663239, 'timestamp': '2025-10-02 00:40:56.264954', 'step': 16722, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:56.332252', 'step': 16722, 'epoch': 2}
{'type': 'loss', 'content': 0.05354899913072586, 'timestamp': '2025-10-02 00:40:56.336156', 'step': 16723, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:56.400843', 'step': 16723, 'epoch': 2}
{'type': 'loss', 'content': 0.02201586775481701, 'timestamp': '2025-10-02 00:40:56.408041', 'step': 16724, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:40:56.485654', 'step': 16724, 'epoch': 2}
{'type': 'loss', 'content': 0.04889194294810295, 'timestamp': '2025-10-02 00:40:56.497426', 'step': 16725, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:56.561709', 'step': 16725, 'epoch': 2}
{'type': 'loss', 'content': 0.0378507561981678, 'timestamp': '2025-10-02 00:40:56.569323', 'step': 16726, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:56.627418', 'step': 16726, 'epoch': 2}
{'type': 'loss', 'content': 0.10769067704677582, 'timestamp': '2025-10-02 00:40:56.630866', 'step': 16727, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:40:56.694694', 'step': 16727, 'epoch': 2}
{'type': 'loss', 'content': 0.07156795263290405, 'timestamp': '2025-10-02 00:40:56.702162', 'step': 16728, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:56.770815', 'step': 16728, 'epoch': 2}
{'type': 'loss', 'content': 0.06064746156334877, 'timestamp': '2025-10-02 00:40:56.778492', 'step': 16729, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:40:56.836171', 'step': 16729, 'epoch': 2}
{'type': 'loss', 'content': 0.08912624418735504, 'timestamp': '2025-10-02 00:40:56.840547', 'step': 16730, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:56.898464', 'step': 16730, 'epoch': 2}
{'type': 'loss', 'content': 0.10385166108608246, 'timestamp': '2025-10-02 00:40:56.903118', 'step': 16731, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:40:56.965988', 'step': 16731, 'epoch': 2}
{'type': 'loss', 'content': 0.12949855625629425, 'timestamp': '2025-10-02 00:40:56.973667', 'step': 16732, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:57.038211', 'step': 16732, 'epoch': 2}
{'type': 'loss', 'content': 0.07984005659818649, 'timestamp': '2025-10-02 00:40:57.048424', 'step': 16733, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:40:57.108984', 'step': 16733, 'epoch': 2}
{'type': 'loss', 'content': 0.030870346352458, 'timestamp': '2025-10-02 00:40:57.112538', 'step': 16734, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:40:57.176532', 'step': 16734, 'epoch': 2}
{'type': 'loss', 'content': 0.09907203167676926, 'timestamp': '2025-10-02 00:40:57.181578', 'step': 16735, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:57.246070', 'step': 16735, 'epoch': 2}
{'type': 'loss', 'content': 0.0913640484213829, 'timestamp': '2025-10-02 00:40:57.256307', 'step': 16736, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:40:57.328386', 'step': 16736, 'epoch': 2}
{'type': 'loss', 'content': 0.11731275916099548, 'timestamp': '2025-10-02 00:40:57.333675', 'step': 16737, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:57.406346', 'step': 16737, 'epoch': 2}
{'type': 'loss', 'content': 0.06998596340417862, 'timestamp': '2025-10-02 00:40:57.412399', 'step': 16738, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:57.481343', 'step': 16738, 'epoch': 2}
{'type': 'loss', 'content': 0.06509032845497131, 'timestamp': '2025-10-02 00:40:57.489494', 'step': 16739, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:57.555669', 'step': 16739, 'epoch': 2}
{'type': 'loss', 'content': 0.02927667461335659, 'timestamp': '2025-10-02 00:40:57.565685', 'step': 16740, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:57.626109', 'step': 16740, 'epoch': 2}
{'type': 'loss', 'content': 0.12831778824329376, 'timestamp': '2025-10-02 00:40:57.629369', 'step': 16741, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:40:57.698044', 'step': 16741, 'epoch': 2}
{'type': 'loss', 'content': 0.1254211664199829, 'timestamp': '2025-10-02 00:40:57.703248', 'step': 16742, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:57.768513', 'step': 16742, 'epoch': 2}
{'type': 'loss', 'content': 0.20169317722320557, 'timestamp': '2025-10-02 00:40:57.772557', 'step': 16743, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:40:57.838417', 'step': 16743, 'epoch': 2}
{'type': 'loss', 'content': 0.04237860441207886, 'timestamp': '2025-10-02 00:40:57.849830', 'step': 16744, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:40:57.909122', 'step': 16744, 'epoch': 2}
{'type': 'loss', 'content': 0.039886731654405594, 'timestamp': '2025-10-02 00:40:57.918620', 'step': 16745, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:57.983624', 'step': 16745, 'epoch': 2}
{'type': 'loss', 'content': 0.08871342986822128, 'timestamp': '2025-10-02 00:40:57.988385', 'step': 16746, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:58.056239', 'step': 16746, 'epoch': 2}
{'type': 'loss', 'content': 0.02034246176481247, 'timestamp': '2025-10-02 00:40:58.061470', 'step': 16747, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:40:58.135951', 'step': 16747, 'epoch': 2}
{'type': 'loss', 'content': 0.008846358396112919, 'timestamp': '2025-10-02 00:40:58.149394', 'step': 16748, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:58.208024', 'step': 16748, 'epoch': 2}
{'type': 'loss', 'content': 0.06781872361898422, 'timestamp': '2025-10-02 00:40:58.215022', 'step': 16749, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:40:58.286344', 'step': 16749, 'epoch': 2}
{'type': 'loss', 'content': 0.038073863834142685, 'timestamp': '2025-10-02 00:40:58.290294', 'step': 16750, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:58.353456', 'step': 16750, 'epoch': 2}
{'type': 'loss', 'content': 0.03994875028729439, 'timestamp': '2025-10-02 00:40:58.361014', 'step': 16751, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:58.419226', 'step': 16751, 'epoch': 2}
{'type': 'loss', 'content': 0.0076009719632565975, 'timestamp': '2025-10-02 00:40:58.428645', 'step': 16752, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:40:58.486073', 'step': 16752, 'epoch': 2}
{'type': 'loss', 'content': 0.12603482604026794, 'timestamp': '2025-10-02 00:40:58.497260', 'step': 16753, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:40:58.563829', 'step': 16753, 'epoch': 2}
{'type': 'loss', 'content': 0.09311867505311966, 'timestamp': '2025-10-02 00:40:58.569845', 'step': 16754, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:58.627389', 'step': 16754, 'epoch': 2}
{'type': 'loss', 'content': 0.08359616994857788, 'timestamp': '2025-10-02 00:40:58.633911', 'step': 16755, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:58.700480', 'step': 16755, 'epoch': 2}
{'type': 'loss', 'content': 0.08525096625089645, 'timestamp': '2025-10-02 00:40:58.707548', 'step': 16756, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:58.768651', 'step': 16756, 'epoch': 2}
{'type': 'loss', 'content': 0.09743645042181015, 'timestamp': '2025-10-02 00:40:58.772692', 'step': 16757, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:40:58.834808', 'step': 16757, 'epoch': 2}
{'type': 'loss', 'content': 0.04093007743358612, 'timestamp': '2025-10-02 00:40:58.844971', 'step': 16758, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:58.908618', 'step': 16758, 'epoch': 2}
{'type': 'loss', 'content': 0.14304809272289276, 'timestamp': '2025-10-02 00:40:58.911865', 'step': 16759, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:40:58.976292', 'step': 16759, 'epoch': 2}
{'type': 'loss', 'content': 0.024361364543437958, 'timestamp': '2025-10-02 00:40:58.984684', 'step': 16760, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:59.044251', 'step': 16760, 'epoch': 2}
{'type': 'loss', 'content': 0.03203969821333885, 'timestamp': '2025-10-02 00:40:59.054512', 'step': 16761, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:59.112301', 'step': 16761, 'epoch': 2}
{'type': 'loss', 'content': 0.15181122720241547, 'timestamp': '2025-10-02 00:40:59.115620', 'step': 16762, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:59.177184', 'step': 16762, 'epoch': 2}
{'type': 'loss', 'content': 0.10437685251235962, 'timestamp': '2025-10-02 00:40:59.179782', 'step': 16763, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:40:59.253515', 'step': 16763, 'epoch': 2}
{'type': 'loss', 'content': 0.05359739810228348, 'timestamp': '2025-10-02 00:40:59.265545', 'step': 16764, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:59.321114', 'step': 16764, 'epoch': 2}
{'type': 'loss', 'content': 0.09443105012178421, 'timestamp': '2025-10-02 00:40:59.325227', 'step': 16765, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:40:59.380198', 'step': 16765, 'epoch': 2}
{'type': 'loss', 'content': 0.10980802029371262, 'timestamp': '2025-10-02 00:40:59.383599', 'step': 16766, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:40:59.447136', 'step': 16766, 'epoch': 2}
{'type': 'loss', 'content': 0.03142409026622772, 'timestamp': '2025-10-02 00:40:59.456641', 'step': 16767, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:40:59.520424', 'step': 16767, 'epoch': 2}
{'type': 'loss', 'content': 0.04922117665410042, 'timestamp': '2025-10-02 00:40:59.527103', 'step': 16768, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:40:59.584049', 'step': 16768, 'epoch': 2}
{'type': 'loss', 'content': 0.022798461839556694, 'timestamp': '2025-10-02 00:40:59.587548', 'step': 16769, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:40:59.644509', 'step': 16769, 'epoch': 2}
{'type': 'loss', 'content': 0.08293543756008148, 'timestamp': '2025-10-02 00:40:59.654265', 'step': 16770, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:59.731262', 'step': 16770, 'epoch': 2}
{'type': 'loss', 'content': 0.08902059495449066, 'timestamp': '2025-10-02 00:40:59.741831', 'step': 16771, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:59.816708', 'step': 16771, 'epoch': 2}
{'type': 'loss', 'content': 0.06684795767068863, 'timestamp': '2025-10-02 00:40:59.824656', 'step': 16772, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:40:59.893996', 'step': 16772, 'epoch': 2}
{'type': 'loss', 'content': 0.10339733958244324, 'timestamp': '2025-10-02 00:40:59.906030', 'step': 16773, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:40:59.972613', 'step': 16773, 'epoch': 2}
{'type': 'loss', 'content': 0.06485583633184433, 'timestamp': '2025-10-02 00:40:59.979477', 'step': 16774, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:00.056100', 'step': 16774, 'epoch': 2}
{'type': 'loss', 'content': 0.009535974822938442, 'timestamp': '2025-10-02 00:41:00.064934', 'step': 16775, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:41:00.133123', 'step': 16775, 'epoch': 2}
{'type': 'loss', 'content': 0.16778597235679626, 'timestamp': '2025-10-02 00:41:00.141897', 'step': 16776, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:00.210453', 'step': 16776, 'epoch': 2}
{'type': 'loss', 'content': 0.017856718972325325, 'timestamp': '2025-10-02 00:41:00.220005', 'step': 16777, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:00.299361', 'step': 16777, 'epoch': 2}
{'type': 'loss', 'content': 0.03679928928613663, 'timestamp': '2025-10-02 00:41:00.306785', 'step': 16778, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:00.380246', 'step': 16778, 'epoch': 2}
{'type': 'loss', 'content': 0.06443069875240326, 'timestamp': '2025-10-02 00:41:00.383357', 'step': 16779, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:00.443312', 'step': 16779, 'epoch': 2}
{'type': 'loss', 'content': 0.05494202300906181, 'timestamp': '2025-10-02 00:41:00.455707', 'step': 16780, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:00.528430', 'step': 16780, 'epoch': 2}
{'type': 'loss', 'content': 0.0301631111651659, 'timestamp': '2025-10-02 00:41:00.537082', 'step': 16781, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:00.611929', 'step': 16781, 'epoch': 2}
{'type': 'loss', 'content': 0.034699972718954086, 'timestamp': '2025-10-02 00:41:00.619392', 'step': 16782, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:00.693117', 'step': 16782, 'epoch': 2}
{'type': 'loss', 'content': 0.03424171358346939, 'timestamp': '2025-10-02 00:41:00.711871', 'step': 16783, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:00.803332', 'step': 16783, 'epoch': 2}
{'type': 'loss', 'content': 0.05711673945188522, 'timestamp': '2025-10-02 00:41:00.812024', 'step': 16784, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:00.888186', 'step': 16784, 'epoch': 2}
{'type': 'loss', 'content': 0.1314074844121933, 'timestamp': '2025-10-02 00:41:00.900081', 'step': 16785, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:00.992749', 'step': 16785, 'epoch': 2}
{'type': 'loss', 'content': 0.006054646335542202, 'timestamp': '2025-10-02 00:41:01.003190', 'step': 16786, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:01.082420', 'step': 16786, 'epoch': 2}
{'type': 'loss', 'content': 0.013171729631721973, 'timestamp': '2025-10-02 00:41:01.103534', 'step': 16787, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:01.181424', 'step': 16787, 'epoch': 2}
{'type': 'loss', 'content': 0.09176386892795563, 'timestamp': '2025-10-02 00:41:01.196041', 'step': 16788, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:01.260304', 'step': 16788, 'epoch': 2}
{'type': 'loss', 'content': 0.01538260281085968, 'timestamp': '2025-10-02 00:41:01.271659', 'step': 16789, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:01.347403', 'step': 16789, 'epoch': 2}
{'type': 'loss', 'content': 0.06939727813005447, 'timestamp': '2025-10-02 00:41:01.361068', 'step': 16790, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:01.446508', 'step': 16790, 'epoch': 2}
{'type': 'loss', 'content': 0.08751489967107773, 'timestamp': '2025-10-02 00:41:01.450517', 'step': 16791, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:01.542749', 'step': 16791, 'epoch': 2}
{'type': 'loss', 'content': 0.07511825114488602, 'timestamp': '2025-10-02 00:41:01.554761', 'step': 16792, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:01.617754', 'step': 16792, 'epoch': 2}
{'type': 'loss', 'content': 0.07302001118659973, 'timestamp': '2025-10-02 00:41:01.629374', 'step': 16793, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:01.700529', 'step': 16793, 'epoch': 2}
{'type': 'loss', 'content': 0.028561564162373543, 'timestamp': '2025-10-02 00:41:01.710490', 'step': 16794, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:01.780673', 'step': 16794, 'epoch': 2}
{'type': 'loss', 'content': 0.13226380944252014, 'timestamp': '2025-10-02 00:41:01.788523', 'step': 16795, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:01.871363', 'step': 16795, 'epoch': 2}
{'type': 'loss', 'content': 0.195490762591362, 'timestamp': '2025-10-02 00:41:01.879365', 'step': 16796, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:01.935619', 'step': 16796, 'epoch': 2}
{'type': 'loss', 'content': 0.011972581967711449, 'timestamp': '2025-10-02 00:41:01.948120', 'step': 16797, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:02.042612', 'step': 16797, 'epoch': 2}
{'type': 'loss', 'content': 0.14422748982906342, 'timestamp': '2025-10-02 00:41:02.055091', 'step': 16798, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:02.138424', 'step': 16798, 'epoch': 2}
{'type': 'loss', 'content': 0.046655189245939255, 'timestamp': '2025-10-02 00:41:02.149605', 'step': 16799, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:41:02.231890', 'step': 16799, 'epoch': 2}
{'type': 'loss', 'content': 0.08256809413433075, 'timestamp': '2025-10-02 00:41:02.247716', 'step': 16800, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:02.347345', 'step': 16800, 'epoch': 2}
{'type': 'loss', 'content': 0.11979563534259796, 'timestamp': '2025-10-02 00:41:02.358217', 'step': 16801, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:02.442691', 'step': 16801, 'epoch': 2}
{'type': 'loss', 'content': 0.07668537646532059, 'timestamp': '2025-10-02 00:41:02.453186', 'step': 16802, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:02.527622', 'step': 16802, 'epoch': 2}
{'type': 'loss', 'content': 0.02236560545861721, 'timestamp': '2025-10-02 00:41:02.530634', 'step': 16803, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:41:02.592458', 'step': 16803, 'epoch': 2}
{'type': 'loss', 'content': 0.078854039311409, 'timestamp': '2025-10-02 00:41:02.605266', 'step': 16804, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:02.676876', 'step': 16804, 'epoch': 2}
{'type': 'loss', 'content': 0.016620654612779617, 'timestamp': '2025-10-02 00:41:02.688698', 'step': 16805, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:02.759243', 'step': 16805, 'epoch': 2}
{'type': 'loss', 'content': 0.1662347912788391, 'timestamp': '2025-10-02 00:41:02.767548', 'step': 16806, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:02.842638', 'step': 16806, 'epoch': 2}
{'type': 'loss', 'content': 0.05018661543726921, 'timestamp': '2025-10-02 00:41:02.851903', 'step': 16807, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:02.924238', 'step': 16807, 'epoch': 2}
{'type': 'loss', 'content': 0.038875188678503036, 'timestamp': '2025-10-02 00:41:02.938393', 'step': 16808, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:03.011649', 'step': 16808, 'epoch': 2}
{'type': 'loss', 'content': 0.07505461573600769, 'timestamp': '2025-10-02 00:41:03.014947', 'step': 16809, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:03.087189', 'step': 16809, 'epoch': 2}
{'type': 'loss', 'content': 0.014097031205892563, 'timestamp': '2025-10-02 00:41:03.095793', 'step': 16810, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:03.175496', 'step': 16810, 'epoch': 2}
{'type': 'loss', 'content': 0.06422867625951767, 'timestamp': '2025-10-02 00:41:03.184863', 'step': 16811, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:03.258307', 'step': 16811, 'epoch': 2}
{'type': 'loss', 'content': 0.04967832192778587, 'timestamp': '2025-10-02 00:41:03.270359', 'step': 16812, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:03.333444', 'step': 16812, 'epoch': 2}
{'type': 'loss', 'content': 0.02234003320336342, 'timestamp': '2025-10-02 00:41:03.342034', 'step': 16813, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:03.413689', 'step': 16813, 'epoch': 2}
{'type': 'loss', 'content': 0.08694573491811752, 'timestamp': '2025-10-02 00:41:03.421934', 'step': 16814, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:03.496778', 'step': 16814, 'epoch': 2}
{'type': 'loss', 'content': 0.028788413852453232, 'timestamp': '2025-10-02 00:41:03.506403', 'step': 16815, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:03.591527', 'step': 16815, 'epoch': 2}
{'type': 'loss', 'content': 0.09014450013637543, 'timestamp': '2025-10-02 00:41:03.597724', 'step': 16816, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:03.677742', 'step': 16816, 'epoch': 2}
{'type': 'loss', 'content': 0.014524780213832855, 'timestamp': '2025-10-02 00:41:03.685916', 'step': 16817, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:03.758915', 'step': 16817, 'epoch': 2}
{'type': 'loss', 'content': 0.021318165585398674, 'timestamp': '2025-10-02 00:41:03.769726', 'step': 16818, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:03.837375', 'step': 16818, 'epoch': 2}
{'type': 'loss', 'content': 0.07068664580583572, 'timestamp': '2025-10-02 00:41:03.844943', 'step': 16819, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:03.919499', 'step': 16819, 'epoch': 2}
{'type': 'loss', 'content': 0.1448439210653305, 'timestamp': '2025-10-02 00:41:03.931453', 'step': 16820, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:04.014378', 'step': 16820, 'epoch': 2}
{'type': 'loss', 'content': 0.028489960357546806, 'timestamp': '2025-10-02 00:41:04.025729', 'step': 16821, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:41:04.089391', 'step': 16821, 'epoch': 2}
{'type': 'loss', 'content': 0.08182438462972641, 'timestamp': '2025-10-02 00:41:04.099269', 'step': 16822, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:04.171810', 'step': 16822, 'epoch': 2}
{'type': 'loss', 'content': 0.0789596363902092, 'timestamp': '2025-10-02 00:41:04.180336', 'step': 16823, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:04.252060', 'step': 16823, 'epoch': 2}
{'type': 'loss', 'content': 0.09708293527364731, 'timestamp': '2025-10-02 00:41:04.268495', 'step': 16824, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:41:04.344278', 'step': 16824, 'epoch': 2}
{'type': 'loss', 'content': 0.04852072894573212, 'timestamp': '2025-10-02 00:41:04.355274', 'step': 16825, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:04.420628', 'step': 16825, 'epoch': 2}
{'type': 'loss', 'content': 0.07672026753425598, 'timestamp': '2025-10-02 00:41:04.423759', 'step': 16826, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:41:04.488782', 'step': 16826, 'epoch': 2}
{'type': 'loss', 'content': 0.0285198837518692, 'timestamp': '2025-10-02 00:41:04.499344', 'step': 16827, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:04.555983', 'step': 16827, 'epoch': 2}
{'type': 'loss', 'content': 0.05188392102718353, 'timestamp': '2025-10-02 00:41:04.562496', 'step': 16828, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:04.618053', 'step': 16828, 'epoch': 2}
{'type': 'loss', 'content': 0.08296911418437958, 'timestamp': '2025-10-02 00:41:04.621806', 'step': 16829, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:04.677776', 'step': 16829, 'epoch': 2}
{'type': 'loss', 'content': 0.11379465460777283, 'timestamp': '2025-10-02 00:41:04.682013', 'step': 16830, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:04.739872', 'step': 16830, 'epoch': 2}
{'type': 'loss', 'content': 0.11477790772914886, 'timestamp': '2025-10-02 00:41:04.742514', 'step': 16831, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:04.797872', 'step': 16831, 'epoch': 2}
{'type': 'loss', 'content': 0.05816437304019928, 'timestamp': '2025-10-02 00:41:04.804325', 'step': 16832, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:41:04.871619', 'step': 16832, 'epoch': 2}
{'type': 'loss', 'content': 0.010400930419564247, 'timestamp': '2025-10-02 00:41:04.883134', 'step': 16833, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:04.946992', 'step': 16833, 'epoch': 2}
{'type': 'loss', 'content': 0.03108789026737213, 'timestamp': '2025-10-02 00:41:04.957491', 'step': 16834, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:41:05.022942', 'step': 16834, 'epoch': 2}
{'type': 'loss', 'content': 0.07365015149116516, 'timestamp': '2025-10-02 00:41:05.033584', 'step': 16835, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:05.089093', 'step': 16835, 'epoch': 2}
{'type': 'loss', 'content': 0.10170243680477142, 'timestamp': '2025-10-02 00:41:05.095804', 'step': 16836, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:05.150853', 'step': 16836, 'epoch': 2}
{'type': 'loss', 'content': 0.15229764580726624, 'timestamp': '2025-10-02 00:41:05.154589', 'step': 16837, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:05.211192', 'step': 16837, 'epoch': 2}
{'type': 'loss', 'content': 0.015289840288460255, 'timestamp': '2025-10-02 00:41:05.213560', 'step': 16838, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:05.269784', 'step': 16838, 'epoch': 2}
{'type': 'loss', 'content': 0.09455669671297073, 'timestamp': '2025-10-02 00:41:05.273050', 'step': 16839, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:05.328414', 'step': 16839, 'epoch': 2}
{'type': 'loss', 'content': 0.07557611167430878, 'timestamp': '2025-10-02 00:41:05.335450', 'step': 16840, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:05.392217', 'step': 16840, 'epoch': 2}
{'type': 'loss', 'content': 0.002927821595221758, 'timestamp': '2025-10-02 00:41:05.400052', 'step': 16841, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:05.456448', 'step': 16841, 'epoch': 2}
{'type': 'loss', 'content': 0.023291518911719322, 'timestamp': '2025-10-02 00:41:05.464066', 'step': 16842, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:05.521408', 'step': 16842, 'epoch': 2}
{'type': 'loss', 'content': 0.08957383781671524, 'timestamp': '2025-10-02 00:41:05.526445', 'step': 16843, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:05.583504', 'step': 16843, 'epoch': 2}
{'type': 'loss', 'content': 0.0370132252573967, 'timestamp': '2025-10-02 00:41:05.590401', 'step': 16844, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:05.649358', 'step': 16844, 'epoch': 2}
{'type': 'loss', 'content': 0.011764521710574627, 'timestamp': '2025-10-02 00:41:05.657076', 'step': 16845, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:05.715306', 'step': 16845, 'epoch': 2}
{'type': 'loss', 'content': 0.03957344591617584, 'timestamp': '2025-10-02 00:41:05.724831', 'step': 16846, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:05.780721', 'step': 16846, 'epoch': 2}
{'type': 'loss', 'content': 0.066310353577137, 'timestamp': '2025-10-02 00:41:05.784085', 'step': 16847, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:05.839606', 'step': 16847, 'epoch': 2}
{'type': 'loss', 'content': 0.05074496567249298, 'timestamp': '2025-10-02 00:41:05.846025', 'step': 16848, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:41:05.906980', 'step': 16848, 'epoch': 2}
{'type': 'loss', 'content': 0.05284012481570244, 'timestamp': '2025-10-02 00:41:05.918723', 'step': 16849, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:05.975454', 'step': 16849, 'epoch': 2}
{'type': 'loss', 'content': 0.031470201909542084, 'timestamp': '2025-10-02 00:41:05.979872', 'step': 16850, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:06.037178', 'step': 16850, 'epoch': 2}
{'type': 'loss', 'content': 0.04582570865750313, 'timestamp': '2025-10-02 00:41:06.043099', 'step': 16851, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:06.099269', 'step': 16851, 'epoch': 2}
{'type': 'loss', 'content': 0.12163036316633224, 'timestamp': '2025-10-02 00:41:06.106464', 'step': 16852, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:41:06.176414', 'step': 16852, 'epoch': 2}
{'type': 'loss', 'content': 0.013867597095668316, 'timestamp': '2025-10-02 00:41:06.189781', 'step': 16853, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:06.255252', 'step': 16853, 'epoch': 2}
{'type': 'loss', 'content': 0.013603787869215012, 'timestamp': '2025-10-02 00:41:06.265734', 'step': 16854, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:06.323508', 'step': 16854, 'epoch': 2}
{'type': 'loss', 'content': 0.14060312509536743, 'timestamp': '2025-10-02 00:41:06.326912', 'step': 16855, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:06.383516', 'step': 16855, 'epoch': 2}
{'type': 'loss', 'content': 0.04692745581269264, 'timestamp': '2025-10-02 00:41:06.390091', 'step': 16856, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:06.445892', 'step': 16856, 'epoch': 2}
{'type': 'loss', 'content': 0.05058713257312775, 'timestamp': '2025-10-02 00:41:06.452037', 'step': 16857, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:06.511355', 'step': 16857, 'epoch': 2}
{'type': 'loss', 'content': 0.06408531218767166, 'timestamp': '2025-10-02 00:41:06.520890', 'step': 16858, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:06.577842', 'step': 16858, 'epoch': 2}
{'type': 'loss', 'content': 0.1450783759355545, 'timestamp': '2025-10-02 00:41:06.580748', 'step': 16859, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:06.636298', 'step': 16859, 'epoch': 2}
{'type': 'loss', 'content': 0.1259320229291916, 'timestamp': '2025-10-02 00:41:06.646656', 'step': 16860, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:06.702709', 'step': 16860, 'epoch': 2}
{'type': 'loss', 'content': 0.035419855266809464, 'timestamp': '2025-10-02 00:41:06.710378', 'step': 16861, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:06.766083', 'step': 16861, 'epoch': 2}
{'type': 'loss', 'content': 0.10830049216747284, 'timestamp': '2025-10-02 00:41:06.775436', 'step': 16862, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:06.831977', 'step': 16862, 'epoch': 2}
{'type': 'loss', 'content': 0.017787335440516472, 'timestamp': '2025-10-02 00:41:06.841311', 'step': 16863, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:06.898116', 'step': 16863, 'epoch': 2}
{'type': 'loss', 'content': 0.0059694391675293446, 'timestamp': '2025-10-02 00:41:06.908450', 'step': 16864, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:41:06.964346', 'step': 16864, 'epoch': 2}
{'type': 'loss', 'content': 0.18941707909107208, 'timestamp': '2025-10-02 00:41:06.967722', 'step': 16865, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:07.022509', 'step': 16865, 'epoch': 2}
{'type': 'loss', 'content': 0.16932915151119232, 'timestamp': '2025-10-02 00:41:07.025330', 'step': 16866, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:07.081125', 'step': 16866, 'epoch': 2}
{'type': 'loss', 'content': 0.08648642897605896, 'timestamp': '2025-10-02 00:41:07.083752', 'step': 16867, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:07.138618', 'step': 16867, 'epoch': 2}
{'type': 'loss', 'content': 0.0468277707695961, 'timestamp': '2025-10-02 00:41:07.146961', 'step': 16868, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:07.202055', 'step': 16868, 'epoch': 2}
{'type': 'loss', 'content': 0.08170752972364426, 'timestamp': '2025-10-02 00:41:07.204529', 'step': 16869, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:07.259635', 'step': 16869, 'epoch': 2}
{'type': 'loss', 'content': 0.05160900577902794, 'timestamp': '2025-10-02 00:41:07.262013', 'step': 16870, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:07.316069', 'step': 16870, 'epoch': 2}
{'type': 'loss', 'content': 0.09018518030643463, 'timestamp': '2025-10-02 00:41:07.318658', 'step': 16871, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:07.374137', 'step': 16871, 'epoch': 2}
{'type': 'loss', 'content': 0.032903894782066345, 'timestamp': '2025-10-02 00:41:07.380584', 'step': 16872, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:07.433825', 'step': 16872, 'epoch': 2}
{'type': 'loss', 'content': 0.1130140945315361, 'timestamp': '2025-10-02 00:41:07.437127', 'step': 16873, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:41:07.507195', 'step': 16873, 'epoch': 2}
{'type': 'loss', 'content': 0.017491672188043594, 'timestamp': '2025-10-02 00:41:07.519627', 'step': 16874, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:07.574170', 'step': 16874, 'epoch': 2}
{'type': 'loss', 'content': 0.0637425035238266, 'timestamp': '2025-10-02 00:41:07.576492', 'step': 16875, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:41:07.638660', 'step': 16875, 'epoch': 2}
{'type': 'loss', 'content': 0.06186020001769066, 'timestamp': '2025-10-02 00:41:07.650263', 'step': 16876, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:07.706102', 'step': 16876, 'epoch': 2}
{'type': 'loss', 'content': 0.018869077786803246, 'timestamp': '2025-10-02 00:41:07.712228', 'step': 16877, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:07.770847', 'step': 16877, 'epoch': 2}
{'type': 'loss', 'content': 0.1184321939945221, 'timestamp': '2025-10-02 00:41:07.774026', 'step': 16878, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:07.830052', 'step': 16878, 'epoch': 2}
{'type': 'loss', 'content': 0.07172655314207077, 'timestamp': '2025-10-02 00:41:07.832596', 'step': 16879, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:41:07.891141', 'step': 16879, 'epoch': 2}
{'type': 'loss', 'content': 0.03716091811656952, 'timestamp': '2025-10-02 00:41:07.899797', 'step': 16880, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:07.953110', 'step': 16880, 'epoch': 2}
{'type': 'loss', 'content': 0.1960454136133194, 'timestamp': '2025-10-02 00:41:07.956646', 'step': 16881, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:08.013663', 'step': 16881, 'epoch': 2}
{'type': 'loss', 'content': 0.025144299492239952, 'timestamp': '2025-10-02 00:41:08.023192', 'step': 16882, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:08.077793', 'step': 16882, 'epoch': 2}
{'type': 'loss', 'content': 0.06684567779302597, 'timestamp': '2025-10-02 00:41:08.080291', 'step': 16883, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:08.136989', 'step': 16883, 'epoch': 2}
{'type': 'loss', 'content': 0.056314799934625626, 'timestamp': '2025-10-02 00:41:08.143586', 'step': 16884, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:08.199126', 'step': 16884, 'epoch': 2}
{'type': 'loss', 'content': 0.012606427073478699, 'timestamp': '2025-10-02 00:41:08.206832', 'step': 16885, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:08.270191', 'step': 16885, 'epoch': 2}
{'type': 'loss', 'content': 0.016223503276705742, 'timestamp': '2025-10-02 00:41:08.280696', 'step': 16886, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:08.335905', 'step': 16886, 'epoch': 2}
{'type': 'loss', 'content': 0.19015108048915863, 'timestamp': '2025-10-02 00:41:08.338518', 'step': 16887, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:08.393220', 'step': 16887, 'epoch': 2}
{'type': 'loss', 'content': 0.03477011248469353, 'timestamp': '2025-10-02 00:41:08.399351', 'step': 16888, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:08.453454', 'step': 16888, 'epoch': 2}
{'type': 'loss', 'content': 0.1760001927614212, 'timestamp': '2025-10-02 00:41:08.456160', 'step': 16889, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:08.511215', 'step': 16889, 'epoch': 2}
{'type': 'loss', 'content': 0.02480611950159073, 'timestamp': '2025-10-02 00:41:08.514461', 'step': 16890, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:08.571676', 'step': 16890, 'epoch': 2}
{'type': 'loss', 'content': 0.11845909804105759, 'timestamp': '2025-10-02 00:41:08.574372', 'step': 16891, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:41:08.634996', 'step': 16891, 'epoch': 2}
{'type': 'loss', 'content': 0.029248032718896866, 'timestamp': '2025-10-02 00:41:08.645935', 'step': 16892, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:08.701788', 'step': 16892, 'epoch': 2}
{'type': 'loss', 'content': 0.06077176332473755, 'timestamp': '2025-10-02 00:41:08.704491', 'step': 16893, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:08.760014', 'step': 16893, 'epoch': 2}
{'type': 'loss', 'content': 0.10109303891658783, 'timestamp': '2025-10-02 00:41:08.764724', 'step': 16894, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:08.822056', 'step': 16894, 'epoch': 2}
{'type': 'loss', 'content': 0.13852253556251526, 'timestamp': '2025-10-02 00:41:08.824506', 'step': 16895, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:08.879981', 'step': 16895, 'epoch': 2}
{'type': 'loss', 'content': 0.08059366047382355, 'timestamp': '2025-10-02 00:41:08.887050', 'step': 16896, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:08.942489', 'step': 16896, 'epoch': 2}
{'type': 'loss', 'content': 0.06947297602891922, 'timestamp': '2025-10-02 00:41:08.946380', 'step': 16897, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:09.003604', 'step': 16897, 'epoch': 2}
{'type': 'loss', 'content': 0.056938640773296356, 'timestamp': '2025-10-02 00:41:09.006506', 'step': 16898, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:09.061703', 'step': 16898, 'epoch': 2}
{'type': 'loss', 'content': 0.01499087456613779, 'timestamp': '2025-10-02 00:41:09.064839', 'step': 16899, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:09.120178', 'step': 16899, 'epoch': 2}
{'type': 'loss', 'content': 0.14090940356254578, 'timestamp': '2025-10-02 00:41:09.127432', 'step': 16900, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:09.181886', 'step': 16900, 'epoch': 2}
{'type': 'loss', 'content': 0.09036789834499359, 'timestamp': '2025-10-02 00:41:09.184564', 'step': 16901, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:41:09.239321', 'step': 16901, 'epoch': 2}
{'type': 'loss', 'content': 0.13236145675182343, 'timestamp': '2025-10-02 00:41:09.241798', 'step': 16902, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:09.300391', 'step': 16902, 'epoch': 2}
{'type': 'loss', 'content': 0.018319062888622284, 'timestamp': '2025-10-02 00:41:09.302987', 'step': 16903, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:09.358363', 'step': 16903, 'epoch': 2}
{'type': 'loss', 'content': 0.033272165805101395, 'timestamp': '2025-10-02 00:41:09.365768', 'step': 16904, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:41:09.420029', 'step': 16904, 'epoch': 2}
{'type': 'loss', 'content': 0.06608352065086365, 'timestamp': '2025-10-02 00:41:09.422871', 'step': 16905, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:09.480446', 'step': 16905, 'epoch': 2}
{'type': 'loss', 'content': 0.09001554548740387, 'timestamp': '2025-10-02 00:41:09.483081', 'step': 16906, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:09.537929', 'step': 16906, 'epoch': 2}
{'type': 'loss', 'content': 0.018976671621203423, 'timestamp': '2025-10-02 00:41:09.540657', 'step': 16907, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:41:09.596233', 'step': 16907, 'epoch': 2}
{'type': 'loss', 'content': 0.07784783840179443, 'timestamp': '2025-10-02 00:41:09.602882', 'step': 16908, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:09.657006', 'step': 16908, 'epoch': 2}
{'type': 'loss', 'content': 0.12304871529340744, 'timestamp': '2025-10-02 00:41:09.667242', 'step': 16909, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:41:09.728907', 'step': 16909, 'epoch': 2}
{'type': 'loss', 'content': 0.040390025824308395, 'timestamp': '2025-10-02 00:41:09.739101', 'step': 16910, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:09.795007', 'step': 16910, 'epoch': 2}
{'type': 'loss', 'content': 0.00026381525094620883, 'timestamp': '2025-10-02 00:41:09.802608', 'step': 16911, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:41:09.865491', 'step': 16911, 'epoch': 2}
{'type': 'loss', 'content': 0.01877421699464321, 'timestamp': '2025-10-02 00:41:09.877120', 'step': 16912, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:09.931522', 'step': 16912, 'epoch': 2}
{'type': 'loss', 'content': 0.05374163016676903, 'timestamp': '2025-10-02 00:41:09.941825', 'step': 16913, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:10.001198', 'step': 16913, 'epoch': 2}
{'type': 'loss', 'content': 0.05205246061086655, 'timestamp': '2025-10-02 00:41:10.004080', 'step': 16914, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:10.060208', 'step': 16914, 'epoch': 2}
{'type': 'loss', 'content': 0.046884600073099136, 'timestamp': '2025-10-02 00:41:10.067671', 'step': 16915, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:10.122699', 'step': 16915, 'epoch': 2}
{'type': 'loss', 'content': 0.0475652813911438, 'timestamp': '2025-10-02 00:41:10.128698', 'step': 16916, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:41:10.187066', 'step': 16916, 'epoch': 2}
{'type': 'loss', 'content': 0.05019155517220497, 'timestamp': '2025-10-02 00:41:10.198026', 'step': 16917, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:41:10.261939', 'step': 16917, 'epoch': 2}
{'type': 'loss', 'content': 0.026734821498394012, 'timestamp': '2025-10-02 00:41:10.272805', 'step': 16918, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:10.327265', 'step': 16918, 'epoch': 2}
{'type': 'loss', 'content': 0.04712030291557312, 'timestamp': '2025-10-02 00:41:10.329658', 'step': 16919, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:10.383838', 'step': 16919, 'epoch': 2}
{'type': 'loss', 'content': 0.03397994861006737, 'timestamp': '2025-10-02 00:41:10.390706', 'step': 16920, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:10.444233', 'step': 16920, 'epoch': 2}
{'type': 'loss', 'content': 0.2330740988254547, 'timestamp': '2025-10-02 00:41:10.446790', 'step': 16921, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:10.502074', 'step': 16921, 'epoch': 2}
{'type': 'loss', 'content': 0.03502935171127319, 'timestamp': '2025-10-02 00:41:10.504475', 'step': 16922, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:10.558950', 'step': 16922, 'epoch': 2}
{'type': 'loss', 'content': 0.0729036033153534, 'timestamp': '2025-10-02 00:41:10.561352', 'step': 16923, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:10.615796', 'step': 16923, 'epoch': 2}
{'type': 'loss', 'content': 0.01946214586496353, 'timestamp': '2025-10-02 00:41:10.621966', 'step': 16924, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:10.675862', 'step': 16924, 'epoch': 2}
{'type': 'loss', 'content': 0.04314495250582695, 'timestamp': '2025-10-02 00:41:10.681869', 'step': 16925, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:10.737161', 'step': 16925, 'epoch': 2}
{'type': 'loss', 'content': 0.036547910422086716, 'timestamp': '2025-10-02 00:41:10.744620', 'step': 16926, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:10.799290', 'step': 16926, 'epoch': 2}
{'type': 'loss', 'content': 0.05800691619515419, 'timestamp': '2025-10-02 00:41:10.805315', 'step': 16927, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:10.860046', 'step': 16927, 'epoch': 2}
{'type': 'loss', 'content': 0.02298078127205372, 'timestamp': '2025-10-02 00:41:10.866811', 'step': 16928, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:10.921326', 'step': 16928, 'epoch': 2}
{'type': 'loss', 'content': 0.05533367767930031, 'timestamp': '2025-10-02 00:41:10.924391', 'step': 16929, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:10.977844', 'step': 16929, 'epoch': 2}
{'type': 'loss', 'content': 0.15581592917442322, 'timestamp': '2025-10-02 00:41:10.980714', 'step': 16930, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:11.035758', 'step': 16930, 'epoch': 2}
{'type': 'loss', 'content': 0.10783611983060837, 'timestamp': '2025-10-02 00:41:11.038294', 'step': 16931, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:11.093032', 'step': 16931, 'epoch': 2}
{'type': 'loss', 'content': 0.1735973358154297, 'timestamp': '2025-10-02 00:41:11.099310', 'step': 16932, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:11.154022', 'step': 16932, 'epoch': 2}
{'type': 'loss', 'content': 0.07398295402526855, 'timestamp': '2025-10-02 00:41:11.156379', 'step': 16933, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:11.211083', 'step': 16933, 'epoch': 2}
{'type': 'loss', 'content': 0.08461705595254898, 'timestamp': '2025-10-02 00:41:11.213399', 'step': 16934, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:41:11.297249', 'step': 16934, 'epoch': 2}
{'type': 'loss', 'content': 0.02264564484357834, 'timestamp': '2025-10-02 00:41:11.311033', 'step': 16935, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:11.366811', 'step': 16935, 'epoch': 2}
{'type': 'loss', 'content': 0.025581752881407738, 'timestamp': '2025-10-02 00:41:11.372855', 'step': 16936, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:11.427351', 'step': 16936, 'epoch': 2}
{'type': 'loss', 'content': 0.0320005901157856, 'timestamp': '2025-10-02 00:41:11.429743', 'step': 16937, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:11.484292', 'step': 16937, 'epoch': 2}
{'type': 'loss', 'content': 0.03534905984997749, 'timestamp': '2025-10-02 00:41:11.486990', 'step': 16938, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:11.541029', 'step': 16938, 'epoch': 2}
{'type': 'loss', 'content': 0.039047446101903915, 'timestamp': '2025-10-02 00:41:11.548629', 'step': 16939, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:11.602507', 'step': 16939, 'epoch': 2}
{'type': 'loss', 'content': 0.028825119137763977, 'timestamp': '2025-10-02 00:41:11.608520', 'step': 16940, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:11.661907', 'step': 16940, 'epoch': 2}
{'type': 'loss', 'content': 0.04332137852907181, 'timestamp': '2025-10-02 00:41:11.664388', 'step': 16941, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:11.719666', 'step': 16941, 'epoch': 2}
{'type': 'loss', 'content': 0.12869364023208618, 'timestamp': '2025-10-02 00:41:11.722879', 'step': 16942, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:11.778128', 'step': 16942, 'epoch': 2}
{'type': 'loss', 'content': 0.15100273489952087, 'timestamp': '2025-10-02 00:41:11.780965', 'step': 16943, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:11.835680', 'step': 16943, 'epoch': 2}
{'type': 'loss', 'content': 0.06485986709594727, 'timestamp': '2025-10-02 00:41:11.842013', 'step': 16944, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:41:11.902333', 'step': 16944, 'epoch': 2}
{'type': 'loss', 'content': 0.0612245611846447, 'timestamp': '2025-10-02 00:41:11.913290', 'step': 16945, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:11.969338', 'step': 16945, 'epoch': 2}
{'type': 'loss', 'content': 0.0865858942270279, 'timestamp': '2025-10-02 00:41:11.972338', 'step': 16946, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:41:12.047217', 'step': 16946, 'epoch': 2}
{'type': 'loss', 'content': 0.012241111136972904, 'timestamp': '2025-10-02 00:41:12.060418', 'step': 16947, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:12.116743', 'step': 16947, 'epoch': 2}
{'type': 'loss', 'content': 0.03700568899512291, 'timestamp': '2025-10-02 00:41:12.127070', 'step': 16948, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:41:12.181169', 'step': 16948, 'epoch': 2}
{'type': 'loss', 'content': 0.1359664797782898, 'timestamp': '2025-10-02 00:41:12.183814', 'step': 16949, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:12.238210', 'step': 16949, 'epoch': 2}
{'type': 'loss', 'content': 0.024077976122498512, 'timestamp': '2025-10-02 00:41:12.240828', 'step': 16950, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:41:12.300295', 'step': 16950, 'epoch': 2}
{'type': 'loss', 'content': 0.07392991334199905, 'timestamp': '2025-10-02 00:41:12.310465', 'step': 16951, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:12.372586', 'step': 16951, 'epoch': 2}
{'type': 'loss', 'content': 0.021406294777989388, 'timestamp': '2025-10-02 00:41:12.383786', 'step': 16952, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:12.439093', 'step': 16952, 'epoch': 2}
{'type': 'loss', 'content': 0.008006840012967587, 'timestamp': '2025-10-02 00:41:12.441964', 'step': 16953, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:12.499395', 'step': 16953, 'epoch': 2}
{'type': 'loss', 'content': 0.034799735993146896, 'timestamp': '2025-10-02 00:41:12.508913', 'step': 16954, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:12.563432', 'step': 16954, 'epoch': 2}
{'type': 'loss', 'content': 0.061447612941265106, 'timestamp': '2025-10-02 00:41:12.565918', 'step': 16955, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:12.622412', 'step': 16955, 'epoch': 2}
{'type': 'loss', 'content': 0.03606359288096428, 'timestamp': '2025-10-02 00:41:12.632759', 'step': 16956, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:12.687894', 'step': 16956, 'epoch': 2}
{'type': 'loss', 'content': 0.032480329275131226, 'timestamp': '2025-10-02 00:41:12.691790', 'step': 16957, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:12.746873', 'step': 16957, 'epoch': 2}
{'type': 'loss', 'content': 0.044249922037124634, 'timestamp': '2025-10-02 00:41:12.754517', 'step': 16958, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:12.809614', 'step': 16958, 'epoch': 2}
{'type': 'loss', 'content': 0.0639987513422966, 'timestamp': '2025-10-02 00:41:12.817290', 'step': 16959, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:12.872276', 'step': 16959, 'epoch': 2}
{'type': 'loss', 'content': 0.12623021006584167, 'timestamp': '2025-10-02 00:41:12.878400', 'step': 16960, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:12.932785', 'step': 16960, 'epoch': 2}
{'type': 'loss', 'content': 0.034106794744729996, 'timestamp': '2025-10-02 00:41:12.943066', 'step': 16961, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:12.998629', 'step': 16961, 'epoch': 2}
{'type': 'loss', 'content': 0.07557429373264313, 'timestamp': '2025-10-02 00:41:13.008194', 'step': 16962, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:13.064310', 'step': 16962, 'epoch': 2}
{'type': 'loss', 'content': 0.11613839864730835, 'timestamp': '2025-10-02 00:41:13.071882', 'step': 16963, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:13.126745', 'step': 16963, 'epoch': 2}
{'type': 'loss', 'content': 0.06425236165523529, 'timestamp': '2025-10-02 00:41:13.132855', 'step': 16964, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:13.187305', 'step': 16964, 'epoch': 2}
{'type': 'loss', 'content': 0.03967227041721344, 'timestamp': '2025-10-02 00:41:13.196949', 'step': 16965, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:41:13.259514', 'step': 16965, 'epoch': 2}
{'type': 'loss', 'content': 0.05168034881353378, 'timestamp': '2025-10-02 00:41:13.270136', 'step': 16966, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:13.326333', 'step': 16966, 'epoch': 2}
{'type': 'loss', 'content': 0.08855437487363815, 'timestamp': '2025-10-02 00:41:13.335882', 'step': 16967, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:13.390751', 'step': 16967, 'epoch': 2}
{'type': 'loss', 'content': 0.1372738629579544, 'timestamp': '2025-10-02 00:41:13.396892', 'step': 16968, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:13.450202', 'step': 16968, 'epoch': 2}
{'type': 'loss', 'content': 0.04299762472510338, 'timestamp': '2025-10-02 00:41:13.452795', 'step': 16969, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:13.508207', 'step': 16969, 'epoch': 2}
{'type': 'loss', 'content': 0.02324129454791546, 'timestamp': '2025-10-02 00:41:13.517538', 'step': 16970, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:13.573675', 'step': 16970, 'epoch': 2}
{'type': 'loss', 'content': 0.021560296416282654, 'timestamp': '2025-10-02 00:41:13.582977', 'step': 16971, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:13.638406', 'step': 16971, 'epoch': 2}
{'type': 'loss', 'content': 0.023398106917738914, 'timestamp': '2025-10-02 00:41:13.645079', 'step': 16972, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:41:13.700004', 'step': 16972, 'epoch': 2}
{'type': 'loss', 'content': 0.08531053364276886, 'timestamp': '2025-10-02 00:41:13.702895', 'step': 16973, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:13.757507', 'step': 16973, 'epoch': 2}
{'type': 'loss', 'content': 0.150472030043602, 'timestamp': '2025-10-02 00:41:13.759883', 'step': 16974, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:13.815197', 'step': 16974, 'epoch': 2}
{'type': 'loss', 'content': 0.17518265545368195, 'timestamp': '2025-10-02 00:41:13.818123', 'step': 16975, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:13.872425', 'step': 16975, 'epoch': 2}
{'type': 'loss', 'content': 0.09215798228979111, 'timestamp': '2025-10-02 00:41:13.879316', 'step': 16976, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:13.933694', 'step': 16976, 'epoch': 2}
{'type': 'loss', 'content': 0.09058253467082977, 'timestamp': '2025-10-02 00:41:13.937510', 'step': 16977, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:13.997052', 'step': 16977, 'epoch': 2}
{'type': 'loss', 'content': 0.05477564036846161, 'timestamp': '2025-10-02 00:41:14.004595', 'step': 16978, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:14.062670', 'step': 16978, 'epoch': 2}
{'type': 'loss', 'content': 0.003271046793088317, 'timestamp': '2025-10-02 00:41:14.065543', 'step': 16979, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:41:14.128430', 'step': 16979, 'epoch': 2}
{'type': 'loss', 'content': 0.020825263112783432, 'timestamp': '2025-10-02 00:41:14.139862', 'step': 16980, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:14.194511', 'step': 16980, 'epoch': 2}
{'type': 'loss', 'content': 0.056575000286102295, 'timestamp': '2025-10-02 00:41:14.200558', 'step': 16981, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:14.255523', 'step': 16981, 'epoch': 2}
{'type': 'loss', 'content': 0.19827118515968323, 'timestamp': '2025-10-02 00:41:14.258228', 'step': 16982, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:14.314477', 'step': 16982, 'epoch': 2}
{'type': 'loss', 'content': 0.06751712411642075, 'timestamp': '2025-10-02 00:41:14.323991', 'step': 16983, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:14.379850', 'step': 16983, 'epoch': 2}
{'type': 'loss', 'content': 0.09195616096258163, 'timestamp': '2025-10-02 00:41:14.386372', 'step': 16984, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:14.441159', 'step': 16984, 'epoch': 2}
{'type': 'loss', 'content': 0.04832480847835541, 'timestamp': '2025-10-02 00:41:14.444239', 'step': 16985, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:14.498602', 'step': 16985, 'epoch': 2}
{'type': 'loss', 'content': 0.09699523448944092, 'timestamp': '2025-10-02 00:41:14.501843', 'step': 16986, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:14.556828', 'step': 16986, 'epoch': 2}
{'type': 'loss', 'content': 0.030673835426568985, 'timestamp': '2025-10-02 00:41:14.564443', 'step': 16987, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:14.619896', 'step': 16987, 'epoch': 2}
{'type': 'loss', 'content': 0.08639990538358688, 'timestamp': '2025-10-02 00:41:14.628838', 'step': 16988, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:14.683408', 'step': 16988, 'epoch': 2}
{'type': 'loss', 'content': 0.09650086611509323, 'timestamp': '2025-10-02 00:41:14.685712', 'step': 16989, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:14.740039', 'step': 16989, 'epoch': 2}
{'type': 'loss', 'content': 0.0716494470834732, 'timestamp': '2025-10-02 00:41:14.746101', 'step': 16990, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:14.800424', 'step': 16990, 'epoch': 2}
{'type': 'loss', 'content': 0.06281434744596481, 'timestamp': '2025-10-02 00:41:14.803352', 'step': 16991, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:14.858340', 'step': 16991, 'epoch': 2}
{'type': 'loss', 'content': 0.030468741431832314, 'timestamp': '2025-10-02 00:41:14.866979', 'step': 16992, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:14.920400', 'step': 16992, 'epoch': 2}
{'type': 'loss', 'content': 0.1596352607011795, 'timestamp': '2025-10-02 00:41:14.922806', 'step': 16993, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:14.977798', 'step': 16993, 'epoch': 2}
{'type': 'loss', 'content': 0.01599622704088688, 'timestamp': '2025-10-02 00:41:14.985463', 'step': 16994, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:15.039836', 'step': 16994, 'epoch': 2}
{'type': 'loss', 'content': 0.02228878065943718, 'timestamp': '2025-10-02 00:41:15.042560', 'step': 16995, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:41:15.106610', 'step': 16995, 'epoch': 2}
{'type': 'loss', 'content': 0.13884983956813812, 'timestamp': '2025-10-02 00:41:15.118062', 'step': 16996, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:15.172498', 'step': 16996, 'epoch': 2}
{'type': 'loss', 'content': 0.13519041240215302, 'timestamp': '2025-10-02 00:41:15.174788', 'step': 16997, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:15.229711', 'step': 16997, 'epoch': 2}
{'type': 'loss', 'content': 0.053980905562639236, 'timestamp': '2025-10-02 00:41:15.231945', 'step': 16998, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:15.288040', 'step': 16998, 'epoch': 2}
{'type': 'loss', 'content': 0.021820221096277237, 'timestamp': '2025-10-02 00:41:15.297524', 'step': 16999, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:15.352455', 'step': 16999, 'epoch': 2}
{'type': 'loss', 'content': 0.01726905256509781, 'timestamp': '2025-10-02 00:41:15.360699', 'step': 17000, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 17000', 'timestamp': '2025-10-02 00:41:16.007169', 'step': 17000, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:16.084224', 'step': 17000, 'epoch': 2}
{'type': 'loss', 'content': 0.2125442922115326, 'timestamp': '2025-10-02 00:41:16.096314', 'step': 17001, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:16.184786', 'step': 17001, 'epoch': 2}
{'type': 'loss', 'content': 0.14434440433979034, 'timestamp': '2025-10-02 00:41:16.192199', 'step': 17002, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:41:16.276903', 'step': 17002, 'epoch': 2}
{'type': 'loss', 'content': 0.04180034622550011, 'timestamp': '2025-10-02 00:41:16.288637', 'step': 17003, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:16.367178', 'step': 17003, 'epoch': 2}
{'type': 'loss', 'content': 0.051157016307115555, 'timestamp': '2025-10-02 00:41:16.386031', 'step': 17004, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:16.460856', 'step': 17004, 'epoch': 2}
{'type': 'loss', 'content': 0.09601705521345139, 'timestamp': '2025-10-02 00:41:16.470578', 'step': 17005, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:16.557848', 'step': 17005, 'epoch': 2}
{'type': 'loss', 'content': 0.06313217431306839, 'timestamp': '2025-10-02 00:41:16.560659', 'step': 17006, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:16.619839', 'step': 17006, 'epoch': 2}
{'type': 'loss', 'content': 0.018760064616799355, 'timestamp': '2025-10-02 00:41:16.624129', 'step': 17007, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:41:16.704522', 'step': 17007, 'epoch': 2}
{'type': 'loss', 'content': 0.08998388797044754, 'timestamp': '2025-10-02 00:41:16.720173', 'step': 17008, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:16.809857', 'step': 17008, 'epoch': 2}
{'type': 'loss', 'content': 0.10023868083953857, 'timestamp': '2025-10-02 00:41:16.820916', 'step': 17009, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:16.906008', 'step': 17009, 'epoch': 2}
{'type': 'loss', 'content': 0.07119160145521164, 'timestamp': '2025-10-02 00:41:16.909612', 'step': 17010, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:16.967503', 'step': 17010, 'epoch': 2}
{'type': 'loss', 'content': 0.039601996541023254, 'timestamp': '2025-10-02 00:41:16.978049', 'step': 17011, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:41:17.075649', 'step': 17011, 'epoch': 2}
{'type': 'loss', 'content': 0.010507783852517605, 'timestamp': '2025-10-02 00:41:17.090524', 'step': 17012, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:17.152692', 'step': 17012, 'epoch': 2}
{'type': 'loss', 'content': 0.08195232599973679, 'timestamp': '2025-10-02 00:41:17.155847', 'step': 17013, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:17.240009', 'step': 17013, 'epoch': 2}
{'type': 'loss', 'content': 0.04528401046991348, 'timestamp': '2025-10-02 00:41:17.252603', 'step': 17014, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:17.323518', 'step': 17014, 'epoch': 2}
{'type': 'loss', 'content': 0.0715818852186203, 'timestamp': '2025-10-02 00:41:17.327779', 'step': 17015, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:41:17.397219', 'step': 17015, 'epoch': 2}
{'type': 'loss', 'content': 0.046709463000297546, 'timestamp': '2025-10-02 00:41:17.412570', 'step': 17016, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:17.478475', 'step': 17016, 'epoch': 2}
{'type': 'loss', 'content': 0.04290620982646942, 'timestamp': '2025-10-02 00:41:17.488140', 'step': 17017, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:17.546953', 'step': 17017, 'epoch': 2}
{'type': 'loss', 'content': 0.08547177165746689, 'timestamp': '2025-10-02 00:41:17.550577', 'step': 17018, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:17.610072', 'step': 17018, 'epoch': 2}
{'type': 'loss', 'content': 0.04583575576543808, 'timestamp': '2025-10-02 00:41:17.617478', 'step': 17019, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:17.682560', 'step': 17019, 'epoch': 2}
{'type': 'loss', 'content': 0.06512802839279175, 'timestamp': '2025-10-02 00:41:17.690768', 'step': 17020, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:17.766897', 'step': 17020, 'epoch': 2}
{'type': 'loss', 'content': 0.0950770154595375, 'timestamp': '2025-10-02 00:41:17.772017', 'step': 17021, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:17.839495', 'step': 17021, 'epoch': 2}
{'type': 'loss', 'content': 0.010871159844100475, 'timestamp': '2025-10-02 00:41:17.845533', 'step': 17022, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:17.912084', 'step': 17022, 'epoch': 2}
{'type': 'loss', 'content': 0.05417805537581444, 'timestamp': '2025-10-02 00:41:17.923829', 'step': 17023, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:41:17.993710', 'step': 17023, 'epoch': 2}
{'type': 'loss', 'content': 0.07461189478635788, 'timestamp': '2025-10-02 00:41:18.004678', 'step': 17024, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:18.063664', 'step': 17024, 'epoch': 2}
{'type': 'loss', 'content': 0.03416094556450844, 'timestamp': '2025-10-02 00:41:18.073915', 'step': 17025, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:18.134615', 'step': 17025, 'epoch': 2}
{'type': 'loss', 'content': 0.04329498112201691, 'timestamp': '2025-10-02 00:41:18.137805', 'step': 17026, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:18.206050', 'step': 17026, 'epoch': 2}
{'type': 'loss', 'content': 0.07072277367115021, 'timestamp': '2025-10-02 00:41:18.210050', 'step': 17027, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:18.270066', 'step': 17027, 'epoch': 2}
{'type': 'loss', 'content': 0.025066792964935303, 'timestamp': '2025-10-02 00:41:18.278218', 'step': 17028, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:18.359102', 'step': 17028, 'epoch': 2}
{'type': 'loss', 'content': 0.013501139357686043, 'timestamp': '2025-10-02 00:41:18.366769', 'step': 17029, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:18.445051', 'step': 17029, 'epoch': 2}
{'type': 'loss', 'content': 0.16194617748260498, 'timestamp': '2025-10-02 00:41:18.448947', 'step': 17030, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:41:18.516184', 'step': 17030, 'epoch': 2}
{'type': 'loss', 'content': 0.11646930873394012, 'timestamp': '2025-10-02 00:41:18.520761', 'step': 17031, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:18.591065', 'step': 17031, 'epoch': 2}
{'type': 'loss', 'content': 0.027646111324429512, 'timestamp': '2025-10-02 00:41:18.598008', 'step': 17032, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:41:18.668188', 'step': 17032, 'epoch': 2}
{'type': 'loss', 'content': 0.014617961831390858, 'timestamp': '2025-10-02 00:41:18.679114', 'step': 17033, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:18.740915', 'step': 17033, 'epoch': 2}
{'type': 'loss', 'content': 0.11041919887065887, 'timestamp': '2025-10-02 00:41:18.745345', 'step': 17034, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:18.824249', 'step': 17034, 'epoch': 2}
{'type': 'loss', 'content': 0.0740894079208374, 'timestamp': '2025-10-02 00:41:18.828307', 'step': 17035, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:18.884444', 'step': 17035, 'epoch': 2}
{'type': 'loss', 'content': 0.12275893241167068, 'timestamp': '2025-10-02 00:41:18.892237', 'step': 17036, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:18.950711', 'step': 17036, 'epoch': 2}
{'type': 'loss', 'content': 0.028183376416563988, 'timestamp': '2025-10-02 00:41:18.955312', 'step': 17037, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:19.035427', 'step': 17037, 'epoch': 2}
{'type': 'loss', 'content': 0.10724253952503204, 'timestamp': '2025-10-02 00:41:19.041439', 'step': 17038, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:19.100063', 'step': 17038, 'epoch': 2}
{'type': 'loss', 'content': 0.052276611328125, 'timestamp': '2025-10-02 00:41:19.105045', 'step': 17039, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:19.165012', 'step': 17039, 'epoch': 2}
{'type': 'loss', 'content': 0.03767631575465202, 'timestamp': '2025-10-02 00:41:19.173442', 'step': 17040, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:41:19.245352', 'step': 17040, 'epoch': 2}
{'type': 'loss', 'content': 0.06766554713249207, 'timestamp': '2025-10-02 00:41:19.256324', 'step': 17041, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:19.316615', 'step': 17041, 'epoch': 2}
{'type': 'loss', 'content': 0.0009131905389949679, 'timestamp': '2025-10-02 00:41:19.331682', 'step': 17042, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:19.391516', 'step': 17042, 'epoch': 2}
{'type': 'loss', 'content': 0.02756466530263424, 'timestamp': '2025-10-02 00:41:19.395625', 'step': 17043, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:19.457830', 'step': 17043, 'epoch': 2}
{'type': 'loss', 'content': 0.03168722614645958, 'timestamp': '2025-10-02 00:41:19.466709', 'step': 17044, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:19.534419', 'step': 17044, 'epoch': 2}
{'type': 'loss', 'content': 0.041400715708732605, 'timestamp': '2025-10-02 00:41:19.543983', 'step': 17045, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:19.606081', 'step': 17045, 'epoch': 2}
{'type': 'loss', 'content': 0.04955463856458664, 'timestamp': '2025-10-02 00:41:19.609533', 'step': 17046, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:19.668777', 'step': 17046, 'epoch': 2}
{'type': 'loss', 'content': 0.056694481521844864, 'timestamp': '2025-10-02 00:41:19.671887', 'step': 17047, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:19.740292', 'step': 17047, 'epoch': 2}
{'type': 'loss', 'content': 0.03694968298077583, 'timestamp': '2025-10-02 00:41:19.746733', 'step': 17048, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:19.806392', 'step': 17048, 'epoch': 2}
{'type': 'loss', 'content': 0.07098115235567093, 'timestamp': '2025-10-02 00:41:19.811772', 'step': 17049, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:19.879313', 'step': 17049, 'epoch': 2}
{'type': 'loss', 'content': 0.10427455604076385, 'timestamp': '2025-10-02 00:41:19.884241', 'step': 17050, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:19.984639', 'step': 17050, 'epoch': 2}
{'type': 'loss', 'content': 0.046990975737571716, 'timestamp': '2025-10-02 00:41:19.992384', 'step': 17051, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:20.064293', 'step': 17051, 'epoch': 2}
{'type': 'loss', 'content': 0.041602883487939835, 'timestamp': '2025-10-02 00:41:20.072694', 'step': 17052, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:20.143192', 'step': 17052, 'epoch': 2}
{'type': 'loss', 'content': 0.031729187816381454, 'timestamp': '2025-10-02 00:41:20.147383', 'step': 17053, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:20.216817', 'step': 17053, 'epoch': 2}
{'type': 'loss', 'content': 0.011773377656936646, 'timestamp': '2025-10-02 00:41:20.221288', 'step': 17054, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:20.304223', 'step': 17054, 'epoch': 2}
{'type': 'loss', 'content': 0.0036921442952007055, 'timestamp': '2025-10-02 00:41:20.310088', 'step': 17055, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:20.368683', 'step': 17055, 'epoch': 2}
{'type': 'loss', 'content': 0.054089900106191635, 'timestamp': '2025-10-02 00:41:20.377074', 'step': 17056, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:20.440749', 'step': 17056, 'epoch': 2}
{'type': 'loss', 'content': 0.06442134082317352, 'timestamp': '2025-10-02 00:41:20.456594', 'step': 17057, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:20.523390', 'step': 17057, 'epoch': 2}
{'type': 'loss', 'content': 0.07516466081142426, 'timestamp': '2025-10-02 00:41:20.527748', 'step': 17058, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:20.599945', 'step': 17058, 'epoch': 2}
{'type': 'loss', 'content': 0.020948374643921852, 'timestamp': '2025-10-02 00:41:20.607490', 'step': 17059, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:20.682202', 'step': 17059, 'epoch': 2}
{'type': 'loss', 'content': 0.0484289787709713, 'timestamp': '2025-10-02 00:41:20.698536', 'step': 17060, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:20.765135', 'step': 17060, 'epoch': 2}
{'type': 'loss', 'content': 0.08516931533813477, 'timestamp': '2025-10-02 00:41:20.770584', 'step': 17061, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:20.861863', 'step': 17061, 'epoch': 2}
{'type': 'loss', 'content': 0.04480218142271042, 'timestamp': '2025-10-02 00:41:20.871245', 'step': 17062, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:20.934407', 'step': 17062, 'epoch': 2}
{'type': 'loss', 'content': 0.0369223989546299, 'timestamp': '2025-10-02 00:41:20.950840', 'step': 17063, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:21.013323', 'step': 17063, 'epoch': 2}
{'type': 'loss', 'content': 0.06177079305052757, 'timestamp': '2025-10-02 00:41:21.021245', 'step': 17064, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:21.082121', 'step': 17064, 'epoch': 2}
{'type': 'loss', 'content': 0.05836380273103714, 'timestamp': '2025-10-02 00:41:21.087544', 'step': 17065, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:21.168039', 'step': 17065, 'epoch': 2}
{'type': 'loss', 'content': 0.17313773930072784, 'timestamp': '2025-10-02 00:41:21.172955', 'step': 17066, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:41:21.252756', 'step': 17066, 'epoch': 2}
{'type': 'loss', 'content': 0.01619977317750454, 'timestamp': '2025-10-02 00:41:21.262955', 'step': 17067, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:21.322469', 'step': 17067, 'epoch': 2}
{'type': 'loss', 'content': 0.0752127543091774, 'timestamp': '2025-10-02 00:41:21.338665', 'step': 17068, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:41:21.412861', 'step': 17068, 'epoch': 2}
{'type': 'loss', 'content': 0.032053276896476746, 'timestamp': '2025-10-02 00:41:21.426395', 'step': 17069, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:21.499718', 'step': 17069, 'epoch': 2}
{'type': 'loss', 'content': 0.023572653532028198, 'timestamp': '2025-10-02 00:41:21.505386', 'step': 17070, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:21.566331', 'step': 17070, 'epoch': 2}
{'type': 'loss', 'content': 0.15745094418525696, 'timestamp': '2025-10-02 00:41:21.571372', 'step': 17071, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:21.632098', 'step': 17071, 'epoch': 2}
{'type': 'loss', 'content': 0.11095849424600601, 'timestamp': '2025-10-02 00:41:21.640766', 'step': 17072, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:21.710263', 'step': 17072, 'epoch': 2}
{'type': 'loss', 'content': 0.12816931307315826, 'timestamp': '2025-10-02 00:41:21.715115', 'step': 17073, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:21.775253', 'step': 17073, 'epoch': 2}
{'type': 'loss', 'content': 0.06103264540433884, 'timestamp': '2025-10-02 00:41:21.778456', 'step': 17074, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:21.849008', 'step': 17074, 'epoch': 2}
{'type': 'loss', 'content': 0.03965717554092407, 'timestamp': '2025-10-02 00:41:21.853683', 'step': 17075, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:21.929007', 'step': 17075, 'epoch': 2}
{'type': 'loss', 'content': 0.022080834954977036, 'timestamp': '2025-10-02 00:41:21.940286', 'step': 17076, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:22.008332', 'step': 17076, 'epoch': 2}
{'type': 'loss', 'content': 0.02670658752322197, 'timestamp': '2025-10-02 00:41:22.017797', 'step': 17077, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:22.077282', 'step': 17077, 'epoch': 2}
{'type': 'loss', 'content': 0.1534406542778015, 'timestamp': '2025-10-02 00:41:22.080280', 'step': 17078, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:41:22.152901', 'step': 17078, 'epoch': 2}
{'type': 'loss', 'content': 0.08397787064313889, 'timestamp': '2025-10-02 00:41:22.156427', 'step': 17079, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:22.216088', 'step': 17079, 'epoch': 2}
{'type': 'loss', 'content': 0.08491247892379761, 'timestamp': '2025-10-02 00:41:22.222267', 'step': 17080, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:22.288417', 'step': 17080, 'epoch': 2}
{'type': 'loss', 'content': 0.058363523334264755, 'timestamp': '2025-10-02 00:41:22.297649', 'step': 17081, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:22.356501', 'step': 17081, 'epoch': 2}
{'type': 'loss', 'content': 0.207918182015419, 'timestamp': '2025-10-02 00:41:22.359936', 'step': 17082, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:41:22.429297', 'step': 17082, 'epoch': 2}
{'type': 'loss', 'content': 0.010198106989264488, 'timestamp': '2025-10-02 00:41:22.439424', 'step': 17083, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:22.496503', 'step': 17083, 'epoch': 2}
{'type': 'loss', 'content': 0.09835337102413177, 'timestamp': '2025-10-02 00:41:22.503758', 'step': 17084, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:22.571708', 'step': 17084, 'epoch': 2}
{'type': 'loss', 'content': 0.10190405696630478, 'timestamp': '2025-10-02 00:41:22.575889', 'step': 17085, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:22.635754', 'step': 17085, 'epoch': 2}
{'type': 'loss', 'content': 0.028707319870591164, 'timestamp': '2025-10-02 00:41:22.639412', 'step': 17086, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:22.705264', 'step': 17086, 'epoch': 2}
{'type': 'loss', 'content': 0.05821540206670761, 'timestamp': '2025-10-02 00:41:22.709979', 'step': 17087, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:22.768701', 'step': 17087, 'epoch': 2}
{'type': 'loss', 'content': 0.14381876587867737, 'timestamp': '2025-10-02 00:41:22.776735', 'step': 17088, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:22.833943', 'step': 17088, 'epoch': 2}
{'type': 'loss', 'content': 0.06541883945465088, 'timestamp': '2025-10-02 00:41:22.836920', 'step': 17089, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:22.898329', 'step': 17089, 'epoch': 2}
{'type': 'loss', 'content': 0.07614290714263916, 'timestamp': '2025-10-02 00:41:22.902119', 'step': 17090, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:22.973369', 'step': 17090, 'epoch': 2}
{'type': 'loss', 'content': 0.05521897226572037, 'timestamp': '2025-10-02 00:41:22.987058', 'step': 17091, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:23.054078', 'step': 17091, 'epoch': 2}
{'type': 'loss', 'content': 0.023851530626416206, 'timestamp': '2025-10-02 00:41:23.061265', 'step': 17092, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:23.120717', 'step': 17092, 'epoch': 2}
{'type': 'loss', 'content': 0.1331000030040741, 'timestamp': '2025-10-02 00:41:23.135121', 'step': 17093, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:23.214153', 'step': 17093, 'epoch': 2}
{'type': 'loss', 'content': 0.023414164781570435, 'timestamp': '2025-10-02 00:41:23.218507', 'step': 17094, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:23.276601', 'step': 17094, 'epoch': 2}
{'type': 'loss', 'content': 0.07307276129722595, 'timestamp': '2025-10-02 00:41:23.280692', 'step': 17095, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:23.348395', 'step': 17095, 'epoch': 2}
{'type': 'loss', 'content': 0.07503040134906769, 'timestamp': '2025-10-02 00:41:23.357844', 'step': 17096, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:23.418105', 'step': 17096, 'epoch': 2}
{'type': 'loss', 'content': 0.038040198385715485, 'timestamp': '2025-10-02 00:41:23.428141', 'step': 17097, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:23.525391', 'step': 17097, 'epoch': 2}
{'type': 'loss', 'content': 0.011476504616439342, 'timestamp': '2025-10-02 00:41:23.539419', 'step': 17098, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:23.617895', 'step': 17098, 'epoch': 2}
{'type': 'loss', 'content': 0.05019168183207512, 'timestamp': '2025-10-02 00:41:23.633907', 'step': 17099, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:23.731212', 'step': 17099, 'epoch': 2}
{'type': 'loss', 'content': 0.03842423856258392, 'timestamp': '2025-10-02 00:41:23.749368', 'step': 17100, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:23.833584', 'step': 17100, 'epoch': 2}
{'type': 'loss', 'content': 0.09723068028688431, 'timestamp': '2025-10-02 00:41:23.850343', 'step': 17101, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:41:23.918123', 'step': 17101, 'epoch': 2}
{'type': 'loss', 'content': 0.17059671878814697, 'timestamp': '2025-10-02 00:41:23.920809', 'step': 17102, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:23.989247', 'step': 17102, 'epoch': 2}
{'type': 'loss', 'content': 0.01304646022617817, 'timestamp': '2025-10-02 00:41:24.000786', 'step': 17103, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:41:24.069289', 'step': 17103, 'epoch': 2}
{'type': 'loss', 'content': 0.0749862790107727, 'timestamp': '2025-10-02 00:41:24.077500', 'step': 17104, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:24.134656', 'step': 17104, 'epoch': 2}
{'type': 'loss', 'content': 0.04737547039985657, 'timestamp': '2025-10-02 00:41:24.140555', 'step': 17105, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:24.207422', 'step': 17105, 'epoch': 2}
{'type': 'loss', 'content': 0.12304764240980148, 'timestamp': '2025-10-02 00:41:24.221848', 'step': 17106, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:24.308685', 'step': 17106, 'epoch': 2}
{'type': 'loss', 'content': 0.12017647176980972, 'timestamp': '2025-10-02 00:41:24.312118', 'step': 17107, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:24.404754', 'step': 17107, 'epoch': 2}
{'type': 'loss', 'content': 0.03712410852313042, 'timestamp': '2025-10-02 00:41:24.418552', 'step': 17108, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:24.490368', 'step': 17108, 'epoch': 2}
{'type': 'loss', 'content': 0.1850704550743103, 'timestamp': '2025-10-02 00:41:24.494010', 'step': 17109, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:24.559514', 'step': 17109, 'epoch': 2}
{'type': 'loss', 'content': 0.05156783387064934, 'timestamp': '2025-10-02 00:41:24.568812', 'step': 17110, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:24.631707', 'step': 17110, 'epoch': 2}
{'type': 'loss', 'content': 0.07531752437353134, 'timestamp': '2025-10-02 00:41:24.641424', 'step': 17111, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:24.713532', 'step': 17111, 'epoch': 2}
{'type': 'loss', 'content': 0.043081365525722504, 'timestamp': '2025-10-02 00:41:24.720241', 'step': 17112, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:24.790617', 'step': 17112, 'epoch': 2}
{'type': 'loss', 'content': 0.0928889811038971, 'timestamp': '2025-10-02 00:41:24.799036', 'step': 17113, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:24.867526', 'step': 17113, 'epoch': 2}
{'type': 'loss', 'content': 0.04894360154867172, 'timestamp': '2025-10-02 00:41:24.870436', 'step': 17114, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:24.945753', 'step': 17114, 'epoch': 2}
{'type': 'loss', 'content': 0.05867167189717293, 'timestamp': '2025-10-02 00:41:24.949347', 'step': 17115, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:25.005798', 'step': 17115, 'epoch': 2}
{'type': 'loss', 'content': 0.07452698051929474, 'timestamp': '2025-10-02 00:41:25.018713', 'step': 17116, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:25.087146', 'step': 17116, 'epoch': 2}
{'type': 'loss', 'content': 0.020960956811904907, 'timestamp': '2025-10-02 00:41:25.098460', 'step': 17117, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:25.154011', 'step': 17117, 'epoch': 2}
{'type': 'loss', 'content': 0.06422270089387894, 'timestamp': '2025-10-02 00:41:25.161542', 'step': 17118, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:25.233361', 'step': 17118, 'epoch': 2}
{'type': 'loss', 'content': 0.0641191303730011, 'timestamp': '2025-10-02 00:41:25.243787', 'step': 17119, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:25.308318', 'step': 17119, 'epoch': 2}
{'type': 'loss', 'content': 0.05389977619051933, 'timestamp': '2025-10-02 00:41:25.320138', 'step': 17120, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:25.376199', 'step': 17120, 'epoch': 2}
{'type': 'loss', 'content': 0.07498420774936676, 'timestamp': '2025-10-02 00:41:25.379925', 'step': 17121, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:25.447636', 'step': 17121, 'epoch': 2}
{'type': 'loss', 'content': 0.03462135046720505, 'timestamp': '2025-10-02 00:41:25.451354', 'step': 17122, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:25.517672', 'step': 17122, 'epoch': 2}
{'type': 'loss', 'content': 0.05130636692047119, 'timestamp': '2025-10-02 00:41:25.529560', 'step': 17123, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:25.623754', 'step': 17123, 'epoch': 2}
{'type': 'loss', 'content': 0.03973883017897606, 'timestamp': '2025-10-02 00:41:25.630561', 'step': 17124, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:25.719473', 'step': 17124, 'epoch': 2}
{'type': 'loss', 'content': 0.040651045739650726, 'timestamp': '2025-10-02 00:41:25.727067', 'step': 17125, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:25.783335', 'step': 17125, 'epoch': 2}
{'type': 'loss', 'content': 0.0253895353525877, 'timestamp': '2025-10-02 00:41:25.786896', 'step': 17126, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:25.872820', 'step': 17126, 'epoch': 2}
{'type': 'loss', 'content': 0.026052765548229218, 'timestamp': '2025-10-02 00:41:25.875548', 'step': 17127, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:41:25.942681', 'step': 17127, 'epoch': 2}
{'type': 'loss', 'content': 0.045460913330316544, 'timestamp': '2025-10-02 00:41:25.953635', 'step': 17128, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:26.016260', 'step': 17128, 'epoch': 2}
{'type': 'loss', 'content': 0.05339771509170532, 'timestamp': '2025-10-02 00:41:26.022210', 'step': 17129, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:26.081760', 'step': 17129, 'epoch': 2}
{'type': 'loss', 'content': 0.005248456262052059, 'timestamp': '2025-10-02 00:41:26.084872', 'step': 17130, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:41:26.144441', 'step': 17130, 'epoch': 2}
{'type': 'loss', 'content': 0.05640646070241928, 'timestamp': '2025-10-02 00:41:26.147161', 'step': 17131, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:41:26.205344', 'step': 17131, 'epoch': 2}
{'type': 'loss', 'content': 0.0404229462146759, 'timestamp': '2025-10-02 00:41:26.212601', 'step': 17132, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:26.268331', 'step': 17132, 'epoch': 2}
{'type': 'loss', 'content': 0.1526576280593872, 'timestamp': '2025-10-02 00:41:26.271879', 'step': 17133, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:26.338627', 'step': 17133, 'epoch': 2}
{'type': 'loss', 'content': 0.04414334148168564, 'timestamp': '2025-10-02 00:41:26.351088', 'step': 17134, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:26.414203', 'step': 17134, 'epoch': 2}
{'type': 'loss', 'content': 0.09687613695859909, 'timestamp': '2025-10-02 00:41:26.417621', 'step': 17135, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:26.475792', 'step': 17135, 'epoch': 2}
{'type': 'loss', 'content': 0.015094052068889141, 'timestamp': '2025-10-02 00:41:26.484162', 'step': 17136, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:26.558351', 'step': 17136, 'epoch': 2}
{'type': 'loss', 'content': 0.06686899811029434, 'timestamp': '2025-10-02 00:41:26.567836', 'step': 17137, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:26.631612', 'step': 17137, 'epoch': 2}
{'type': 'loss', 'content': 0.00181523896753788, 'timestamp': '2025-10-02 00:41:26.639180', 'step': 17138, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:41:26.722638', 'step': 17138, 'epoch': 2}
{'type': 'loss', 'content': 0.012885978445410728, 'timestamp': '2025-10-02 00:41:26.735798', 'step': 17139, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:26.793150', 'step': 17139, 'epoch': 2}
{'type': 'loss', 'content': 0.06362280994653702, 'timestamp': '2025-10-02 00:41:26.799927', 'step': 17140, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:26.856644', 'step': 17140, 'epoch': 2}
{'type': 'loss', 'content': 0.06902416050434113, 'timestamp': '2025-10-02 00:41:26.859817', 'step': 17141, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:26.923735', 'step': 17141, 'epoch': 2}
{'type': 'loss', 'content': 0.14034707844257355, 'timestamp': '2025-10-02 00:41:26.927761', 'step': 17142, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:27.002327', 'step': 17142, 'epoch': 2}
{'type': 'loss', 'content': 0.07031777501106262, 'timestamp': '2025-10-02 00:41:27.018519', 'step': 17143, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:27.076095', 'step': 17143, 'epoch': 2}
{'type': 'loss', 'content': 0.06710542738437653, 'timestamp': '2025-10-02 00:41:27.083066', 'step': 17144, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:27.153897', 'step': 17144, 'epoch': 2}
{'type': 'loss', 'content': 0.04822073504328728, 'timestamp': '2025-10-02 00:41:27.158853', 'step': 17145, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:27.228285', 'step': 17145, 'epoch': 2}
{'type': 'loss', 'content': 0.024797210469841957, 'timestamp': '2025-10-02 00:41:27.238817', 'step': 17146, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:27.304664', 'step': 17146, 'epoch': 2}
{'type': 'loss', 'content': 0.011700720526278019, 'timestamp': '2025-10-02 00:41:27.312735', 'step': 17147, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:27.374915', 'step': 17147, 'epoch': 2}
{'type': 'loss', 'content': 0.024513592943549156, 'timestamp': '2025-10-02 00:41:27.382978', 'step': 17148, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:27.450623', 'step': 17148, 'epoch': 2}
{'type': 'loss', 'content': 0.04108250513672829, 'timestamp': '2025-10-02 00:41:27.453477', 'step': 17149, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:27.517980', 'step': 17149, 'epoch': 2}
{'type': 'loss', 'content': 0.0006710508605465293, 'timestamp': '2025-10-02 00:41:27.528786', 'step': 17150, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:41:27.602790', 'step': 17150, 'epoch': 2}
{'type': 'loss', 'content': 0.009923260658979416, 'timestamp': '2025-10-02 00:41:27.613439', 'step': 17151, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:27.704767', 'step': 17151, 'epoch': 2}
{'type': 'loss', 'content': 0.04790796339511871, 'timestamp': '2025-10-02 00:41:27.713375', 'step': 17152, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:27.790313', 'step': 17152, 'epoch': 2}
{'type': 'loss', 'content': 0.07977383583784103, 'timestamp': '2025-10-02 00:41:27.803152', 'step': 17153, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:27.860288', 'step': 17153, 'epoch': 2}
{'type': 'loss', 'content': 0.03412482887506485, 'timestamp': '2025-10-02 00:41:27.863870', 'step': 17154, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:27.927565', 'step': 17154, 'epoch': 2}
{'type': 'loss', 'content': 0.02343437448143959, 'timestamp': '2025-10-02 00:41:27.935051', 'step': 17155, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:27.991020', 'step': 17155, 'epoch': 2}
{'type': 'loss', 'content': 0.11653480678796768, 'timestamp': '2025-10-02 00:41:27.997416', 'step': 17156, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:28.052603', 'step': 17156, 'epoch': 2}
{'type': 'loss', 'content': 0.057092152535915375, 'timestamp': '2025-10-02 00:41:28.055677', 'step': 17157, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:41:28.118289', 'step': 17157, 'epoch': 2}
{'type': 'loss', 'content': 0.025006834417581558, 'timestamp': '2025-10-02 00:41:28.128964', 'step': 17158, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:28.185901', 'step': 17158, 'epoch': 2}
{'type': 'loss', 'content': 0.06010504812002182, 'timestamp': '2025-10-02 00:41:28.195430', 'step': 17159, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:41:28.254315', 'step': 17159, 'epoch': 2}
{'type': 'loss', 'content': 0.05746147781610489, 'timestamp': '2025-10-02 00:41:28.263078', 'step': 17160, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:28.341664', 'step': 17160, 'epoch': 2}
{'type': 'loss', 'content': 0.016030464321374893, 'timestamp': '2025-10-02 00:41:28.351948', 'step': 17161, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:28.417745', 'step': 17161, 'epoch': 2}
{'type': 'loss', 'content': 0.06301441043615341, 'timestamp': '2025-10-02 00:41:28.421804', 'step': 17162, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:28.493738', 'step': 17162, 'epoch': 2}
{'type': 'loss', 'content': 0.2209891527891159, 'timestamp': '2025-10-02 00:41:28.497183', 'step': 17163, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:28.555210', 'step': 17163, 'epoch': 2}
{'type': 'loss', 'content': 0.04334967955946922, 'timestamp': '2025-10-02 00:41:28.562499', 'step': 17164, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:28.619853', 'step': 17164, 'epoch': 2}
{'type': 'loss', 'content': 0.12595513463020325, 'timestamp': '2025-10-02 00:41:28.625713', 'step': 17165, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:28.683246', 'step': 17165, 'epoch': 2}
{'type': 'loss', 'content': 0.01747742109000683, 'timestamp': '2025-10-02 00:41:28.692803', 'step': 17166, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:28.749505', 'step': 17166, 'epoch': 2}
{'type': 'loss', 'content': 0.007768011651933193, 'timestamp': '2025-10-02 00:41:28.755453', 'step': 17167, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:28.813820', 'step': 17167, 'epoch': 2}
{'type': 'loss', 'content': 0.08888451009988785, 'timestamp': '2025-10-02 00:41:28.821138', 'step': 17168, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:28.882118', 'step': 17168, 'epoch': 2}
{'type': 'loss', 'content': 0.04194461181759834, 'timestamp': '2025-10-02 00:41:28.885407', 'step': 17169, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:28.948018', 'step': 17169, 'epoch': 2}
{'type': 'loss', 'content': 0.05619121342897415, 'timestamp': '2025-10-02 00:41:28.958467', 'step': 17170, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:29.014346', 'step': 17170, 'epoch': 2}
{'type': 'loss', 'content': 0.10820110887289047, 'timestamp': '2025-10-02 00:41:29.017861', 'step': 17171, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:29.075901', 'step': 17171, 'epoch': 2}
{'type': 'loss', 'content': 0.067290760576725, 'timestamp': '2025-10-02 00:41:29.082641', 'step': 17172, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:29.137872', 'step': 17172, 'epoch': 2}
{'type': 'loss', 'content': 0.12532106041908264, 'timestamp': '2025-10-02 00:41:29.141829', 'step': 17173, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:29.200779', 'step': 17173, 'epoch': 2}
{'type': 'loss', 'content': 0.06602214276790619, 'timestamp': '2025-10-02 00:41:29.203902', 'step': 17174, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:29.260630', 'step': 17174, 'epoch': 2}
{'type': 'loss', 'content': 0.03559177368879318, 'timestamp': '2025-10-02 00:41:29.270481', 'step': 17175, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:29.338537', 'step': 17175, 'epoch': 2}
{'type': 'loss', 'content': 0.03610976040363312, 'timestamp': '2025-10-02 00:41:29.351893', 'step': 17176, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:29.417426', 'step': 17176, 'epoch': 2}
{'type': 'loss', 'content': 0.06921987980604172, 'timestamp': '2025-10-02 00:41:29.423987', 'step': 17177, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:41:29.490146', 'step': 17177, 'epoch': 2}
{'type': 'loss', 'content': 0.01883731409907341, 'timestamp': '2025-10-02 00:41:29.500368', 'step': 17178, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:29.567949', 'step': 17178, 'epoch': 2}
{'type': 'loss', 'content': 0.15489161014556885, 'timestamp': '2025-10-02 00:41:29.573865', 'step': 17179, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:29.646208', 'step': 17179, 'epoch': 2}
{'type': 'loss', 'content': 0.03584959730505943, 'timestamp': '2025-10-02 00:41:29.659203', 'step': 17180, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:29.718489', 'step': 17180, 'epoch': 2}
{'type': 'loss', 'content': 0.014255069196224213, 'timestamp': '2025-10-02 00:41:29.722907', 'step': 17181, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:41:29.800727', 'step': 17181, 'epoch': 2}
{'type': 'loss', 'content': 0.048210710287094116, 'timestamp': '2025-10-02 00:41:29.811384', 'step': 17182, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:29.877985', 'step': 17182, 'epoch': 2}
{'type': 'loss', 'content': 0.0733533725142479, 'timestamp': '2025-10-02 00:41:29.884987', 'step': 17183, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:29.953780', 'step': 17183, 'epoch': 2}
{'type': 'loss', 'content': 0.18571555614471436, 'timestamp': '2025-10-02 00:41:29.961038', 'step': 17184, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:30.020424', 'step': 17184, 'epoch': 2}
{'type': 'loss', 'content': 0.04580002650618553, 'timestamp': '2025-10-02 00:41:30.026633', 'step': 17185, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:41:30.086227', 'step': 17185, 'epoch': 2}
{'type': 'loss', 'content': 0.10014314949512482, 'timestamp': '2025-10-02 00:41:30.095054', 'step': 17186, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:30.170278', 'step': 17186, 'epoch': 2}
{'type': 'loss', 'content': 0.026164764538407326, 'timestamp': '2025-10-02 00:41:30.180777', 'step': 17187, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:30.247462', 'step': 17187, 'epoch': 2}
{'type': 'loss', 'content': 0.15767639875411987, 'timestamp': '2025-10-02 00:41:30.255373', 'step': 17188, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:30.326655', 'step': 17188, 'epoch': 2}
{'type': 'loss', 'content': 0.07113947719335556, 'timestamp': '2025-10-02 00:41:30.329649', 'step': 17189, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:41:30.387790', 'step': 17189, 'epoch': 2}
{'type': 'loss', 'content': 0.12496764957904816, 'timestamp': '2025-10-02 00:41:30.391516', 'step': 17190, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:30.456846', 'step': 17190, 'epoch': 2}
{'type': 'loss', 'content': 0.07986988872289658, 'timestamp': '2025-10-02 00:41:30.460651', 'step': 17191, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:41:30.528677', 'step': 17191, 'epoch': 2}
{'type': 'loss', 'content': 0.01605479046702385, 'timestamp': '2025-10-02 00:41:30.540597', 'step': 17192, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:30.616277', 'step': 17192, 'epoch': 2}
{'type': 'loss', 'content': 0.07079026103019714, 'timestamp': '2025-10-02 00:41:30.623833', 'step': 17193, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:41:30.680735', 'step': 17193, 'epoch': 2}
{'type': 'loss', 'content': 0.14104698598384857, 'timestamp': '2025-10-02 00:41:30.690502', 'step': 17194, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:41:30.760643', 'step': 17194, 'epoch': 2}
{'type': 'loss', 'content': 0.19494666159152985, 'timestamp': '2025-10-02 00:41:30.763790', 'step': 17195, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:30.830830', 'step': 17195, 'epoch': 2}
{'type': 'loss', 'content': 0.09007825702428818, 'timestamp': '2025-10-02 00:41:30.837773', 'step': 17196, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:30.899399', 'step': 17196, 'epoch': 2}
{'type': 'loss', 'content': 0.04548069089651108, 'timestamp': '2025-10-02 00:41:30.908111', 'step': 17197, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:30.970134', 'step': 17197, 'epoch': 2}
{'type': 'loss', 'content': 0.011927218176424503, 'timestamp': '2025-10-02 00:41:30.977277', 'step': 17198, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:31.035042', 'step': 17198, 'epoch': 2}
{'type': 'loss', 'content': 0.028159642592072487, 'timestamp': '2025-10-02 00:41:31.044107', 'step': 17199, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:31.118268', 'step': 17199, 'epoch': 2}
{'type': 'loss', 'content': 0.02379566617310047, 'timestamp': '2025-10-02 00:41:31.129696', 'step': 17200, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:41:31.197605', 'step': 17200, 'epoch': 2}
{'type': 'loss', 'content': 0.006970548536628485, 'timestamp': '2025-10-02 00:41:31.209401', 'step': 17201, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:31.280416', 'step': 17201, 'epoch': 2}
{'type': 'loss', 'content': 0.05297412723302841, 'timestamp': '2025-10-02 00:41:31.288465', 'step': 17202, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:31.364357', 'step': 17202, 'epoch': 2}
{'type': 'loss', 'content': 0.05708427354693413, 'timestamp': '2025-10-02 00:41:31.368198', 'step': 17203, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:31.457209', 'step': 17203, 'epoch': 2}
{'type': 'loss', 'content': 0.11151773482561111, 'timestamp': '2025-10-02 00:41:31.463950', 'step': 17204, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:31.540230', 'step': 17204, 'epoch': 2}
{'type': 'loss', 'content': 0.038200780749320984, 'timestamp': '2025-10-02 00:41:31.543030', 'step': 17205, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:31.599901', 'step': 17205, 'epoch': 2}
{'type': 'loss', 'content': 0.11719133704900742, 'timestamp': '2025-10-02 00:41:31.603237', 'step': 17206, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:31.664252', 'step': 17206, 'epoch': 2}
{'type': 'loss', 'content': 0.08638712763786316, 'timestamp': '2025-10-02 00:41:31.667921', 'step': 17207, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:31.733306', 'step': 17207, 'epoch': 2}
{'type': 'loss', 'content': 0.05740668252110481, 'timestamp': '2025-10-02 00:41:31.739686', 'step': 17208, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:41:31.802067', 'step': 17208, 'epoch': 2}
{'type': 'loss', 'content': 0.03490990772843361, 'timestamp': '2025-10-02 00:41:31.813845', 'step': 17209, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:31.870527', 'step': 17209, 'epoch': 2}
{'type': 'loss', 'content': 0.03141161799430847, 'timestamp': '2025-10-02 00:41:31.879988', 'step': 17210, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:31.935122', 'step': 17210, 'epoch': 2}
{'type': 'loss', 'content': 0.15259797871112823, 'timestamp': '2025-10-02 00:41:31.937874', 'step': 17211, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:31.993457', 'step': 17211, 'epoch': 2}
{'type': 'loss', 'content': 0.010940871201455593, 'timestamp': '2025-10-02 00:41:32.003483', 'step': 17212, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:41:32.057739', 'step': 17212, 'epoch': 2}
{'type': 'loss', 'content': 0.07319493591785431, 'timestamp': '2025-10-02 00:41:32.060509', 'step': 17213, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:32.115228', 'step': 17213, 'epoch': 2}
{'type': 'loss', 'content': 0.09006627649068832, 'timestamp': '2025-10-02 00:41:32.118526', 'step': 17214, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:32.174056', 'step': 17214, 'epoch': 2}
{'type': 'loss', 'content': 0.06831961870193481, 'timestamp': '2025-10-02 00:41:32.183564', 'step': 17215, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:32.238725', 'step': 17215, 'epoch': 2}
{'type': 'loss', 'content': 0.06764332205057144, 'timestamp': '2025-10-02 00:41:32.246946', 'step': 17216, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:32.301754', 'step': 17216, 'epoch': 2}
{'type': 'loss', 'content': 0.1336439996957779, 'timestamp': '2025-10-02 00:41:32.315555', 'step': 17217, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:32.376956', 'step': 17217, 'epoch': 2}
{'type': 'loss', 'content': 0.03727347403764725, 'timestamp': '2025-10-02 00:41:32.387408', 'step': 17218, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:32.442763', 'step': 17218, 'epoch': 2}
{'type': 'loss', 'content': 0.03427095338702202, 'timestamp': '2025-10-02 00:41:32.449970', 'step': 17219, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:32.505398', 'step': 17219, 'epoch': 2}
{'type': 'loss', 'content': 0.15611335635185242, 'timestamp': '2025-10-02 00:41:32.512324', 'step': 17220, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:41:32.572804', 'step': 17220, 'epoch': 2}
{'type': 'loss', 'content': 0.0957774668931961, 'timestamp': '2025-10-02 00:41:32.584396', 'step': 17221, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:32.643464', 'step': 17221, 'epoch': 2}
{'type': 'loss', 'content': 0.061304859817028046, 'timestamp': '2025-10-02 00:41:32.649095', 'step': 17222, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:41:32.733504', 'step': 17222, 'epoch': 2}
{'type': 'loss', 'content': 0.0009985103970393538, 'timestamp': '2025-10-02 00:41:32.746652', 'step': 17223, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:32.827005', 'step': 17223, 'epoch': 2}
{'type': 'loss', 'content': 0.009846149943768978, 'timestamp': '2025-10-02 00:41:32.836716', 'step': 17224, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:32.891308', 'step': 17224, 'epoch': 2}
{'type': 'loss', 'content': 0.1360825151205063, 'timestamp': '2025-10-02 00:41:32.893814', 'step': 17225, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:41:32.955752', 'step': 17225, 'epoch': 2}
{'type': 'loss', 'content': 0.014584806747734547, 'timestamp': '2025-10-02 00:41:32.966392', 'step': 17226, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:33.022148', 'step': 17226, 'epoch': 2}
{'type': 'loss', 'content': 0.08397429436445236, 'timestamp': '2025-10-02 00:41:33.027288', 'step': 17227, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:33.082680', 'step': 17227, 'epoch': 2}
{'type': 'loss', 'content': 0.06298486888408661, 'timestamp': '2025-10-02 00:41:33.089715', 'step': 17228, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:33.144621', 'step': 17228, 'epoch': 2}
{'type': 'loss', 'content': 0.060476794838905334, 'timestamp': '2025-10-02 00:41:33.147351', 'step': 17229, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:33.203122', 'step': 17229, 'epoch': 2}
{'type': 'loss', 'content': 0.013457408174872398, 'timestamp': '2025-10-02 00:41:33.212584', 'step': 17230, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:33.268440', 'step': 17230, 'epoch': 2}
{'type': 'loss', 'content': 0.08016858994960785, 'timestamp': '2025-10-02 00:41:33.273441', 'step': 17231, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:33.329369', 'step': 17231, 'epoch': 2}
{'type': 'loss', 'content': 0.041204072535037994, 'timestamp': '2025-10-02 00:41:33.335560', 'step': 17232, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:33.396425', 'step': 17232, 'epoch': 2}
{'type': 'loss', 'content': 0.016164854168891907, 'timestamp': '2025-10-02 00:41:33.407762', 'step': 17233, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:33.469158', 'step': 17233, 'epoch': 2}
{'type': 'loss', 'content': 0.03304596617817879, 'timestamp': '2025-10-02 00:41:33.479600', 'step': 17234, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:41:33.535296', 'step': 17234, 'epoch': 2}
{'type': 'loss', 'content': 0.1517646610736847, 'timestamp': '2025-10-02 00:41:33.537658', 'step': 17235, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:33.592677', 'step': 17235, 'epoch': 2}
{'type': 'loss', 'content': 0.05144956707954407, 'timestamp': '2025-10-02 00:41:33.599246', 'step': 17236, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:41:33.653227', 'step': 17236, 'epoch': 2}
{'type': 'loss', 'content': 0.14609001576900482, 'timestamp': '2025-10-02 00:41:33.669744', 'step': 17237, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:33.735131', 'step': 17237, 'epoch': 2}
{'type': 'loss', 'content': 0.02993166446685791, 'timestamp': '2025-10-02 00:41:33.738570', 'step': 17238, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:41:33.809518', 'step': 17238, 'epoch': 2}
{'type': 'loss', 'content': 0.03277776017785072, 'timestamp': '2025-10-02 00:41:33.821810', 'step': 17239, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:33.880704', 'step': 17239, 'epoch': 2}
{'type': 'loss', 'content': 0.08892656862735748, 'timestamp': '2025-10-02 00:41:33.888104', 'step': 17240, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:33.950512', 'step': 17240, 'epoch': 2}
{'type': 'loss', 'content': 0.09528820961713791, 'timestamp': '2025-10-02 00:41:33.959942', 'step': 17241, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:34.034907', 'step': 17241, 'epoch': 2}
{'type': 'loss', 'content': 0.050667110830545425, 'timestamp': '2025-10-02 00:41:34.038472', 'step': 17242, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:34.102511', 'step': 17242, 'epoch': 2}
{'type': 'loss', 'content': 0.06988540291786194, 'timestamp': '2025-10-02 00:41:34.112947', 'step': 17243, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:34.169718', 'step': 17243, 'epoch': 2}
{'type': 'loss', 'content': 0.039763566106557846, 'timestamp': '2025-10-02 00:41:34.180892', 'step': 17244, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:34.236232', 'step': 17244, 'epoch': 2}
{'type': 'loss', 'content': 0.10579312592744827, 'timestamp': '2025-10-02 00:41:34.239512', 'step': 17245, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:34.295783', 'step': 17245, 'epoch': 2}
{'type': 'loss', 'content': 0.0784171000123024, 'timestamp': '2025-10-02 00:41:34.298744', 'step': 17246, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:34.360165', 'step': 17246, 'epoch': 2}
{'type': 'loss', 'content': 0.07513721287250519, 'timestamp': '2025-10-02 00:41:34.363410', 'step': 17247, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:34.420698', 'step': 17247, 'epoch': 2}
{'type': 'loss', 'content': 0.005800341721624136, 'timestamp': '2025-10-02 00:41:34.427449', 'step': 17248, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:34.488073', 'step': 17248, 'epoch': 2}
{'type': 'loss', 'content': 0.028420768678188324, 'timestamp': '2025-10-02 00:41:34.490913', 'step': 17249, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:34.547534', 'step': 17249, 'epoch': 2}
{'type': 'loss', 'content': 0.08609616011381149, 'timestamp': '2025-10-02 00:41:34.550618', 'step': 17250, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:34.610033', 'step': 17250, 'epoch': 2}
{'type': 'loss', 'content': 0.0544147752225399, 'timestamp': '2025-10-02 00:41:34.612571', 'step': 17251, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:41:34.669418', 'step': 17251, 'epoch': 2}
{'type': 'loss', 'content': 0.10319164395332336, 'timestamp': '2025-10-02 00:41:34.676274', 'step': 17252, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:34.733628', 'step': 17252, 'epoch': 2}
{'type': 'loss', 'content': 0.11741209030151367, 'timestamp': '2025-10-02 00:41:34.737610', 'step': 17253, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:34.795244', 'step': 17253, 'epoch': 2}
{'type': 'loss', 'content': 0.03645169362425804, 'timestamp': '2025-10-02 00:41:34.804725', 'step': 17254, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:41:34.860627', 'step': 17254, 'epoch': 2}
{'type': 'loss', 'content': 0.13207586109638214, 'timestamp': '2025-10-02 00:41:34.864469', 'step': 17255, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:41:34.922646', 'step': 17255, 'epoch': 2}
{'type': 'loss', 'content': 0.07540755718946457, 'timestamp': '2025-10-02 00:41:34.929230', 'step': 17256, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:41:34.989626', 'step': 17256, 'epoch': 2}
{'type': 'loss', 'content': 0.008666746318340302, 'timestamp': '2025-10-02 00:41:35.000609', 'step': 17257, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:35.058549', 'step': 17257, 'epoch': 2}
{'type': 'loss', 'content': 0.033276207745075226, 'timestamp': '2025-10-02 00:41:35.063630', 'step': 17258, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:41:35.125429', 'step': 17258, 'epoch': 2}
{'type': 'loss', 'content': 0.0407104566693306, 'timestamp': '2025-10-02 00:41:35.135662', 'step': 17259, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:35.209649', 'step': 17259, 'epoch': 2}
{'type': 'loss', 'content': 0.07669413089752197, 'timestamp': '2025-10-02 00:41:35.221398', 'step': 17260, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:35.295499', 'step': 17260, 'epoch': 2}
{'type': 'loss', 'content': 0.06545069068670273, 'timestamp': '2025-10-02 00:41:35.300924', 'step': 17261, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:41:35.376516', 'step': 17261, 'epoch': 2}
{'type': 'loss', 'content': 0.0033100303262472153, 'timestamp': '2025-10-02 00:41:35.386672', 'step': 17262, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:35.460469', 'step': 17262, 'epoch': 2}
{'type': 'loss', 'content': 0.07122241705656052, 'timestamp': '2025-10-02 00:41:35.465955', 'step': 17263, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:35.553642', 'step': 17263, 'epoch': 2}
{'type': 'loss', 'content': 0.05093027651309967, 'timestamp': '2025-10-02 00:41:35.569744', 'step': 17264, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:35.629610', 'step': 17264, 'epoch': 2}
{'type': 'loss', 'content': 0.047816187143325806, 'timestamp': '2025-10-02 00:41:35.632277', 'step': 17265, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:41:35.702576', 'step': 17265, 'epoch': 2}
{'type': 'loss', 'content': 0.040753837674856186, 'timestamp': '2025-10-02 00:41:35.712743', 'step': 17266, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:35.778879', 'step': 17266, 'epoch': 2}
{'type': 'loss', 'content': 0.043140705674886703, 'timestamp': '2025-10-02 00:41:35.789575', 'step': 17267, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:35.870042', 'step': 17267, 'epoch': 2}
{'type': 'loss', 'content': 0.10479243099689484, 'timestamp': '2025-10-02 00:41:35.877502', 'step': 17268, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:35.945979', 'step': 17268, 'epoch': 2}
{'type': 'loss', 'content': 0.06311499327421188, 'timestamp': '2025-10-02 00:41:35.960209', 'step': 17269, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:41:36.034662', 'step': 17269, 'epoch': 2}
{'type': 'loss', 'content': 0.1338547170162201, 'timestamp': '2025-10-02 00:41:36.040888', 'step': 17270, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:41:36.121684', 'step': 17270, 'epoch': 2}
{'type': 'loss', 'content': 0.0239737406373024, 'timestamp': '2025-10-02 00:41:36.131884', 'step': 17271, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:36.190474', 'step': 17271, 'epoch': 2}
{'type': 'loss', 'content': 0.053368035703897476, 'timestamp': '2025-10-02 00:41:36.198290', 'step': 17272, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:36.265277', 'step': 17272, 'epoch': 2}
{'type': 'loss', 'content': 0.05246404930949211, 'timestamp': '2025-10-02 00:41:36.269705', 'step': 17273, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:41:36.355102', 'step': 17273, 'epoch': 2}
{'type': 'loss', 'content': 0.07908192276954651, 'timestamp': '2025-10-02 00:41:36.364354', 'step': 17274, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:36.438838', 'step': 17274, 'epoch': 2}
{'type': 'loss', 'content': 0.04785094037652016, 'timestamp': '2025-10-02 00:41:36.447764', 'step': 17275, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:36.515158', 'step': 17275, 'epoch': 2}
{'type': 'loss', 'content': 0.009192602708935738, 'timestamp': '2025-10-02 00:41:36.528103', 'step': 17276, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:36.600417', 'step': 17276, 'epoch': 2}
{'type': 'loss', 'content': 0.08811145275831223, 'timestamp': '2025-10-02 00:41:36.609909', 'step': 17277, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:36.679057', 'step': 17277, 'epoch': 2}
{'type': 'loss', 'content': 0.007032608613371849, 'timestamp': '2025-10-02 00:41:36.687281', 'step': 17278, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:36.757488', 'step': 17278, 'epoch': 2}
{'type': 'loss', 'content': 0.11483874171972275, 'timestamp': '2025-10-02 00:41:36.761175', 'step': 17279, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:36.820848', 'step': 17279, 'epoch': 2}
{'type': 'loss', 'content': 0.05438407137989998, 'timestamp': '2025-10-02 00:41:36.831067', 'step': 17280, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:36.891901', 'step': 17280, 'epoch': 2}
{'type': 'loss', 'content': 0.03857024013996124, 'timestamp': '2025-10-02 00:41:36.903747', 'step': 17281, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:36.975633', 'step': 17281, 'epoch': 2}
{'type': 'loss', 'content': 0.1081031784415245, 'timestamp': '2025-10-02 00:41:36.984679', 'step': 17282, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:37.051699', 'step': 17282, 'epoch': 2}
{'type': 'loss', 'content': 0.09353082627058029, 'timestamp': '2025-10-02 00:41:37.055099', 'step': 17283, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:37.117016', 'step': 17283, 'epoch': 2}
{'type': 'loss', 'content': 0.07144094258546829, 'timestamp': '2025-10-02 00:41:37.126719', 'step': 17284, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:37.199343', 'step': 17284, 'epoch': 2}
{'type': 'loss', 'content': 0.14106765389442444, 'timestamp': '2025-10-02 00:41:37.205742', 'step': 17285, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:37.274993', 'step': 17285, 'epoch': 2}
{'type': 'loss', 'content': 0.03490053862333298, 'timestamp': '2025-10-02 00:41:37.282894', 'step': 17286, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:37.354692', 'step': 17286, 'epoch': 2}
{'type': 'loss', 'content': 0.26695680618286133, 'timestamp': '2025-10-02 00:41:37.362382', 'step': 17287, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:37.426140', 'step': 17287, 'epoch': 2}
{'type': 'loss', 'content': 0.04462622478604317, 'timestamp': '2025-10-02 00:41:37.435438', 'step': 17288, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:37.509618', 'step': 17288, 'epoch': 2}
{'type': 'loss', 'content': 0.07076027989387512, 'timestamp': '2025-10-02 00:41:37.520870', 'step': 17289, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:41:37.598883', 'step': 17289, 'epoch': 2}
{'type': 'loss', 'content': 0.01754024811089039, 'timestamp': '2025-10-02 00:41:37.609533', 'step': 17290, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:41:37.687391', 'step': 17290, 'epoch': 2}
{'type': 'loss', 'content': 0.020017575472593307, 'timestamp': '2025-10-02 00:41:37.698032', 'step': 17291, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:37.764600', 'step': 17291, 'epoch': 2}
{'type': 'loss', 'content': 0.04298540577292442, 'timestamp': '2025-10-02 00:41:37.777648', 'step': 17292, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:37.851256', 'step': 17292, 'epoch': 2}
{'type': 'loss', 'content': 0.035273369401693344, 'timestamp': '2025-10-02 00:41:37.859654', 'step': 17293, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:37.930070', 'step': 17293, 'epoch': 2}
{'type': 'loss', 'content': 0.06071829795837402, 'timestamp': '2025-10-02 00:41:37.945795', 'step': 17294, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:38.019363', 'step': 17294, 'epoch': 2}
{'type': 'loss', 'content': 0.05760670453310013, 'timestamp': '2025-10-02 00:41:38.026749', 'step': 17295, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:38.098689', 'step': 17295, 'epoch': 2}
{'type': 'loss', 'content': 0.06169495731592178, 'timestamp': '2025-10-02 00:41:38.109959', 'step': 17296, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:38.169893', 'step': 17296, 'epoch': 2}
{'type': 'loss', 'content': 0.06130408123135567, 'timestamp': '2025-10-02 00:41:38.172499', 'step': 17297, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:38.235908', 'step': 17297, 'epoch': 2}
{'type': 'loss', 'content': 0.02279006317257881, 'timestamp': '2025-10-02 00:41:38.242884', 'step': 17298, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:38.309369', 'step': 17298, 'epoch': 2}
{'type': 'loss', 'content': 0.05283549427986145, 'timestamp': '2025-10-02 00:41:38.312633', 'step': 17299, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:38.374574', 'step': 17299, 'epoch': 2}
{'type': 'loss', 'content': 0.2235211879014969, 'timestamp': '2025-10-02 00:41:38.381180', 'step': 17300, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:38.438722', 'step': 17300, 'epoch': 2}
{'type': 'loss', 'content': 0.014647954143583775, 'timestamp': '2025-10-02 00:41:38.448624', 'step': 17301, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:38.509399', 'step': 17301, 'epoch': 2}
{'type': 'loss', 'content': 0.01949009858071804, 'timestamp': '2025-10-02 00:41:38.512818', 'step': 17302, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:38.570842', 'step': 17302, 'epoch': 2}
{'type': 'loss', 'content': 0.038057368248701096, 'timestamp': '2025-10-02 00:41:38.581066', 'step': 17303, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:38.643318', 'step': 17303, 'epoch': 2}
{'type': 'loss', 'content': 0.16270117461681366, 'timestamp': '2025-10-02 00:41:38.650187', 'step': 17304, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:38.717959', 'step': 17304, 'epoch': 2}
{'type': 'loss', 'content': 0.04921170696616173, 'timestamp': '2025-10-02 00:41:38.726851', 'step': 17305, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:38.783638', 'step': 17305, 'epoch': 2}
{'type': 'loss', 'content': 0.06916901469230652, 'timestamp': '2025-10-02 00:41:38.791110', 'step': 17306, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:41:38.853755', 'step': 17306, 'epoch': 2}
{'type': 'loss', 'content': 0.11854276061058044, 'timestamp': '2025-10-02 00:41:38.858957', 'step': 17307, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:38.931904', 'step': 17307, 'epoch': 2}
{'type': 'loss', 'content': 0.06328586488962173, 'timestamp': '2025-10-02 00:41:38.938740', 'step': 17308, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:38.995780', 'step': 17308, 'epoch': 2}
{'type': 'loss', 'content': 0.04279880225658417, 'timestamp': '2025-10-02 00:41:39.000986', 'step': 17309, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:39.070520', 'step': 17309, 'epoch': 2}
{'type': 'loss', 'content': 0.03931459039449692, 'timestamp': '2025-10-02 00:41:39.078294', 'step': 17310, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:41:39.142892', 'step': 17310, 'epoch': 2}
{'type': 'loss', 'content': 0.010749728418886662, 'timestamp': '2025-10-02 00:41:39.153134', 'step': 17311, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:39.219141', 'step': 17311, 'epoch': 2}
{'type': 'loss', 'content': 0.06689885258674622, 'timestamp': '2025-10-02 00:41:39.230396', 'step': 17312, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:39.300368', 'step': 17312, 'epoch': 2}
{'type': 'loss', 'content': 0.1594923585653305, 'timestamp': '2025-10-02 00:41:39.307561', 'step': 17313, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:39.370402', 'step': 17313, 'epoch': 2}
{'type': 'loss', 'content': 0.060350894927978516, 'timestamp': '2025-10-02 00:41:39.376899', 'step': 17314, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:39.441527', 'step': 17314, 'epoch': 2}
{'type': 'loss', 'content': 0.054855894297361374, 'timestamp': '2025-10-02 00:41:39.444925', 'step': 17315, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:39.506619', 'step': 17315, 'epoch': 2}
{'type': 'loss', 'content': 0.07999864965677261, 'timestamp': '2025-10-02 00:41:39.513924', 'step': 17316, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:39.577912', 'step': 17316, 'epoch': 2}
{'type': 'loss', 'content': 0.01216590404510498, 'timestamp': '2025-10-02 00:41:39.587818', 'step': 17317, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:39.658009', 'step': 17317, 'epoch': 2}
{'type': 'loss', 'content': 0.03636262193322182, 'timestamp': '2025-10-02 00:41:39.668461', 'step': 17318, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:41:39.739467', 'step': 17318, 'epoch': 2}
{'type': 'loss', 'content': 0.030480902642011642, 'timestamp': '2025-10-02 00:41:39.750071', 'step': 17319, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:39.811222', 'step': 17319, 'epoch': 2}
{'type': 'loss', 'content': 0.03571844846010208, 'timestamp': '2025-10-02 00:41:39.819159', 'step': 17320, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:39.877677', 'step': 17320, 'epoch': 2}
{'type': 'loss', 'content': 0.014046739786863327, 'timestamp': '2025-10-02 00:41:39.880677', 'step': 17321, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:39.939733', 'step': 17321, 'epoch': 2}
{'type': 'loss', 'content': 0.10143345594406128, 'timestamp': '2025-10-02 00:41:39.942694', 'step': 17322, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:40.003724', 'step': 17322, 'epoch': 2}
{'type': 'loss', 'content': 0.013724179938435555, 'timestamp': '2025-10-02 00:41:40.007234', 'step': 17323, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:41:40.071795', 'step': 17323, 'epoch': 2}
{'type': 'loss', 'content': 0.030823294073343277, 'timestamp': '2025-10-02 00:41:40.083214', 'step': 17324, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:40.138860', 'step': 17324, 'epoch': 2}
{'type': 'loss', 'content': 0.02423892542719841, 'timestamp': '2025-10-02 00:41:40.142367', 'step': 17325, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:40.202155', 'step': 17325, 'epoch': 2}
{'type': 'loss', 'content': 0.030939318239688873, 'timestamp': '2025-10-02 00:41:40.205460', 'step': 17326, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:40.269108', 'step': 17326, 'epoch': 2}
{'type': 'loss', 'content': 0.12489081174135208, 'timestamp': '2025-10-02 00:41:40.271922', 'step': 17327, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:41:40.335375', 'step': 17327, 'epoch': 2}
{'type': 'loss', 'content': 0.04894271492958069, 'timestamp': '2025-10-02 00:41:40.346781', 'step': 17328, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:40.406086', 'step': 17328, 'epoch': 2}
{'type': 'loss', 'content': 0.09650121629238129, 'timestamp': '2025-10-02 00:41:40.409859', 'step': 17329, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:40.468204', 'step': 17329, 'epoch': 2}
{'type': 'loss', 'content': 0.088755764067173, 'timestamp': '2025-10-02 00:41:40.471453', 'step': 17330, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:40.537156', 'step': 17330, 'epoch': 2}
{'type': 'loss', 'content': 0.14362482726573944, 'timestamp': '2025-10-02 00:41:40.540374', 'step': 17331, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:40.605962', 'step': 17331, 'epoch': 2}
{'type': 'loss', 'content': 0.023239189758896828, 'timestamp': '2025-10-02 00:41:40.618300', 'step': 17332, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:40.698246', 'step': 17332, 'epoch': 2}
{'type': 'loss', 'content': 0.14572001993656158, 'timestamp': '2025-10-02 00:41:40.701153', 'step': 17333, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:40.763652', 'step': 17333, 'epoch': 2}
{'type': 'loss', 'content': 0.12586914002895355, 'timestamp': '2025-10-02 00:41:40.775415', 'step': 17334, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:41:40.842034', 'step': 17334, 'epoch': 2}
{'type': 'loss', 'content': 0.044307511299848557, 'timestamp': '2025-10-02 00:41:40.850989', 'step': 17335, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:40.910609', 'step': 17335, 'epoch': 2}
{'type': 'loss', 'content': 0.06769336760044098, 'timestamp': '2025-10-02 00:41:40.918393', 'step': 17336, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:40.976994', 'step': 17336, 'epoch': 2}
{'type': 'loss', 'content': 0.11441203206777573, 'timestamp': '2025-10-02 00:41:40.980285', 'step': 17337, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:41.037765', 'step': 17337, 'epoch': 2}
{'type': 'loss', 'content': 0.0719725638628006, 'timestamp': '2025-10-02 00:41:41.042914', 'step': 17338, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:41.102115', 'step': 17338, 'epoch': 2}
{'type': 'loss', 'content': 0.1318504810333252, 'timestamp': '2025-10-02 00:41:41.105535', 'step': 17339, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:41.164179', 'step': 17339, 'epoch': 2}
{'type': 'loss', 'content': 0.043937064707279205, 'timestamp': '2025-10-02 00:41:41.172171', 'step': 17340, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:41:41.231925', 'step': 17340, 'epoch': 2}
{'type': 'loss', 'content': 0.10462239384651184, 'timestamp': '2025-10-02 00:41:41.242937', 'step': 17341, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:41:41.322358', 'step': 17341, 'epoch': 2}
{'type': 'loss', 'content': 0.02828463912010193, 'timestamp': '2025-10-02 00:41:41.333039', 'step': 17342, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:41.403346', 'step': 17342, 'epoch': 2}
{'type': 'loss', 'content': 0.03792595863342285, 'timestamp': '2025-10-02 00:41:41.413808', 'step': 17343, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:41.473412', 'step': 17343, 'epoch': 2}
{'type': 'loss', 'content': 0.060145508497953415, 'timestamp': '2025-10-02 00:41:41.484270', 'step': 17344, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:41.548219', 'step': 17344, 'epoch': 2}
{'type': 'loss', 'content': 0.15746432542800903, 'timestamp': '2025-10-02 00:41:41.551396', 'step': 17345, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:41.621330', 'step': 17345, 'epoch': 2}
{'type': 'loss', 'content': 0.07667728513479233, 'timestamp': '2025-10-02 00:41:41.624862', 'step': 17346, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:41.691018', 'step': 17346, 'epoch': 2}
{'type': 'loss', 'content': 0.03855997696518898, 'timestamp': '2025-10-02 00:41:41.701533', 'step': 17347, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:41.763614', 'step': 17347, 'epoch': 2}
{'type': 'loss', 'content': 0.15358133614063263, 'timestamp': '2025-10-02 00:41:41.772177', 'step': 17348, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:41.834330', 'step': 17348, 'epoch': 2}
{'type': 'loss', 'content': 0.05012845620512962, 'timestamp': '2025-10-02 00:41:41.844544', 'step': 17349, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:41:41.903701', 'step': 17349, 'epoch': 2}
{'type': 'loss', 'content': 0.06429664045572281, 'timestamp': '2025-10-02 00:41:41.906879', 'step': 17350, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:41.964288', 'step': 17350, 'epoch': 2}
{'type': 'loss', 'content': 0.1071239560842514, 'timestamp': '2025-10-02 00:41:41.969751', 'step': 17351, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:42.029657', 'step': 17351, 'epoch': 2}
{'type': 'loss', 'content': 0.09346511960029602, 'timestamp': '2025-10-02 00:41:42.036418', 'step': 17352, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:42.091668', 'step': 17352, 'epoch': 2}
{'type': 'loss', 'content': 0.039872851222753525, 'timestamp': '2025-10-02 00:41:42.098553', 'step': 17353, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:42.157906', 'step': 17353, 'epoch': 2}
{'type': 'loss', 'content': 0.06453312188386917, 'timestamp': '2025-10-02 00:41:42.162354', 'step': 17354, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:42.218818', 'step': 17354, 'epoch': 2}
{'type': 'loss', 'content': 0.058354176580905914, 'timestamp': '2025-10-02 00:41:42.221618', 'step': 17355, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:41:42.284511', 'step': 17355, 'epoch': 2}
{'type': 'loss', 'content': 0.052848685532808304, 'timestamp': '2025-10-02 00:41:42.291328', 'step': 17356, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:41:42.360173', 'step': 17356, 'epoch': 2}
{'type': 'loss', 'content': 0.1408582180738449, 'timestamp': '2025-10-02 00:41:42.363682', 'step': 17357, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:42.441160', 'step': 17357, 'epoch': 2}
{'type': 'loss', 'content': 0.08430083841085434, 'timestamp': '2025-10-02 00:41:42.450692', 'step': 17358, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:42.509107', 'step': 17358, 'epoch': 2}
{'type': 'loss', 'content': 0.12744611501693726, 'timestamp': '2025-10-02 00:41:42.512567', 'step': 17359, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:42.573107', 'step': 17359, 'epoch': 2}
{'type': 'loss', 'content': 0.10825235396623611, 'timestamp': '2025-10-02 00:41:42.579724', 'step': 17360, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:42.649430', 'step': 17360, 'epoch': 2}
{'type': 'loss', 'content': 0.03802311792969704, 'timestamp': '2025-10-02 00:41:42.660710', 'step': 17361, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:42.726834', 'step': 17361, 'epoch': 2}
{'type': 'loss', 'content': 0.096215158700943, 'timestamp': '2025-10-02 00:41:42.732980', 'step': 17362, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:42.793674', 'step': 17362, 'epoch': 2}
{'type': 'loss', 'content': 0.038449861109256744, 'timestamp': '2025-10-02 00:41:42.801697', 'step': 17363, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:42.865188', 'step': 17363, 'epoch': 2}
{'type': 'loss', 'content': 0.012026209384202957, 'timestamp': '2025-10-02 00:41:42.876965', 'step': 17364, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:42.944711', 'step': 17364, 'epoch': 2}
{'type': 'loss', 'content': 0.06559823453426361, 'timestamp': '2025-10-02 00:41:42.953630', 'step': 17365, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:43.028881', 'step': 17365, 'epoch': 2}
{'type': 'loss', 'content': 0.05038285627961159, 'timestamp': '2025-10-02 00:41:43.034369', 'step': 17366, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:43.092015', 'step': 17366, 'epoch': 2}
{'type': 'loss', 'content': 0.055924348533153534, 'timestamp': '2025-10-02 00:41:43.098803', 'step': 17367, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:41:43.158004', 'step': 17367, 'epoch': 2}
{'type': 'loss', 'content': 0.08628872036933899, 'timestamp': '2025-10-02 00:41:43.165358', 'step': 17368, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:43.240718', 'step': 17368, 'epoch': 2}
{'type': 'loss', 'content': 0.03169291466474533, 'timestamp': '2025-10-02 00:41:43.251999', 'step': 17369, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:43.309681', 'step': 17369, 'epoch': 2}
{'type': 'loss', 'content': 0.06928001344203949, 'timestamp': '2025-10-02 00:41:43.316824', 'step': 17370, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:43.374541', 'step': 17370, 'epoch': 2}
{'type': 'loss', 'content': 0.017364900559186935, 'timestamp': '2025-10-02 00:41:43.382385', 'step': 17371, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:43.452202', 'step': 17371, 'epoch': 2}
{'type': 'loss', 'content': 0.08910585194826126, 'timestamp': '2025-10-02 00:41:43.460493', 'step': 17372, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:43.518851', 'step': 17372, 'epoch': 2}
{'type': 'loss', 'content': 0.03747011721134186, 'timestamp': '2025-10-02 00:41:43.526099', 'step': 17373, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:43.593348', 'step': 17373, 'epoch': 2}
{'type': 'loss', 'content': 0.041085656732320786, 'timestamp': '2025-10-02 00:41:43.596717', 'step': 17374, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:43.656070', 'step': 17374, 'epoch': 2}
{'type': 'loss', 'content': 0.08684807270765305, 'timestamp': '2025-10-02 00:41:43.659847', 'step': 17375, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:43.723159', 'step': 17375, 'epoch': 2}
{'type': 'loss', 'content': 0.15711238980293274, 'timestamp': '2025-10-02 00:41:43.729597', 'step': 17376, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:41:43.791093', 'step': 17376, 'epoch': 2}
{'type': 'loss', 'content': 0.06728436797857285, 'timestamp': '2025-10-02 00:41:43.794640', 'step': 17377, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:43.851533', 'step': 17377, 'epoch': 2}
{'type': 'loss', 'content': 0.060594405978918076, 'timestamp': '2025-10-02 00:41:43.857035', 'step': 17378, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:43.913393', 'step': 17378, 'epoch': 2}
{'type': 'loss', 'content': 0.1407289355993271, 'timestamp': '2025-10-02 00:41:43.916177', 'step': 17379, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:43.974652', 'step': 17379, 'epoch': 2}
{'type': 'loss', 'content': 0.07539037615060806, 'timestamp': '2025-10-02 00:41:43.984946', 'step': 17380, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:44.039395', 'step': 17380, 'epoch': 2}
{'type': 'loss', 'content': 0.07906230539083481, 'timestamp': '2025-10-02 00:41:44.041748', 'step': 17381, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:44.104285', 'step': 17381, 'epoch': 2}
{'type': 'loss', 'content': 0.035020872950553894, 'timestamp': '2025-10-02 00:41:44.114735', 'step': 17382, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:41:44.171377', 'step': 17382, 'epoch': 2}
{'type': 'loss', 'content': 0.06572627276182175, 'timestamp': '2025-10-02 00:41:44.174289', 'step': 17383, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:41:44.242189', 'step': 17383, 'epoch': 2}
{'type': 'loss', 'content': 0.06215040758252144, 'timestamp': '2025-10-02 00:41:44.254897', 'step': 17384, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:41:44.310068', 'step': 17384, 'epoch': 2}
{'type': 'loss', 'content': 0.05148598179221153, 'timestamp': '2025-10-02 00:41:44.312383', 'step': 17385, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:44.370927', 'step': 17385, 'epoch': 2}
{'type': 'loss', 'content': 0.0110500892624259, 'timestamp': '2025-10-02 00:41:44.378010', 'step': 17386, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:44.435729', 'step': 17386, 'epoch': 2}
{'type': 'loss', 'content': 0.049760401248931885, 'timestamp': '2025-10-02 00:41:44.442786', 'step': 17387, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:44.498655', 'step': 17387, 'epoch': 2}
{'type': 'loss', 'content': 0.16424745321273804, 'timestamp': '2025-10-02 00:41:44.505141', 'step': 17388, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:41:44.559253', 'step': 17388, 'epoch': 2}
{'type': 'loss', 'content': 0.14126357436180115, 'timestamp': '2025-10-02 00:41:44.562236', 'step': 17389, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:41:44.624121', 'step': 17389, 'epoch': 2}
{'type': 'loss', 'content': 0.05505763366818428, 'timestamp': '2025-10-02 00:41:44.634565', 'step': 17390, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:41:44.693262', 'step': 17390, 'epoch': 2}
{'type': 'loss', 'content': 0.02616170421242714, 'timestamp': '2025-10-02 00:41:44.700150', 'step': 17391, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:41:44.763513', 'step': 17391, 'epoch': 2}
{'type': 'loss', 'content': 0.01865418069064617, 'timestamp': '2025-10-02 00:41:44.775121', 'step': 17392, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:44.830312', 'step': 17392, 'epoch': 2}
{'type': 'loss', 'content': 0.08764102309942245, 'timestamp': '2025-10-02 00:41:44.835873', 'step': 17393, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:41:44.891618', 'step': 17393, 'epoch': 2}
{'type': 'loss', 'content': 0.08445759117603302, 'timestamp': '2025-10-02 00:41:44.896713', 'step': 17394, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:41:44.954955', 'step': 17394, 'epoch': 2}
{'type': 'loss', 'content': 0.055837202817201614, 'timestamp': '2025-10-02 00:41:44.958419', 'step': 17395, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:41:45.014852', 'step': 17395, 'epoch': 2}
{'type': 'loss', 'content': 0.05852558836340904, 'timestamp': '2025-10-02 00:41:45.020927', 'step': 17396, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:45.078528', 'step': 17396, 'epoch': 2}
{'type': 'loss', 'content': 0.036016762256622314, 'timestamp': '2025-10-02 00:41:45.084274', 'step': 17397, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:41:45.140756', 'step': 17397, 'epoch': 2}
{'type': 'loss', 'content': 0.02802254632115364, 'timestamp': '2025-10-02 00:41:45.146319', 'step': 17398, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:41:45.204146', 'step': 17398, 'epoch': 2}
{'type': 'loss', 'content': 0.08717279881238937, 'timestamp': '2025-10-02 00:41:45.213685', 'step': 17399, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:41:45.270199', 'step': 17399, 'epoch': 2}
{'type': 'loss', 'content': 0.10499975085258484, 'timestamp': '2025-10-02 00:41:45.276436', 'step': 17400, 'epoch': 2}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:42:11.870547', 'step': 17400, 'epoch': 2}
{'type': 'pplx', 'content': 97.99401367979331, 'timestamp': '2025-10-02 00:42:11.874724', 'step': 17400, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:11.931064', 'step': 17400, 'epoch': 2}
{'type': 'loss', 'content': 0.041394010186195374, 'timestamp': '2025-10-02 00:42:11.941982', 'step': 17401, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:11.997948', 'step': 17401, 'epoch': 2}
{'type': 'loss', 'content': 0.04318268224596977, 'timestamp': '2025-10-02 00:42:12.001024', 'step': 17402, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:42:12.066162', 'step': 17402, 'epoch': 2}
{'type': 'loss', 'content': 0.002932898700237274, 'timestamp': '2025-10-02 00:42:12.076617', 'step': 17403, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:12.132035', 'step': 17403, 'epoch': 2}
{'type': 'loss', 'content': 0.04124537482857704, 'timestamp': '2025-10-02 00:42:12.139680', 'step': 17404, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:12.194004', 'step': 17404, 'epoch': 2}
{'type': 'loss', 'content': 0.11010505259037018, 'timestamp': '2025-10-02 00:42:12.196536', 'step': 17405, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:12.251091', 'step': 17405, 'epoch': 2}
{'type': 'loss', 'content': 0.03804958239197731, 'timestamp': '2025-10-02 00:42:12.254135', 'step': 17406, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:12.309512', 'step': 17406, 'epoch': 2}
{'type': 'loss', 'content': 0.03743787854909897, 'timestamp': '2025-10-02 00:42:12.315039', 'step': 17407, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:12.369412', 'step': 17407, 'epoch': 2}
{'type': 'loss', 'content': 0.12078748643398285, 'timestamp': '2025-10-02 00:42:12.375802', 'step': 17408, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:12.429810', 'step': 17408, 'epoch': 2}
{'type': 'loss', 'content': 0.0665607675909996, 'timestamp': '2025-10-02 00:42:12.436909', 'step': 17409, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:12.495108', 'step': 17409, 'epoch': 2}
{'type': 'loss', 'content': 0.04467104375362396, 'timestamp': '2025-10-02 00:42:12.502267', 'step': 17410, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:12.558819', 'step': 17410, 'epoch': 2}
{'type': 'loss', 'content': 0.03719722852110863, 'timestamp': '2025-10-02 00:42:12.560959', 'step': 17411, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:12.621674', 'step': 17411, 'epoch': 2}
{'type': 'loss', 'content': 0.0075879390351474285, 'timestamp': '2025-10-02 00:42:12.632013', 'step': 17412, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:12.685874', 'step': 17412, 'epoch': 2}
{'type': 'loss', 'content': 0.17771697044372559, 'timestamp': '2025-10-02 00:42:12.688913', 'step': 17413, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:12.749117', 'step': 17413, 'epoch': 2}
{'type': 'loss', 'content': 0.06404920667409897, 'timestamp': '2025-10-02 00:42:12.758482', 'step': 17414, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:12.813785', 'step': 17414, 'epoch': 2}
{'type': 'loss', 'content': 0.07208823412656784, 'timestamp': '2025-10-02 00:42:12.815962', 'step': 17415, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:12.872778', 'step': 17415, 'epoch': 2}
{'type': 'loss', 'content': 0.03827988728880882, 'timestamp': '2025-10-02 00:42:12.879349', 'step': 17416, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:12.934083', 'step': 17416, 'epoch': 2}
{'type': 'loss', 'content': 0.032895371317863464, 'timestamp': '2025-10-02 00:42:12.936515', 'step': 17417, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:12.997145', 'step': 17417, 'epoch': 2}
{'type': 'loss', 'content': 0.038054950535297394, 'timestamp': '2025-10-02 00:42:12.999797', 'step': 17418, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:13.054723', 'step': 17418, 'epoch': 2}
{'type': 'loss', 'content': 0.0663953423500061, 'timestamp': '2025-10-02 00:42:13.058455', 'step': 17419, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:13.115862', 'step': 17419, 'epoch': 2}
{'type': 'loss', 'content': 0.021662764251232147, 'timestamp': '2025-10-02 00:42:13.122684', 'step': 17420, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:13.180216', 'step': 17420, 'epoch': 2}
{'type': 'loss', 'content': 0.045745011419057846, 'timestamp': '2025-10-02 00:42:13.182573', 'step': 17421, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:13.238588', 'step': 17421, 'epoch': 2}
{'type': 'loss', 'content': 0.05024217814207077, 'timestamp': '2025-10-02 00:42:13.241112', 'step': 17422, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:13.296105', 'step': 17422, 'epoch': 2}
{'type': 'loss', 'content': 0.10300109535455704, 'timestamp': '2025-10-02 00:42:13.299274', 'step': 17423, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:13.357507', 'step': 17423, 'epoch': 2}
{'type': 'loss', 'content': 0.1536659151315689, 'timestamp': '2025-10-02 00:42:13.363559', 'step': 17424, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:13.419865', 'step': 17424, 'epoch': 2}
{'type': 'loss', 'content': 0.02585011161863804, 'timestamp': '2025-10-02 00:42:13.425485', 'step': 17425, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:13.497611', 'step': 17425, 'epoch': 2}
{'type': 'loss', 'content': 0.05910084769129753, 'timestamp': '2025-10-02 00:42:13.507773', 'step': 17426, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:13.565418', 'step': 17426, 'epoch': 2}
{'type': 'loss', 'content': 0.07230260223150253, 'timestamp': '2025-10-02 00:42:13.567644', 'step': 17427, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:13.637960', 'step': 17427, 'epoch': 2}
{'type': 'loss', 'content': 0.04545501992106438, 'timestamp': '2025-10-02 00:42:13.646035', 'step': 17428, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:13.701787', 'step': 17428, 'epoch': 2}
{'type': 'loss', 'content': 0.048578713089227676, 'timestamp': '2025-10-02 00:42:13.707335', 'step': 17429, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:13.764559', 'step': 17429, 'epoch': 2}
{'type': 'loss', 'content': 0.032170772552490234, 'timestamp': '2025-10-02 00:42:13.767388', 'step': 17430, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:13.824213', 'step': 17430, 'epoch': 2}
{'type': 'loss', 'content': 0.00832922849804163, 'timestamp': '2025-10-02 00:42:13.827706', 'step': 17431, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:13.884515', 'step': 17431, 'epoch': 2}
{'type': 'loss', 'content': 0.021583426743745804, 'timestamp': '2025-10-02 00:42:13.891493', 'step': 17432, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:13.945996', 'step': 17432, 'epoch': 2}
{'type': 'loss', 'content': 0.09868238121271133, 'timestamp': '2025-10-02 00:42:13.948649', 'step': 17433, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:14.005176', 'step': 17433, 'epoch': 2}
{'type': 'loss', 'content': 0.038276396691799164, 'timestamp': '2025-10-02 00:42:14.008052', 'step': 17434, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:14.063504', 'step': 17434, 'epoch': 2}
{'type': 'loss', 'content': 0.1609218567609787, 'timestamp': '2025-10-02 00:42:14.066167', 'step': 17435, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:42:14.120721', 'step': 17435, 'epoch': 2}
{'type': 'loss', 'content': 0.18710552155971527, 'timestamp': '2025-10-02 00:42:14.126922', 'step': 17436, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:14.181108', 'step': 17436, 'epoch': 2}
{'type': 'loss', 'content': 0.06603231281042099, 'timestamp': '2025-10-02 00:42:14.183496', 'step': 17437, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:42:14.245046', 'step': 17437, 'epoch': 2}
{'type': 'loss', 'content': 0.05946176499128342, 'timestamp': '2025-10-02 00:42:14.255656', 'step': 17438, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:14.310750', 'step': 17438, 'epoch': 2}
{'type': 'loss', 'content': 0.019695494323968887, 'timestamp': '2025-10-02 00:42:14.314218', 'step': 17439, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:14.372315', 'step': 17439, 'epoch': 2}
{'type': 'loss', 'content': 0.07014703005552292, 'timestamp': '2025-10-02 00:42:14.378424', 'step': 17440, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:14.432662', 'step': 17440, 'epoch': 2}
{'type': 'loss', 'content': 0.031607791781425476, 'timestamp': '2025-10-02 00:42:14.441873', 'step': 17441, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:14.498906', 'step': 17441, 'epoch': 2}
{'type': 'loss', 'content': 0.08936943858861923, 'timestamp': '2025-10-02 00:42:14.501565', 'step': 17442, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:14.557278', 'step': 17442, 'epoch': 2}
{'type': 'loss', 'content': 0.05865956470370293, 'timestamp': '2025-10-02 00:42:14.560012', 'step': 17443, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:14.615904', 'step': 17443, 'epoch': 2}
{'type': 'loss', 'content': 0.046435218304395676, 'timestamp': '2025-10-02 00:42:14.621683', 'step': 17444, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:14.676202', 'step': 17444, 'epoch': 2}
{'type': 'loss', 'content': 0.08845589309930801, 'timestamp': '2025-10-02 00:42:14.679521', 'step': 17445, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:14.735783', 'step': 17445, 'epoch': 2}
{'type': 'loss', 'content': 0.0759643018245697, 'timestamp': '2025-10-02 00:42:14.738551', 'step': 17446, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:14.800964', 'step': 17446, 'epoch': 2}
{'type': 'loss', 'content': 0.060173843055963516, 'timestamp': '2025-10-02 00:42:14.811067', 'step': 17447, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:14.867411', 'step': 17447, 'epoch': 2}
{'type': 'loss', 'content': 0.0667012482881546, 'timestamp': '2025-10-02 00:42:14.875388', 'step': 17448, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:14.931487', 'step': 17448, 'epoch': 2}
{'type': 'loss', 'content': 0.06642691045999527, 'timestamp': '2025-10-02 00:42:14.934464', 'step': 17449, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:14.991335', 'step': 17449, 'epoch': 2}
{'type': 'loss', 'content': 0.0993083193898201, 'timestamp': '2025-10-02 00:42:14.993614', 'step': 17450, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:15.050236', 'step': 17450, 'epoch': 2}
{'type': 'loss', 'content': 0.06584986299276352, 'timestamp': '2025-10-02 00:42:15.053136', 'step': 17451, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:42:15.117768', 'step': 17451, 'epoch': 2}
{'type': 'loss', 'content': 0.021825972944498062, 'timestamp': '2025-10-02 00:42:15.129135', 'step': 17452, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:15.184836', 'step': 17452, 'epoch': 2}
{'type': 'loss', 'content': 0.13580945134162903, 'timestamp': '2025-10-02 00:42:15.187423', 'step': 17453, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:42:15.243025', 'step': 17453, 'epoch': 2}
{'type': 'loss', 'content': 0.05413317307829857, 'timestamp': '2025-10-02 00:42:15.245478', 'step': 17454, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:15.300718', 'step': 17454, 'epoch': 2}
{'type': 'loss', 'content': 0.08000818639993668, 'timestamp': '2025-10-02 00:42:15.303116', 'step': 17455, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:15.358218', 'step': 17455, 'epoch': 2}
{'type': 'loss', 'content': 0.07098948210477829, 'timestamp': '2025-10-02 00:42:15.364836', 'step': 17456, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:42:15.419555', 'step': 17456, 'epoch': 2}
{'type': 'loss', 'content': 0.11962088942527771, 'timestamp': '2025-10-02 00:42:15.422329', 'step': 17457, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:15.476783', 'step': 17457, 'epoch': 2}
{'type': 'loss', 'content': 0.0674721896648407, 'timestamp': '2025-10-02 00:42:15.482649', 'step': 17458, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:15.538404', 'step': 17458, 'epoch': 2}
{'type': 'loss', 'content': 0.10953767597675323, 'timestamp': '2025-10-02 00:42:15.540667', 'step': 17459, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:15.595400', 'step': 17459, 'epoch': 2}
{'type': 'loss', 'content': 0.030764365568757057, 'timestamp': '2025-10-02 00:42:15.601340', 'step': 17460, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:15.655107', 'step': 17460, 'epoch': 2}
{'type': 'loss', 'content': 0.07773010432720184, 'timestamp': '2025-10-02 00:42:15.661930', 'step': 17461, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:42:15.737655', 'step': 17461, 'epoch': 2}
{'type': 'loss', 'content': 0.027464918792247772, 'timestamp': '2025-10-02 00:42:15.749930', 'step': 17462, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:15.804983', 'step': 17462, 'epoch': 2}
{'type': 'loss', 'content': 0.08531387150287628, 'timestamp': '2025-10-02 00:42:15.807422', 'step': 17463, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:15.861610', 'step': 17463, 'epoch': 2}
{'type': 'loss', 'content': 0.1616780012845993, 'timestamp': '2025-10-02 00:42:15.868455', 'step': 17464, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:15.922796', 'step': 17464, 'epoch': 2}
{'type': 'loss', 'content': 0.07953672111034393, 'timestamp': '2025-10-02 00:42:15.925470', 'step': 17465, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:15.980172', 'step': 17465, 'epoch': 2}
{'type': 'loss', 'content': 0.07683020085096359, 'timestamp': '2025-10-02 00:42:15.987458', 'step': 17466, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:16.041867', 'step': 17466, 'epoch': 2}
{'type': 'loss', 'content': 0.05486094206571579, 'timestamp': '2025-10-02 00:42:16.044092', 'step': 17467, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:16.097897', 'step': 17467, 'epoch': 2}
{'type': 'loss', 'content': 0.14096654951572418, 'timestamp': '2025-10-02 00:42:16.103800', 'step': 17468, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:16.170575', 'step': 17468, 'epoch': 2}
{'type': 'loss', 'content': 0.08297094702720642, 'timestamp': '2025-10-02 00:42:16.173150', 'step': 17469, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:16.227701', 'step': 17469, 'epoch': 2}
{'type': 'loss', 'content': 0.12539927661418915, 'timestamp': '2025-10-02 00:42:16.230089', 'step': 17470, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:16.285763', 'step': 17470, 'epoch': 2}
{'type': 'loss', 'content': 0.13596250116825104, 'timestamp': '2025-10-02 00:42:16.288272', 'step': 17471, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:42:16.342545', 'step': 17471, 'epoch': 2}
{'type': 'loss', 'content': 0.050059910863637924, 'timestamp': '2025-10-02 00:42:16.348710', 'step': 17472, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:16.406149', 'step': 17472, 'epoch': 2}
{'type': 'loss', 'content': 0.054120983928442, 'timestamp': '2025-10-02 00:42:16.417103', 'step': 17473, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:16.472528', 'step': 17473, 'epoch': 2}
{'type': 'loss', 'content': 0.024495115503668785, 'timestamp': '2025-10-02 00:42:16.479743', 'step': 17474, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:16.534217', 'step': 17474, 'epoch': 2}
{'type': 'loss', 'content': 0.1422712802886963, 'timestamp': '2025-10-02 00:42:16.536560', 'step': 17475, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:16.592060', 'step': 17475, 'epoch': 2}
{'type': 'loss', 'content': 0.051722947508096695, 'timestamp': '2025-10-02 00:42:16.598068', 'step': 17476, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:16.652603', 'step': 17476, 'epoch': 2}
{'type': 'loss', 'content': 0.11863939464092255, 'timestamp': '2025-10-02 00:42:16.655038', 'step': 17477, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:16.709728', 'step': 17477, 'epoch': 2}
{'type': 'loss', 'content': 0.06274504214525223, 'timestamp': '2025-10-02 00:42:16.713680', 'step': 17478, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:16.769749', 'step': 17478, 'epoch': 2}
{'type': 'loss', 'content': 0.012598914094269276, 'timestamp': '2025-10-02 00:42:16.775388', 'step': 17479, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:16.830762', 'step': 17479, 'epoch': 2}
{'type': 'loss', 'content': 0.11773685365915298, 'timestamp': '2025-10-02 00:42:16.836921', 'step': 17480, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:16.893799', 'step': 17480, 'epoch': 2}
{'type': 'loss', 'content': 0.23888473212718964, 'timestamp': '2025-10-02 00:42:16.896747', 'step': 17481, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:16.952457', 'step': 17481, 'epoch': 2}
{'type': 'loss', 'content': 0.06682153046131134, 'timestamp': '2025-10-02 00:42:16.954986', 'step': 17482, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:17.011453', 'step': 17482, 'epoch': 2}
{'type': 'loss', 'content': 0.07795018702745438, 'timestamp': '2025-10-02 00:42:17.016980', 'step': 17483, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:42:17.072812', 'step': 17483, 'epoch': 2}
{'type': 'loss', 'content': 0.07975821197032928, 'timestamp': '2025-10-02 00:42:17.079111', 'step': 17484, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:17.132992', 'step': 17484, 'epoch': 2}
{'type': 'loss', 'content': 0.05082780122756958, 'timestamp': '2025-10-02 00:42:17.135493', 'step': 17485, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:17.189511', 'step': 17485, 'epoch': 2}
{'type': 'loss', 'content': 0.14509977400302887, 'timestamp': '2025-10-02 00:42:17.192458', 'step': 17486, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:17.247529', 'step': 17486, 'epoch': 2}
{'type': 'loss', 'content': 0.13169187307357788, 'timestamp': '2025-10-02 00:42:17.250557', 'step': 17487, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:17.304804', 'step': 17487, 'epoch': 2}
{'type': 'loss', 'content': 0.11348606646060944, 'timestamp': '2025-10-02 00:42:17.310781', 'step': 17488, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:17.365035', 'step': 17488, 'epoch': 2}
{'type': 'loss', 'content': 0.03347824513912201, 'timestamp': '2025-10-02 00:42:17.367757', 'step': 17489, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:17.426999', 'step': 17489, 'epoch': 2}
{'type': 'loss', 'content': 0.03433402255177498, 'timestamp': '2025-10-02 00:42:17.437160', 'step': 17490, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:17.493697', 'step': 17490, 'epoch': 2}
{'type': 'loss', 'content': 0.06464402377605438, 'timestamp': '2025-10-02 00:42:17.496308', 'step': 17491, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:17.551472', 'step': 17491, 'epoch': 2}
{'type': 'loss', 'content': 0.02228185534477234, 'timestamp': '2025-10-02 00:42:17.557964', 'step': 17492, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:42:17.612371', 'step': 17492, 'epoch': 2}
{'type': 'loss', 'content': 0.0681564137339592, 'timestamp': '2025-10-02 00:42:17.616861', 'step': 17493, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:17.670815', 'step': 17493, 'epoch': 2}
{'type': 'loss', 'content': 0.13041073083877563, 'timestamp': '2025-10-02 00:42:17.673341', 'step': 17494, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:17.727990', 'step': 17494, 'epoch': 2}
{'type': 'loss', 'content': 0.12304875254631042, 'timestamp': '2025-10-02 00:42:17.730518', 'step': 17495, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:17.786001', 'step': 17495, 'epoch': 2}
{'type': 'loss', 'content': 0.01771457865834236, 'timestamp': '2025-10-02 00:42:17.792427', 'step': 17496, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:17.846057', 'step': 17496, 'epoch': 2}
{'type': 'loss', 'content': 0.054223135113716125, 'timestamp': '2025-10-02 00:42:17.851770', 'step': 17497, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:17.906622', 'step': 17497, 'epoch': 2}
{'type': 'loss', 'content': 0.10609736293554306, 'timestamp': '2025-10-02 00:42:17.909089', 'step': 17498, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:17.968672', 'step': 17498, 'epoch': 2}
{'type': 'loss', 'content': 0.020847508683800697, 'timestamp': '2025-10-02 00:42:17.978798', 'step': 17499, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:18.035201', 'step': 17499, 'epoch': 2}
{'type': 'loss', 'content': 0.0321086086332798, 'timestamp': '2025-10-02 00:42:18.041729', 'step': 17500, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 17500', 'timestamp': '2025-10-02 00:42:18.466796', 'step': 17500, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:42:18.527676', 'step': 17500, 'epoch': 2}
{'type': 'loss', 'content': 0.024669626727700233, 'timestamp': '2025-10-02 00:42:18.538845', 'step': 17501, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:18.599920', 'step': 17501, 'epoch': 2}
{'type': 'loss', 'content': 0.05601535737514496, 'timestamp': '2025-10-02 00:42:18.602393', 'step': 17502, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:18.656673', 'step': 17502, 'epoch': 2}
{'type': 'loss', 'content': 0.07146834582090378, 'timestamp': '2025-10-02 00:42:18.659049', 'step': 17503, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:18.714030', 'step': 17503, 'epoch': 2}
{'type': 'loss', 'content': 0.05398568883538246, 'timestamp': '2025-10-02 00:42:18.720073', 'step': 17504, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:18.774685', 'step': 17504, 'epoch': 2}
{'type': 'loss', 'content': 0.06527455151081085, 'timestamp': '2025-10-02 00:42:18.776960', 'step': 17505, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:18.831856', 'step': 17505, 'epoch': 2}
{'type': 'loss', 'content': 0.060291700065135956, 'timestamp': '2025-10-02 00:42:18.841091', 'step': 17506, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:18.895909', 'step': 17506, 'epoch': 2}
{'type': 'loss', 'content': 0.01521798875182867, 'timestamp': '2025-10-02 00:42:18.903189', 'step': 17507, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:18.958506', 'step': 17507, 'epoch': 2}
{'type': 'loss', 'content': 0.1117645651102066, 'timestamp': '2025-10-02 00:42:18.964542', 'step': 17508, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:42:19.018964', 'step': 17508, 'epoch': 2}
{'type': 'loss', 'content': 0.10291241109371185, 'timestamp': '2025-10-02 00:42:19.021463', 'step': 17509, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:19.076680', 'step': 17509, 'epoch': 2}
{'type': 'loss', 'content': 0.07901687920093536, 'timestamp': '2025-10-02 00:42:19.079231', 'step': 17510, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:19.134450', 'step': 17510, 'epoch': 2}
{'type': 'loss', 'content': 0.14088378846645355, 'timestamp': '2025-10-02 00:42:19.140088', 'step': 17511, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:19.196218', 'step': 17511, 'epoch': 2}
{'type': 'loss', 'content': 0.017051411792635918, 'timestamp': '2025-10-02 00:42:19.201957', 'step': 17512, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:19.255831', 'step': 17512, 'epoch': 2}
{'type': 'loss', 'content': 0.0720428004860878, 'timestamp': '2025-10-02 00:42:19.258935', 'step': 17513, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:19.316512', 'step': 17513, 'epoch': 2}
{'type': 'loss', 'content': 0.030375277623534203, 'timestamp': '2025-10-02 00:42:19.326026', 'step': 17514, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:19.381443', 'step': 17514, 'epoch': 2}
{'type': 'loss', 'content': 0.1257394552230835, 'timestamp': '2025-10-02 00:42:19.384102', 'step': 17515, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:19.438926', 'step': 17515, 'epoch': 2}
{'type': 'loss', 'content': 0.0769798532128334, 'timestamp': '2025-10-02 00:42:19.444945', 'step': 17516, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:19.498815', 'step': 17516, 'epoch': 2}
{'type': 'loss', 'content': 0.04404677078127861, 'timestamp': '2025-10-02 00:42:19.501161', 'step': 17517, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:19.556071', 'step': 17517, 'epoch': 2}
{'type': 'loss', 'content': 0.03449830785393715, 'timestamp': '2025-10-02 00:42:19.558368', 'step': 17518, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:19.613617', 'step': 17518, 'epoch': 2}
{'type': 'loss', 'content': 0.08029738068580627, 'timestamp': '2025-10-02 00:42:19.616322', 'step': 17519, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:19.670832', 'step': 17519, 'epoch': 2}
{'type': 'loss', 'content': 0.05603286623954773, 'timestamp': '2025-10-02 00:42:19.676743', 'step': 17520, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:19.730227', 'step': 17520, 'epoch': 2}
{'type': 'loss', 'content': 0.06953861564397812, 'timestamp': '2025-10-02 00:42:19.732591', 'step': 17521, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:19.787284', 'step': 17521, 'epoch': 2}
{'type': 'loss', 'content': 0.020649870857596397, 'timestamp': '2025-10-02 00:42:19.793064', 'step': 17522, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:19.847982', 'step': 17522, 'epoch': 2}
{'type': 'loss', 'content': 0.01351523119956255, 'timestamp': '2025-10-02 00:42:19.854963', 'step': 17523, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:19.909782', 'step': 17523, 'epoch': 2}
{'type': 'loss', 'content': 0.037660177797079086, 'timestamp': '2025-10-02 00:42:19.916157', 'step': 17524, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:19.969800', 'step': 17524, 'epoch': 2}
{'type': 'loss', 'content': 0.10721075534820557, 'timestamp': '2025-10-02 00:42:19.972354', 'step': 17525, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:20.026989', 'step': 17525, 'epoch': 2}
{'type': 'loss', 'content': 0.04685819894075394, 'timestamp': '2025-10-02 00:42:20.029393', 'step': 17526, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:20.083611', 'step': 17526, 'epoch': 2}
{'type': 'loss', 'content': 0.09184917062520981, 'timestamp': '2025-10-02 00:42:20.086211', 'step': 17527, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:20.140859', 'step': 17527, 'epoch': 2}
{'type': 'loss', 'content': 0.03368891775608063, 'timestamp': '2025-10-02 00:42:20.149033', 'step': 17528, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:20.203189', 'step': 17528, 'epoch': 2}
{'type': 'loss', 'content': 0.08388028293848038, 'timestamp': '2025-10-02 00:42:20.208902', 'step': 17529, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:20.263655', 'step': 17529, 'epoch': 2}
{'type': 'loss', 'content': 0.013155034743249416, 'timestamp': '2025-10-02 00:42:20.270852', 'step': 17530, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:20.325961', 'step': 17530, 'epoch': 2}
{'type': 'loss', 'content': 0.07855679839849472, 'timestamp': '2025-10-02 00:42:20.328124', 'step': 17531, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:20.382348', 'step': 17531, 'epoch': 2}
{'type': 'loss', 'content': 0.020679213106632233, 'timestamp': '2025-10-02 00:42:20.388344', 'step': 17532, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:20.442837', 'step': 17532, 'epoch': 2}
{'type': 'loss', 'content': 0.06484615057706833, 'timestamp': '2025-10-02 00:42:20.450188', 'step': 17533, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:42:20.514378', 'step': 17533, 'epoch': 2}
{'type': 'loss', 'content': 0.016002248972654343, 'timestamp': '2025-10-02 00:42:20.525233', 'step': 17534, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:20.581222', 'step': 17534, 'epoch': 2}
{'type': 'loss', 'content': 0.006384911481291056, 'timestamp': '2025-10-02 00:42:20.583504', 'step': 17535, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:20.639174', 'step': 17535, 'epoch': 2}
{'type': 'loss', 'content': 0.03024810366332531, 'timestamp': '2025-10-02 00:42:20.645490', 'step': 17536, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:42:20.700355', 'step': 17536, 'epoch': 2}
{'type': 'loss', 'content': 0.09009606391191483, 'timestamp': '2025-10-02 00:42:20.703108', 'step': 17537, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:20.758129', 'step': 17537, 'epoch': 2}
{'type': 'loss', 'content': 0.06109738349914551, 'timestamp': '2025-10-02 00:42:20.760609', 'step': 17538, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:20.816037', 'step': 17538, 'epoch': 2}
{'type': 'loss', 'content': 0.05718346685171127, 'timestamp': '2025-10-02 00:42:20.818299', 'step': 17539, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:20.872866', 'step': 17539, 'epoch': 2}
{'type': 'loss', 'content': 0.046564843505620956, 'timestamp': '2025-10-02 00:42:20.880954', 'step': 17540, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:20.936826', 'step': 17540, 'epoch': 2}
{'type': 'loss', 'content': 0.11722006648778915, 'timestamp': '2025-10-02 00:42:20.939024', 'step': 17541, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:42:21.000858', 'step': 17541, 'epoch': 2}
{'type': 'loss', 'content': 0.03744977340102196, 'timestamp': '2025-10-02 00:42:21.011303', 'step': 17542, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:21.065864', 'step': 17542, 'epoch': 2}
{'type': 'loss', 'content': 0.11580350250005722, 'timestamp': '2025-10-02 00:42:21.068494', 'step': 17543, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:21.123135', 'step': 17543, 'epoch': 2}
{'type': 'loss', 'content': 0.06824401766061783, 'timestamp': '2025-10-02 00:42:21.133135', 'step': 17544, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:42:21.193833', 'step': 17544, 'epoch': 2}
{'type': 'loss', 'content': 0.010017708875238895, 'timestamp': '2025-10-02 00:42:21.205352', 'step': 17545, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:21.263112', 'step': 17545, 'epoch': 2}
{'type': 'loss', 'content': 0.05637838691473007, 'timestamp': '2025-10-02 00:42:21.272625', 'step': 17546, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:42:21.342398', 'step': 17546, 'epoch': 2}
{'type': 'loss', 'content': 0.03498849645256996, 'timestamp': '2025-10-02 00:42:21.354368', 'step': 17547, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:21.409938', 'step': 17547, 'epoch': 2}
{'type': 'loss', 'content': 0.09101704508066177, 'timestamp': '2025-10-02 00:42:21.420204', 'step': 17548, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:21.474604', 'step': 17548, 'epoch': 2}
{'type': 'loss', 'content': 0.09835919737815857, 'timestamp': '2025-10-02 00:42:21.477006', 'step': 17549, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:42:21.531551', 'step': 17549, 'epoch': 2}
{'type': 'loss', 'content': 0.05617620423436165, 'timestamp': '2025-10-02 00:42:21.535918', 'step': 17550, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:21.590623', 'step': 17550, 'epoch': 2}
{'type': 'loss', 'content': 0.0821857675909996, 'timestamp': '2025-10-02 00:42:21.593453', 'step': 17551, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:21.648291', 'step': 17551, 'epoch': 2}
{'type': 'loss', 'content': 0.049769993871450424, 'timestamp': '2025-10-02 00:42:21.654066', 'step': 17552, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:42:21.707604', 'step': 17552, 'epoch': 2}
{'type': 'loss', 'content': 0.0828973799943924, 'timestamp': '2025-10-02 00:42:21.710414', 'step': 17553, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:21.765737', 'step': 17553, 'epoch': 2}
{'type': 'loss', 'content': 0.02436591498553753, 'timestamp': '2025-10-02 00:42:21.768387', 'step': 17554, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:21.823523', 'step': 17554, 'epoch': 2}
{'type': 'loss', 'content': 0.06880541890859604, 'timestamp': '2025-10-02 00:42:21.825989', 'step': 17555, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:21.880113', 'step': 17555, 'epoch': 2}
{'type': 'loss', 'content': 0.08142980933189392, 'timestamp': '2025-10-02 00:42:21.888081', 'step': 17556, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:21.941825', 'step': 17556, 'epoch': 2}
{'type': 'loss', 'content': 0.12200195342302322, 'timestamp': '2025-10-02 00:42:21.944403', 'step': 17557, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:21.999331', 'step': 17557, 'epoch': 2}
{'type': 'loss', 'content': 0.0318804532289505, 'timestamp': '2025-10-02 00:42:22.001532', 'step': 17558, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:22.056515', 'step': 17558, 'epoch': 2}
{'type': 'loss', 'content': 0.10124371200799942, 'timestamp': '2025-10-02 00:42:22.059375', 'step': 17559, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:22.114281', 'step': 17559, 'epoch': 2}
{'type': 'loss', 'content': 0.08915866166353226, 'timestamp': '2025-10-02 00:42:22.120365', 'step': 17560, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:22.175139', 'step': 17560, 'epoch': 2}
{'type': 'loss', 'content': 0.004580378532409668, 'timestamp': '2025-10-02 00:42:22.180770', 'step': 17561, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:22.236416', 'step': 17561, 'epoch': 2}
{'type': 'loss', 'content': 0.014586270786821842, 'timestamp': '2025-10-02 00:42:22.241973', 'step': 17562, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:22.297398', 'step': 17562, 'epoch': 2}
{'type': 'loss', 'content': 0.01148217637091875, 'timestamp': '2025-10-02 00:42:22.306413', 'step': 17563, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:22.361388', 'step': 17563, 'epoch': 2}
{'type': 'loss', 'content': 0.071570485830307, 'timestamp': '2025-10-02 00:42:22.368025', 'step': 17564, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:22.423403', 'step': 17564, 'epoch': 2}
{'type': 'loss', 'content': 0.07602795213460922, 'timestamp': '2025-10-02 00:42:22.428949', 'step': 17565, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:22.485747', 'step': 17565, 'epoch': 2}
{'type': 'loss', 'content': 0.005252878647297621, 'timestamp': '2025-10-02 00:42:22.492982', 'step': 17566, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:22.550039', 'step': 17566, 'epoch': 2}
{'type': 'loss', 'content': 0.11009879410266876, 'timestamp': '2025-10-02 00:42:22.553744', 'step': 17567, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:22.612254', 'step': 17567, 'epoch': 2}
{'type': 'loss', 'content': 0.05108458548784256, 'timestamp': '2025-10-02 00:42:22.619008', 'step': 17568, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:22.675482', 'step': 17568, 'epoch': 2}
{'type': 'loss', 'content': 0.05450905114412308, 'timestamp': '2025-10-02 00:42:22.678432', 'step': 17569, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:22.734611', 'step': 17569, 'epoch': 2}
{'type': 'loss', 'content': 0.10039141029119492, 'timestamp': '2025-10-02 00:42:22.741850', 'step': 17570, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:42:22.805697', 'step': 17570, 'epoch': 2}
{'type': 'loss', 'content': 0.027611782774329185, 'timestamp': '2025-10-02 00:42:22.816185', 'step': 17571, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:22.872051', 'step': 17571, 'epoch': 2}
{'type': 'loss', 'content': 0.03457549959421158, 'timestamp': '2025-10-02 00:42:22.879927', 'step': 17572, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:42:22.941184', 'step': 17572, 'epoch': 2}
{'type': 'loss', 'content': 0.023674767464399338, 'timestamp': '2025-10-02 00:42:22.952491', 'step': 17573, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:23.008436', 'step': 17573, 'epoch': 2}
{'type': 'loss', 'content': 0.036575496196746826, 'timestamp': '2025-10-02 00:42:23.012241', 'step': 17574, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:23.070013', 'step': 17574, 'epoch': 2}
{'type': 'loss', 'content': 0.010551399551331997, 'timestamp': '2025-10-02 00:42:23.079552', 'step': 17575, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:23.144374', 'step': 17575, 'epoch': 2}
{'type': 'loss', 'content': 0.06787833571434021, 'timestamp': '2025-10-02 00:42:23.150949', 'step': 17576, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:23.206396', 'step': 17576, 'epoch': 2}
{'type': 'loss', 'content': 0.117937371134758, 'timestamp': '2025-10-02 00:42:23.209364', 'step': 17577, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:23.265847', 'step': 17577, 'epoch': 2}
{'type': 'loss', 'content': 0.13073520362377167, 'timestamp': '2025-10-02 00:42:23.268234', 'step': 17578, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:23.323402', 'step': 17578, 'epoch': 2}
{'type': 'loss', 'content': 0.12654940783977509, 'timestamp': '2025-10-02 00:42:23.326458', 'step': 17579, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:23.383915', 'step': 17579, 'epoch': 2}
{'type': 'loss', 'content': 0.023813895881175995, 'timestamp': '2025-10-02 00:42:23.395077', 'step': 17580, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:23.448879', 'step': 17580, 'epoch': 2}
{'type': 'loss', 'content': 0.10775269567966461, 'timestamp': '2025-10-02 00:42:23.454484', 'step': 17581, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:42:23.523018', 'step': 17581, 'epoch': 2}
{'type': 'loss', 'content': 0.007601037155836821, 'timestamp': '2025-10-02 00:42:23.534946', 'step': 17582, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:23.592021', 'step': 17582, 'epoch': 2}
{'type': 'loss', 'content': 0.07025136053562164, 'timestamp': '2025-10-02 00:42:23.595024', 'step': 17583, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:42:23.651511', 'step': 17583, 'epoch': 2}
{'type': 'loss', 'content': 0.09339825063943863, 'timestamp': '2025-10-02 00:42:23.658012', 'step': 17584, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:42:23.722165', 'step': 17584, 'epoch': 2}
{'type': 'loss', 'content': 0.14467614889144897, 'timestamp': '2025-10-02 00:42:23.724550', 'step': 17585, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:42:23.793072', 'step': 17585, 'epoch': 2}
{'type': 'loss', 'content': 0.021939676254987717, 'timestamp': '2025-10-02 00:42:23.803692', 'step': 17586, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:23.858853', 'step': 17586, 'epoch': 2}
{'type': 'loss', 'content': 0.002376553136855364, 'timestamp': '2025-10-02 00:42:23.861342', 'step': 17587, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:23.916525', 'step': 17587, 'epoch': 2}
{'type': 'loss', 'content': 0.0726347342133522, 'timestamp': '2025-10-02 00:42:23.923370', 'step': 17588, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:23.983382', 'step': 17588, 'epoch': 2}
{'type': 'loss', 'content': 0.038485798984766006, 'timestamp': '2025-10-02 00:42:23.994374', 'step': 17589, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:24.051890', 'step': 17589, 'epoch': 2}
{'type': 'loss', 'content': 0.02976420894265175, 'timestamp': '2025-10-02 00:42:24.055694', 'step': 17590, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:24.114189', 'step': 17590, 'epoch': 2}
{'type': 'loss', 'content': 0.03881889581680298, 'timestamp': '2025-10-02 00:42:24.121173', 'step': 17591, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:24.176762', 'step': 17591, 'epoch': 2}
{'type': 'loss', 'content': 0.04426591843366623, 'timestamp': '2025-10-02 00:42:24.184621', 'step': 17592, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:42:24.246000', 'step': 17592, 'epoch': 2}
{'type': 'loss', 'content': 0.042089253664016724, 'timestamp': '2025-10-02 00:42:24.257513', 'step': 17593, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:24.314488', 'step': 17593, 'epoch': 2}
{'type': 'loss', 'content': 0.025027712807059288, 'timestamp': '2025-10-02 00:42:24.317302', 'step': 17594, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:24.372667', 'step': 17594, 'epoch': 2}
{'type': 'loss', 'content': 0.1006745845079422, 'timestamp': '2025-10-02 00:42:24.375858', 'step': 17595, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:42:24.439248', 'step': 17595, 'epoch': 2}
{'type': 'loss', 'content': 0.013745971955358982, 'timestamp': '2025-10-02 00:42:24.450530', 'step': 17596, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:24.506249', 'step': 17596, 'epoch': 2}
{'type': 'loss', 'content': 0.024792443960905075, 'timestamp': '2025-10-02 00:42:24.511703', 'step': 17597, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:24.568789', 'step': 17597, 'epoch': 2}
{'type': 'loss', 'content': 0.02783115953207016, 'timestamp': '2025-10-02 00:42:24.578083', 'step': 17598, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:24.633295', 'step': 17598, 'epoch': 2}
{'type': 'loss', 'content': 0.07926258444786072, 'timestamp': '2025-10-02 00:42:24.635675', 'step': 17599, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:24.690045', 'step': 17599, 'epoch': 2}
{'type': 'loss', 'content': 0.08090882748365402, 'timestamp': '2025-10-02 00:42:24.696224', 'step': 17600, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:24.750787', 'step': 17600, 'epoch': 2}
{'type': 'loss', 'content': 0.026627192273736, 'timestamp': '2025-10-02 00:42:24.753231', 'step': 17601, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:24.809327', 'step': 17601, 'epoch': 2}
{'type': 'loss', 'content': 0.026581058278679848, 'timestamp': '2025-10-02 00:42:24.818849', 'step': 17602, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:24.874288', 'step': 17602, 'epoch': 2}
{'type': 'loss', 'content': 0.1029428020119667, 'timestamp': '2025-10-02 00:42:24.883806', 'step': 17603, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:24.938314', 'step': 17603, 'epoch': 2}
{'type': 'loss', 'content': 0.12647360563278198, 'timestamp': '2025-10-02 00:42:24.944299', 'step': 17604, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:24.998256', 'step': 17604, 'epoch': 2}
{'type': 'loss', 'content': 0.015394514426589012, 'timestamp': '2025-10-02 00:42:25.003895', 'step': 17605, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:25.058760', 'step': 17605, 'epoch': 2}
{'type': 'loss', 'content': 0.09688400477170944, 'timestamp': '2025-10-02 00:42:25.060991', 'step': 17606, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:25.115792', 'step': 17606, 'epoch': 2}
{'type': 'loss', 'content': 0.047301266342401505, 'timestamp': '2025-10-02 00:42:25.118546', 'step': 17607, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:25.173092', 'step': 17607, 'epoch': 2}
{'type': 'loss', 'content': 0.06456292420625687, 'timestamp': '2025-10-02 00:42:25.179162', 'step': 17608, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:25.238651', 'step': 17608, 'epoch': 2}
{'type': 'loss', 'content': 0.008840466849505901, 'timestamp': '2025-10-02 00:42:25.240902', 'step': 17609, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:25.295627', 'step': 17609, 'epoch': 2}
{'type': 'loss', 'content': 0.05281989648938179, 'timestamp': '2025-10-02 00:42:25.301158', 'step': 17610, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:25.361061', 'step': 17610, 'epoch': 2}
{'type': 'loss', 'content': 0.034378014504909515, 'timestamp': '2025-10-02 00:42:25.371198', 'step': 17611, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:25.428656', 'step': 17611, 'epoch': 2}
{'type': 'loss', 'content': 0.029213031753897667, 'timestamp': '2025-10-02 00:42:25.434461', 'step': 17612, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:25.488489', 'step': 17612, 'epoch': 2}
{'type': 'loss', 'content': 0.036151498556137085, 'timestamp': '2025-10-02 00:42:25.495751', 'step': 17613, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:42:25.550511', 'step': 17613, 'epoch': 2}
{'type': 'loss', 'content': 0.10906490683555603, 'timestamp': '2025-10-02 00:42:25.553811', 'step': 17614, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:25.612021', 'step': 17614, 'epoch': 2}
{'type': 'loss', 'content': 0.01747281849384308, 'timestamp': '2025-10-02 00:42:25.621483', 'step': 17615, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:25.676731', 'step': 17615, 'epoch': 2}
{'type': 'loss', 'content': 0.13069801032543182, 'timestamp': '2025-10-02 00:42:25.683361', 'step': 17616, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:25.737498', 'step': 17616, 'epoch': 2}
{'type': 'loss', 'content': 0.015750031918287277, 'timestamp': '2025-10-02 00:42:25.740020', 'step': 17617, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:25.796426', 'step': 17617, 'epoch': 2}
{'type': 'loss', 'content': 0.1118648499250412, 'timestamp': '2025-10-02 00:42:25.798698', 'step': 17618, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:25.853703', 'step': 17618, 'epoch': 2}
{'type': 'loss', 'content': 0.04232480749487877, 'timestamp': '2025-10-02 00:42:25.856428', 'step': 17619, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:25.911512', 'step': 17619, 'epoch': 2}
{'type': 'loss', 'content': 0.03405420482158661, 'timestamp': '2025-10-02 00:42:25.918598', 'step': 17620, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:25.973418', 'step': 17620, 'epoch': 2}
{'type': 'loss', 'content': 0.12801049649715424, 'timestamp': '2025-10-02 00:42:25.975693', 'step': 17621, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:26.030696', 'step': 17621, 'epoch': 2}
{'type': 'loss', 'content': 0.07459519803524017, 'timestamp': '2025-10-02 00:42:26.040054', 'step': 17622, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:26.095112', 'step': 17622, 'epoch': 2}
{'type': 'loss', 'content': 0.022185783833265305, 'timestamp': '2025-10-02 00:42:26.097857', 'step': 17623, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:26.152991', 'step': 17623, 'epoch': 2}
{'type': 'loss', 'content': 0.04046284407377243, 'timestamp': '2025-10-02 00:42:26.159271', 'step': 17624, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:26.213682', 'step': 17624, 'epoch': 2}
{'type': 'loss', 'content': 0.06369409710168839, 'timestamp': '2025-10-02 00:42:26.216129', 'step': 17625, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:26.270804', 'step': 17625, 'epoch': 2}
{'type': 'loss', 'content': 0.08791173249483109, 'timestamp': '2025-10-02 00:42:26.273559', 'step': 17626, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:26.331837', 'step': 17626, 'epoch': 2}
{'type': 'loss', 'content': 0.008498964831233025, 'timestamp': '2025-10-02 00:42:26.336338', 'step': 17627, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:26.391689', 'step': 17627, 'epoch': 2}
{'type': 'loss', 'content': 0.056032054126262665, 'timestamp': '2025-10-02 00:42:26.401835', 'step': 17628, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:26.456272', 'step': 17628, 'epoch': 2}
{'type': 'loss', 'content': 0.05381212383508682, 'timestamp': '2025-10-02 00:42:26.459135', 'step': 17629, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:26.518769', 'step': 17629, 'epoch': 2}
{'type': 'loss', 'content': 0.006049748510122299, 'timestamp': '2025-10-02 00:42:26.528938', 'step': 17630, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:26.589063', 'step': 17630, 'epoch': 2}
{'type': 'loss', 'content': 0.03135768696665764, 'timestamp': '2025-10-02 00:42:26.591711', 'step': 17631, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:42:26.648389', 'step': 17631, 'epoch': 2}
{'type': 'loss', 'content': 0.19958855211734772, 'timestamp': '2025-10-02 00:42:26.654786', 'step': 17632, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:26.713067', 'step': 17632, 'epoch': 2}
{'type': 'loss', 'content': 0.029335061088204384, 'timestamp': '2025-10-02 00:42:26.718796', 'step': 17633, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:42:26.773433', 'step': 17633, 'epoch': 2}
{'type': 'loss', 'content': 0.061803169548511505, 'timestamp': '2025-10-02 00:42:26.776427', 'step': 17634, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:42:26.831622', 'step': 17634, 'epoch': 2}
{'type': 'loss', 'content': 0.14871063828468323, 'timestamp': '2025-10-02 00:42:26.834412', 'step': 17635, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:26.890176', 'step': 17635, 'epoch': 2}
{'type': 'loss', 'content': 0.0455537848174572, 'timestamp': '2025-10-02 00:42:26.898287', 'step': 17636, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:26.952281', 'step': 17636, 'epoch': 2}
{'type': 'loss', 'content': 0.047544509172439575, 'timestamp': '2025-10-02 00:42:26.957918', 'step': 17637, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:42:27.013502', 'step': 17637, 'epoch': 2}
{'type': 'loss', 'content': 0.15895052254199982, 'timestamp': '2025-10-02 00:42:27.016005', 'step': 17638, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:27.070615', 'step': 17638, 'epoch': 2}
{'type': 'loss', 'content': 0.03617601469159126, 'timestamp': '2025-10-02 00:42:27.073628', 'step': 17639, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:27.128418', 'step': 17639, 'epoch': 2}
{'type': 'loss', 'content': 0.09504232555627823, 'timestamp': '2025-10-02 00:42:27.134618', 'step': 17640, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:27.192965', 'step': 17640, 'epoch': 2}
{'type': 'loss', 'content': 0.024063101038336754, 'timestamp': '2025-10-02 00:42:27.203947', 'step': 17641, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:27.259857', 'step': 17641, 'epoch': 2}
{'type': 'loss', 'content': 0.014215831644833088, 'timestamp': '2025-10-02 00:42:27.268830', 'step': 17642, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:27.326922', 'step': 17642, 'epoch': 2}
{'type': 'loss', 'content': 0.04291769117116928, 'timestamp': '2025-10-02 00:42:27.332467', 'step': 17643, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:27.387596', 'step': 17643, 'epoch': 2}
{'type': 'loss', 'content': 0.0379522442817688, 'timestamp': '2025-10-02 00:42:27.393853', 'step': 17644, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:27.447854', 'step': 17644, 'epoch': 2}
{'type': 'loss', 'content': 0.07341412454843521, 'timestamp': '2025-10-02 00:42:27.453547', 'step': 17645, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:42:27.509773', 'step': 17645, 'epoch': 2}
{'type': 'loss', 'content': 0.13870994746685028, 'timestamp': '2025-10-02 00:42:27.513840', 'step': 17646, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:27.569579', 'step': 17646, 'epoch': 2}
{'type': 'loss', 'content': 0.02090916596353054, 'timestamp': '2025-10-02 00:42:27.574937', 'step': 17647, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:27.630244', 'step': 17647, 'epoch': 2}
{'type': 'loss', 'content': 0.05922139808535576, 'timestamp': '2025-10-02 00:42:27.636416', 'step': 17648, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:27.691132', 'step': 17648, 'epoch': 2}
{'type': 'loss', 'content': 0.101919025182724, 'timestamp': '2025-10-02 00:42:27.693293', 'step': 17649, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:42:27.755067', 'step': 17649, 'epoch': 2}
{'type': 'loss', 'content': 0.009073993191123009, 'timestamp': '2025-10-02 00:42:27.765512', 'step': 17650, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:27.820700', 'step': 17650, 'epoch': 2}
{'type': 'loss', 'content': 0.016251498833298683, 'timestamp': '2025-10-02 00:42:27.824877', 'step': 17651, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:27.879041', 'step': 17651, 'epoch': 2}
{'type': 'loss', 'content': 0.07976947724819183, 'timestamp': '2025-10-02 00:42:27.885043', 'step': 17652, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:27.939660', 'step': 17652, 'epoch': 2}
{'type': 'loss', 'content': 0.05793578922748566, 'timestamp': '2025-10-02 00:42:27.945355', 'step': 17653, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:28.001608', 'step': 17653, 'epoch': 2}
{'type': 'loss', 'content': 0.05714981257915497, 'timestamp': '2025-10-02 00:42:28.011150', 'step': 17654, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:28.066633', 'step': 17654, 'epoch': 2}
{'type': 'loss', 'content': 0.07390883564949036, 'timestamp': '2025-10-02 00:42:28.068920', 'step': 17655, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:42:28.129819', 'step': 17655, 'epoch': 2}
{'type': 'loss', 'content': 0.023602833971381187, 'timestamp': '2025-10-02 00:42:28.141093', 'step': 17656, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:28.196499', 'step': 17656, 'epoch': 2}
{'type': 'loss', 'content': 0.06627874821424484, 'timestamp': '2025-10-02 00:42:28.206754', 'step': 17657, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:28.270709', 'step': 17657, 'epoch': 2}
{'type': 'loss', 'content': 0.019753756001591682, 'timestamp': '2025-10-02 00:42:28.277948', 'step': 17658, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:28.337787', 'step': 17658, 'epoch': 2}
{'type': 'loss', 'content': 0.0841958224773407, 'timestamp': '2025-10-02 00:42:28.347942', 'step': 17659, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:28.402917', 'step': 17659, 'epoch': 2}
{'type': 'loss', 'content': 0.01146114245057106, 'timestamp': '2025-10-02 00:42:28.412961', 'step': 17660, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:28.466859', 'step': 17660, 'epoch': 2}
{'type': 'loss', 'content': 0.03002966195344925, 'timestamp': '2025-10-02 00:42:28.473107', 'step': 17661, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:28.537611', 'step': 17661, 'epoch': 2}
{'type': 'loss', 'content': 0.08033256977796555, 'timestamp': '2025-10-02 00:42:28.539790', 'step': 17662, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:42:28.602312', 'step': 17662, 'epoch': 2}
{'type': 'loss', 'content': 0.11715742200613022, 'timestamp': '2025-10-02 00:42:28.612748', 'step': 17663, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:28.667507', 'step': 17663, 'epoch': 2}
{'type': 'loss', 'content': 0.12505073845386505, 'timestamp': '2025-10-02 00:42:28.673820', 'step': 17664, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:28.728108', 'step': 17664, 'epoch': 2}
{'type': 'loss', 'content': 0.06719936430454254, 'timestamp': '2025-10-02 00:42:28.737269', 'step': 17665, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:28.795369', 'step': 17665, 'epoch': 2}
{'type': 'loss', 'content': 0.0916876271367073, 'timestamp': '2025-10-02 00:42:28.797624', 'step': 17666, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:28.851992', 'step': 17666, 'epoch': 2}
{'type': 'loss', 'content': 0.02136370539665222, 'timestamp': '2025-10-02 00:42:28.854368', 'step': 17667, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:28.909981', 'step': 17667, 'epoch': 2}
{'type': 'loss', 'content': 0.029341567307710648, 'timestamp': '2025-10-02 00:42:28.920280', 'step': 17668, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:28.974083', 'step': 17668, 'epoch': 2}
{'type': 'loss', 'content': 0.007992958649992943, 'timestamp': '2025-10-02 00:42:28.979705', 'step': 17669, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:29.034250', 'step': 17669, 'epoch': 2}
{'type': 'loss', 'content': 0.04783770814538002, 'timestamp': '2025-10-02 00:42:29.036621', 'step': 17670, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:29.091365', 'step': 17670, 'epoch': 2}
{'type': 'loss', 'content': 0.04884039983153343, 'timestamp': '2025-10-02 00:42:29.093823', 'step': 17671, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:42:29.147724', 'step': 17671, 'epoch': 2}
{'type': 'loss', 'content': 0.0665251687169075, 'timestamp': '2025-10-02 00:42:29.153933', 'step': 17672, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:29.208944', 'step': 17672, 'epoch': 2}
{'type': 'loss', 'content': 0.07241283357143402, 'timestamp': '2025-10-02 00:42:29.214660', 'step': 17673, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:42:29.268879', 'step': 17673, 'epoch': 2}
{'type': 'loss', 'content': 0.1377381980419159, 'timestamp': '2025-10-02 00:42:29.271254', 'step': 17674, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:42:29.340677', 'step': 17674, 'epoch': 2}
{'type': 'loss', 'content': 0.02174309268593788, 'timestamp': '2025-10-02 00:42:29.353077', 'step': 17675, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:29.411896', 'step': 17675, 'epoch': 2}
{'type': 'loss', 'content': 0.0038562719710171223, 'timestamp': '2025-10-02 00:42:29.422841', 'step': 17676, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:29.478088', 'step': 17676, 'epoch': 2}
{'type': 'loss', 'content': 0.028307609260082245, 'timestamp': '2025-10-02 00:42:29.480154', 'step': 17677, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:29.534953', 'step': 17677, 'epoch': 2}
{'type': 'loss', 'content': 0.022689886391162872, 'timestamp': '2025-10-02 00:42:29.537483', 'step': 17678, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:29.592322', 'step': 17678, 'epoch': 2}
{'type': 'loss', 'content': 0.02576594054698944, 'timestamp': '2025-10-02 00:42:29.594607', 'step': 17679, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:29.650523', 'step': 17679, 'epoch': 2}
{'type': 'loss', 'content': 0.019930053502321243, 'timestamp': '2025-10-02 00:42:29.656436', 'step': 17680, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:29.710396', 'step': 17680, 'epoch': 2}
{'type': 'loss', 'content': 0.12996478378772736, 'timestamp': '2025-10-02 00:42:29.717400', 'step': 17681, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:29.772112', 'step': 17681, 'epoch': 2}
{'type': 'loss', 'content': 0.19779609143733978, 'timestamp': '2025-10-02 00:42:29.775024', 'step': 17682, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:29.830199', 'step': 17682, 'epoch': 2}
{'type': 'loss', 'content': 0.07222868502140045, 'timestamp': '2025-10-02 00:42:29.832604', 'step': 17683, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:29.888049', 'step': 17683, 'epoch': 2}
{'type': 'loss', 'content': 0.06343617290258408, 'timestamp': '2025-10-02 00:42:29.898344', 'step': 17684, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:29.953634', 'step': 17684, 'epoch': 2}
{'type': 'loss', 'content': 0.034995485097169876, 'timestamp': '2025-10-02 00:42:29.956346', 'step': 17685, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:30.011433', 'step': 17685, 'epoch': 2}
{'type': 'loss', 'content': 0.017283296212553978, 'timestamp': '2025-10-02 00:42:30.017121', 'step': 17686, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:30.073127', 'step': 17686, 'epoch': 2}
{'type': 'loss', 'content': 0.03352808579802513, 'timestamp': '2025-10-02 00:42:30.082663', 'step': 17687, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:30.137514', 'step': 17687, 'epoch': 2}
{'type': 'loss', 'content': 0.0037901459727436304, 'timestamp': '2025-10-02 00:42:30.143957', 'step': 17688, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:30.197728', 'step': 17688, 'epoch': 2}
{'type': 'loss', 'content': 0.06873597949743271, 'timestamp': '2025-10-02 00:42:30.203214', 'step': 17689, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:30.259025', 'step': 17689, 'epoch': 2}
{'type': 'loss', 'content': 0.02897881343960762, 'timestamp': '2025-10-02 00:42:30.261436', 'step': 17690, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:42:30.323428', 'step': 17690, 'epoch': 2}
{'type': 'loss', 'content': 0.024804146960377693, 'timestamp': '2025-10-02 00:42:30.334071', 'step': 17691, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:42:30.405632', 'step': 17691, 'epoch': 2}
{'type': 'loss', 'content': 0.04144464433193207, 'timestamp': '2025-10-02 00:42:30.418956', 'step': 17692, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:30.473159', 'step': 17692, 'epoch': 2}
{'type': 'loss', 'content': 0.053758833557367325, 'timestamp': '2025-10-02 00:42:30.483308', 'step': 17693, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:30.538392', 'step': 17693, 'epoch': 2}
{'type': 'loss', 'content': 0.027283981442451477, 'timestamp': '2025-10-02 00:42:30.540859', 'step': 17694, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:30.595427', 'step': 17694, 'epoch': 2}
{'type': 'loss', 'content': 0.043961599469184875, 'timestamp': '2025-10-02 00:42:30.604619', 'step': 17695, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:30.660683', 'step': 17695, 'epoch': 2}
{'type': 'loss', 'content': 0.03685664013028145, 'timestamp': '2025-10-02 00:42:30.666731', 'step': 17696, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:30.721238', 'step': 17696, 'epoch': 2}
{'type': 'loss', 'content': 0.09476038068532944, 'timestamp': '2025-10-02 00:42:30.726860', 'step': 17697, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:30.782256', 'step': 17697, 'epoch': 2}
{'type': 'loss', 'content': 0.014472777023911476, 'timestamp': '2025-10-02 00:42:30.791787', 'step': 17698, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:30.846509', 'step': 17698, 'epoch': 2}
{'type': 'loss', 'content': 0.06308595836162567, 'timestamp': '2025-10-02 00:42:30.849245', 'step': 17699, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:30.902953', 'step': 17699, 'epoch': 2}
{'type': 'loss', 'content': 0.07753080129623413, 'timestamp': '2025-10-02 00:42:30.909094', 'step': 17700, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:30.966296', 'step': 17700, 'epoch': 2}
{'type': 'loss', 'content': 0.04027346521615982, 'timestamp': '2025-10-02 00:42:30.973549', 'step': 17701, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:31.028633', 'step': 17701, 'epoch': 2}
{'type': 'loss', 'content': 0.053975317627191544, 'timestamp': '2025-10-02 00:42:31.035708', 'step': 17702, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:42:31.098537', 'step': 17702, 'epoch': 2}
{'type': 'loss', 'content': 0.06701941788196564, 'timestamp': '2025-10-02 00:42:31.109130', 'step': 17703, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:31.165564', 'step': 17703, 'epoch': 2}
{'type': 'loss', 'content': 0.09921786189079285, 'timestamp': '2025-10-02 00:42:31.175684', 'step': 17704, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:31.232941', 'step': 17704, 'epoch': 2}
{'type': 'loss', 'content': 0.02902485430240631, 'timestamp': '2025-10-02 00:42:31.235339', 'step': 17705, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:31.289403', 'step': 17705, 'epoch': 2}
{'type': 'loss', 'content': 0.06973311305046082, 'timestamp': '2025-10-02 00:42:31.291531', 'step': 17706, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:31.346958', 'step': 17706, 'epoch': 2}
{'type': 'loss', 'content': 0.15952308475971222, 'timestamp': '2025-10-02 00:42:31.354093', 'step': 17707, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:31.411514', 'step': 17707, 'epoch': 2}
{'type': 'loss', 'content': 0.01884601078927517, 'timestamp': '2025-10-02 00:42:31.418176', 'step': 17708, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:31.474397', 'step': 17708, 'epoch': 2}
{'type': 'loss', 'content': 0.05757978558540344, 'timestamp': '2025-10-02 00:42:31.477310', 'step': 17709, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:31.534718', 'step': 17709, 'epoch': 2}
{'type': 'loss', 'content': 0.07544729858636856, 'timestamp': '2025-10-02 00:42:31.537762', 'step': 17710, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:42:31.594716', 'step': 17710, 'epoch': 2}
{'type': 'loss', 'content': 0.08128675073385239, 'timestamp': '2025-10-02 00:42:31.597232', 'step': 17711, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:31.652908', 'step': 17711, 'epoch': 2}
{'type': 'loss', 'content': 0.05978689342737198, 'timestamp': '2025-10-02 00:42:31.659926', 'step': 17712, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:31.713826', 'step': 17712, 'epoch': 2}
{'type': 'loss', 'content': 0.15237957239151, 'timestamp': '2025-10-02 00:42:31.716691', 'step': 17713, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:31.776444', 'step': 17713, 'epoch': 2}
{'type': 'loss', 'content': 0.07079660147428513, 'timestamp': '2025-10-02 00:42:31.779747', 'step': 17714, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:31.837905', 'step': 17714, 'epoch': 2}
{'type': 'loss', 'content': 0.03675784543156624, 'timestamp': '2025-10-02 00:42:31.841478', 'step': 17715, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:31.898726', 'step': 17715, 'epoch': 2}
{'type': 'loss', 'content': 0.08820517361164093, 'timestamp': '2025-10-02 00:42:31.905564', 'step': 17716, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:31.960341', 'step': 17716, 'epoch': 2}
{'type': 'loss', 'content': 0.05105580389499664, 'timestamp': '2025-10-02 00:42:31.966005', 'step': 17717, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:32.021045', 'step': 17717, 'epoch': 2}
{'type': 'loss', 'content': 0.06359031796455383, 'timestamp': '2025-10-02 00:42:32.024613', 'step': 17718, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:32.079816', 'step': 17718, 'epoch': 2}
{'type': 'loss', 'content': 0.021846432238817215, 'timestamp': '2025-10-02 00:42:32.082502', 'step': 17719, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:32.138711', 'step': 17719, 'epoch': 2}
{'type': 'loss', 'content': 0.050887953490018845, 'timestamp': '2025-10-02 00:42:32.144929', 'step': 17720, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:32.199914', 'step': 17720, 'epoch': 2}
{'type': 'loss', 'content': 0.025673894211649895, 'timestamp': '2025-10-02 00:42:32.202122', 'step': 17721, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:32.258750', 'step': 17721, 'epoch': 2}
{'type': 'loss', 'content': 0.030770834535360336, 'timestamp': '2025-10-02 00:42:32.261628', 'step': 17722, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:32.319308', 'step': 17722, 'epoch': 2}
{'type': 'loss', 'content': 0.055964235216379166, 'timestamp': '2025-10-02 00:42:32.324942', 'step': 17723, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:32.382308', 'step': 17723, 'epoch': 2}
{'type': 'loss', 'content': 0.010467294603586197, 'timestamp': '2025-10-02 00:42:32.389627', 'step': 17724, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:32.445084', 'step': 17724, 'epoch': 2}
{'type': 'loss', 'content': 0.04910436272621155, 'timestamp': '2025-10-02 00:42:32.448430', 'step': 17725, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:32.505298', 'step': 17725, 'epoch': 2}
{'type': 'loss', 'content': 0.025005033239722252, 'timestamp': '2025-10-02 00:42:32.514825', 'step': 17726, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:42:32.578956', 'step': 17726, 'epoch': 2}
{'type': 'loss', 'content': 0.0337093248963356, 'timestamp': '2025-10-02 00:42:32.589393', 'step': 17727, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:42:32.660410', 'step': 17727, 'epoch': 2}
{'type': 'loss', 'content': 0.0635433942079544, 'timestamp': '2025-10-02 00:42:32.673790', 'step': 17728, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:32.731321', 'step': 17728, 'epoch': 2}
{'type': 'loss', 'content': 0.15512876212596893, 'timestamp': '2025-10-02 00:42:32.733754', 'step': 17729, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:32.790118', 'step': 17729, 'epoch': 2}
{'type': 'loss', 'content': 0.041145555675029755, 'timestamp': '2025-10-02 00:42:32.792809', 'step': 17730, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:32.848623', 'step': 17730, 'epoch': 2}
{'type': 'loss', 'content': 0.17189665138721466, 'timestamp': '2025-10-02 00:42:32.850946', 'step': 17731, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:32.906203', 'step': 17731, 'epoch': 2}
{'type': 'loss', 'content': 0.005108228884637356, 'timestamp': '2025-10-02 00:42:32.913015', 'step': 17732, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:32.969176', 'step': 17732, 'epoch': 2}
{'type': 'loss', 'content': 0.046237874776124954, 'timestamp': '2025-10-02 00:42:32.972118', 'step': 17733, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:33.028618', 'step': 17733, 'epoch': 2}
{'type': 'loss', 'content': 0.11028941720724106, 'timestamp': '2025-10-02 00:42:33.031490', 'step': 17734, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:33.087921', 'step': 17734, 'epoch': 2}
{'type': 'loss', 'content': 0.034951791167259216, 'timestamp': '2025-10-02 00:42:33.097044', 'step': 17735, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:42:33.150953', 'step': 17735, 'epoch': 2}
{'type': 'loss', 'content': 0.0911717340350151, 'timestamp': '2025-10-02 00:42:33.156905', 'step': 17736, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:33.210833', 'step': 17736, 'epoch': 2}
{'type': 'loss', 'content': 0.08142124116420746, 'timestamp': '2025-10-02 00:42:33.213325', 'step': 17737, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:33.267728', 'step': 17737, 'epoch': 2}
{'type': 'loss', 'content': 0.05999373644590378, 'timestamp': '2025-10-02 00:42:33.274787', 'step': 17738, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:33.331276', 'step': 17738, 'epoch': 2}
{'type': 'loss', 'content': 0.007094433065503836, 'timestamp': '2025-10-02 00:42:33.336679', 'step': 17739, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:33.391616', 'step': 17739, 'epoch': 2}
{'type': 'loss', 'content': 0.07883337140083313, 'timestamp': '2025-10-02 00:42:33.397767', 'step': 17740, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:33.452516', 'step': 17740, 'epoch': 2}
{'type': 'loss', 'content': 0.14041902124881744, 'timestamp': '2025-10-02 00:42:33.455998', 'step': 17741, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:33.510334', 'step': 17741, 'epoch': 2}
{'type': 'loss', 'content': 0.07805142551660538, 'timestamp': '2025-10-02 00:42:33.512828', 'step': 17742, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:33.568084', 'step': 17742, 'epoch': 2}
{'type': 'loss', 'content': 0.013489454053342342, 'timestamp': '2025-10-02 00:42:33.577310', 'step': 17743, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:33.635507', 'step': 17743, 'epoch': 2}
{'type': 'loss', 'content': 0.059655822813510895, 'timestamp': '2025-10-02 00:42:33.641597', 'step': 17744, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:33.696370', 'step': 17744, 'epoch': 2}
{'type': 'loss', 'content': 0.002694373717531562, 'timestamp': '2025-10-02 00:42:33.706659', 'step': 17745, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:33.762200', 'step': 17745, 'epoch': 2}
{'type': 'loss', 'content': 0.02847006730735302, 'timestamp': '2025-10-02 00:42:33.765017', 'step': 17746, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:33.823300', 'step': 17746, 'epoch': 2}
{'type': 'loss', 'content': 0.06121020391583443, 'timestamp': '2025-10-02 00:42:33.825887', 'step': 17747, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:33.881014', 'step': 17747, 'epoch': 2}
{'type': 'loss', 'content': 0.009059161879122257, 'timestamp': '2025-10-02 00:42:33.888031', 'step': 17748, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:33.942113', 'step': 17748, 'epoch': 2}
{'type': 'loss', 'content': 0.050185613334178925, 'timestamp': '2025-10-02 00:42:33.944380', 'step': 17749, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:42:34.006886', 'step': 17749, 'epoch': 2}
{'type': 'loss', 'content': 0.027082566171884537, 'timestamp': '2025-10-02 00:42:34.017752', 'step': 17750, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:34.072947', 'step': 17750, 'epoch': 2}
{'type': 'loss', 'content': 0.11733531951904297, 'timestamp': '2025-10-02 00:42:34.075242', 'step': 17751, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:34.131291', 'step': 17751, 'epoch': 2}
{'type': 'loss', 'content': 0.07293055951595306, 'timestamp': '2025-10-02 00:42:34.137813', 'step': 17752, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:34.192815', 'step': 17752, 'epoch': 2}
{'type': 'loss', 'content': 0.03230227902531624, 'timestamp': '2025-10-02 00:42:34.198635', 'step': 17753, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:34.255054', 'step': 17753, 'epoch': 2}
{'type': 'loss', 'content': 0.022159839048981667, 'timestamp': '2025-10-02 00:42:34.264388', 'step': 17754, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:42:34.319260', 'step': 17754, 'epoch': 2}
{'type': 'loss', 'content': 0.09341657906770706, 'timestamp': '2025-10-02 00:42:34.322031', 'step': 17755, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:34.377544', 'step': 17755, 'epoch': 2}
{'type': 'loss', 'content': 0.06904187798500061, 'timestamp': '2025-10-02 00:42:34.383921', 'step': 17756, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:42:34.451605', 'step': 17756, 'epoch': 2}
{'type': 'loss', 'content': 0.051374826580286026, 'timestamp': '2025-10-02 00:42:34.462909', 'step': 17757, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:34.539428', 'step': 17757, 'epoch': 2}
{'type': 'loss', 'content': 0.014709140174090862, 'timestamp': '2025-10-02 00:42:34.559942', 'step': 17758, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:34.624438', 'step': 17758, 'epoch': 2}
{'type': 'loss', 'content': 0.03570369631052017, 'timestamp': '2025-10-02 00:42:34.630538', 'step': 17759, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:34.704614', 'step': 17759, 'epoch': 2}
{'type': 'loss', 'content': 0.09471938014030457, 'timestamp': '2025-10-02 00:42:34.711557', 'step': 17760, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:42:34.772098', 'step': 17760, 'epoch': 2}
{'type': 'loss', 'content': 0.12255381047725677, 'timestamp': '2025-10-02 00:42:34.775555', 'step': 17761, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:34.839508', 'step': 17761, 'epoch': 2}
{'type': 'loss', 'content': 0.04310932755470276, 'timestamp': '2025-10-02 00:42:34.846815', 'step': 17762, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:34.911681', 'step': 17762, 'epoch': 2}
{'type': 'loss', 'content': 0.01637265272438526, 'timestamp': '2025-10-02 00:42:34.915564', 'step': 17763, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:34.973913', 'step': 17763, 'epoch': 2}
{'type': 'loss', 'content': 0.04847963526844978, 'timestamp': '2025-10-02 00:42:34.985884', 'step': 17764, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:42:35.044835', 'step': 17764, 'epoch': 2}
{'type': 'loss', 'content': 0.13714806735515594, 'timestamp': '2025-10-02 00:42:35.051037', 'step': 17765, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:35.120682', 'step': 17765, 'epoch': 2}
{'type': 'loss', 'content': 0.04122456535696983, 'timestamp': '2025-10-02 00:42:35.128616', 'step': 17766, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:35.213147', 'step': 17766, 'epoch': 2}
{'type': 'loss', 'content': 0.03259948268532753, 'timestamp': '2025-10-02 00:42:35.218400', 'step': 17767, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:35.288896', 'step': 17767, 'epoch': 2}
{'type': 'loss', 'content': 0.05843271315097809, 'timestamp': '2025-10-02 00:42:35.295480', 'step': 17768, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:35.365169', 'step': 17768, 'epoch': 2}
{'type': 'loss', 'content': 0.023684462532401085, 'timestamp': '2025-10-02 00:42:35.372225', 'step': 17769, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:35.442757', 'step': 17769, 'epoch': 2}
{'type': 'loss', 'content': 0.03565365448594093, 'timestamp': '2025-10-02 00:42:35.446732', 'step': 17770, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:35.518692', 'step': 17770, 'epoch': 2}
{'type': 'loss', 'content': 0.0667518898844719, 'timestamp': '2025-10-02 00:42:35.522932', 'step': 17771, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:35.592831', 'step': 17771, 'epoch': 2}
{'type': 'loss', 'content': 0.034508753567934036, 'timestamp': '2025-10-02 00:42:35.601560', 'step': 17772, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:35.659354', 'step': 17772, 'epoch': 2}
{'type': 'loss', 'content': 0.034936632961034775, 'timestamp': '2025-10-02 00:42:35.669654', 'step': 17773, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:35.739522', 'step': 17773, 'epoch': 2}
{'type': 'loss', 'content': 0.04472203925251961, 'timestamp': '2025-10-02 00:42:35.745117', 'step': 17774, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:35.801576', 'step': 17774, 'epoch': 2}
{'type': 'loss', 'content': 0.027761351317167282, 'timestamp': '2025-10-02 00:42:35.805125', 'step': 17775, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:35.863174', 'step': 17775, 'epoch': 2}
{'type': 'loss', 'content': 0.1380341649055481, 'timestamp': '2025-10-02 00:42:35.871267', 'step': 17776, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:35.950043', 'step': 17776, 'epoch': 2}
{'type': 'loss', 'content': 0.010337268002331257, 'timestamp': '2025-10-02 00:42:35.961054', 'step': 17777, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:36.025642', 'step': 17777, 'epoch': 2}
{'type': 'loss', 'content': 0.023468544706702232, 'timestamp': '2025-10-02 00:42:36.029667', 'step': 17778, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:36.090221', 'step': 17778, 'epoch': 2}
{'type': 'loss', 'content': 0.09271898120641708, 'timestamp': '2025-10-02 00:42:36.093311', 'step': 17779, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:36.155678', 'step': 17779, 'epoch': 2}
{'type': 'loss', 'content': 0.0758984312415123, 'timestamp': '2025-10-02 00:42:36.166099', 'step': 17780, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:36.235799', 'step': 17780, 'epoch': 2}
{'type': 'loss', 'content': 0.07610178738832474, 'timestamp': '2025-10-02 00:42:36.238342', 'step': 17781, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:36.294744', 'step': 17781, 'epoch': 2}
{'type': 'loss', 'content': 0.0737280547618866, 'timestamp': '2025-10-02 00:42:36.301547', 'step': 17782, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:36.364229', 'step': 17782, 'epoch': 2}
{'type': 'loss', 'content': 0.044955093413591385, 'timestamp': '2025-10-02 00:42:36.369671', 'step': 17783, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:36.432366', 'step': 17783, 'epoch': 2}
{'type': 'loss', 'content': 0.06133856996893883, 'timestamp': '2025-10-02 00:42:36.439411', 'step': 17784, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:36.497234', 'step': 17784, 'epoch': 2}
{'type': 'loss', 'content': 0.1267765462398529, 'timestamp': '2025-10-02 00:42:36.501913', 'step': 17785, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:36.568381', 'step': 17785, 'epoch': 2}
{'type': 'loss', 'content': 0.04866178333759308, 'timestamp': '2025-10-02 00:42:36.571582', 'step': 17786, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:36.629149', 'step': 17786, 'epoch': 2}
{'type': 'loss', 'content': 0.05113190785050392, 'timestamp': '2025-10-02 00:42:36.637823', 'step': 17787, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:36.697375', 'step': 17787, 'epoch': 2}
{'type': 'loss', 'content': 0.1753893792629242, 'timestamp': '2025-10-02 00:42:36.704230', 'step': 17788, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:36.761654', 'step': 17788, 'epoch': 2}
{'type': 'loss', 'content': 0.029162602499127388, 'timestamp': '2025-10-02 00:42:36.770833', 'step': 17789, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:36.828698', 'step': 17789, 'epoch': 2}
{'type': 'loss', 'content': 0.029451599344611168, 'timestamp': '2025-10-02 00:42:36.838271', 'step': 17790, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:36.896038', 'step': 17790, 'epoch': 2}
{'type': 'loss', 'content': 0.0786401703953743, 'timestamp': '2025-10-02 00:42:36.901336', 'step': 17791, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:42:36.964917', 'step': 17791, 'epoch': 2}
{'type': 'loss', 'content': 0.033706098794937134, 'timestamp': '2025-10-02 00:42:36.976855', 'step': 17792, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:37.046673', 'step': 17792, 'epoch': 2}
{'type': 'loss', 'content': 0.021273603662848473, 'timestamp': '2025-10-02 00:42:37.056948', 'step': 17793, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:37.117508', 'step': 17793, 'epoch': 2}
{'type': 'loss', 'content': 0.09261776506900787, 'timestamp': '2025-10-02 00:42:37.120382', 'step': 17794, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:37.193581', 'step': 17794, 'epoch': 2}
{'type': 'loss', 'content': 0.10100074857473373, 'timestamp': '2025-10-02 00:42:37.196407', 'step': 17795, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:37.259260', 'step': 17795, 'epoch': 2}
{'type': 'loss', 'content': 0.08232387155294418, 'timestamp': '2025-10-02 00:42:37.267081', 'step': 17796, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:37.323076', 'step': 17796, 'epoch': 2}
{'type': 'loss', 'content': 0.1596774309873581, 'timestamp': '2025-10-02 00:42:37.326169', 'step': 17797, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:37.394746', 'step': 17797, 'epoch': 2}
{'type': 'loss', 'content': 0.10071062296628952, 'timestamp': '2025-10-02 00:42:37.400762', 'step': 17798, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:37.472066', 'step': 17798, 'epoch': 2}
{'type': 'loss', 'content': 0.062397636473178864, 'timestamp': '2025-10-02 00:42:37.475309', 'step': 17799, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:37.539176', 'step': 17799, 'epoch': 2}
{'type': 'loss', 'content': 0.0481843575835228, 'timestamp': '2025-10-02 00:42:37.546604', 'step': 17800, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:37.605291', 'step': 17800, 'epoch': 2}
{'type': 'loss', 'content': 0.024422766640782356, 'timestamp': '2025-10-02 00:42:37.613453', 'step': 17801, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:37.681890', 'step': 17801, 'epoch': 2}
{'type': 'loss', 'content': 0.13708536326885223, 'timestamp': '2025-10-02 00:42:37.684390', 'step': 17802, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:37.753575', 'step': 17802, 'epoch': 2}
{'type': 'loss', 'content': 0.08912215381860733, 'timestamp': '2025-10-02 00:42:37.757841', 'step': 17803, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:37.816934', 'step': 17803, 'epoch': 2}
{'type': 'loss', 'content': 0.019941452890634537, 'timestamp': '2025-10-02 00:42:37.823677', 'step': 17804, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:37.891762', 'step': 17804, 'epoch': 2}
{'type': 'loss', 'content': 0.011047035455703735, 'timestamp': '2025-10-02 00:42:37.899078', 'step': 17805, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:37.964301', 'step': 17805, 'epoch': 2}
{'type': 'loss', 'content': 0.024125708267092705, 'timestamp': '2025-10-02 00:42:37.973492', 'step': 17806, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:38.038802', 'step': 17806, 'epoch': 2}
{'type': 'loss', 'content': 0.11490565538406372, 'timestamp': '2025-10-02 00:42:38.041843', 'step': 17807, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:38.100441', 'step': 17807, 'epoch': 2}
{'type': 'loss', 'content': 0.06662630289793015, 'timestamp': '2025-10-02 00:42:38.110146', 'step': 17808, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:38.165280', 'step': 17808, 'epoch': 2}
{'type': 'loss', 'content': 0.135723277926445, 'timestamp': '2025-10-02 00:42:38.173049', 'step': 17809, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:38.234213', 'step': 17809, 'epoch': 2}
{'type': 'loss', 'content': 0.015487941913306713, 'timestamp': '2025-10-02 00:42:38.243479', 'step': 17810, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:38.301837', 'step': 17810, 'epoch': 2}
{'type': 'loss', 'content': 0.0804075226187706, 'timestamp': '2025-10-02 00:42:38.305881', 'step': 17811, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:38.362240', 'step': 17811, 'epoch': 2}
{'type': 'loss', 'content': 0.08853000402450562, 'timestamp': '2025-10-02 00:42:38.372040', 'step': 17812, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:38.437850', 'step': 17812, 'epoch': 2}
{'type': 'loss', 'content': 0.06610966473817825, 'timestamp': '2025-10-02 00:42:38.443556', 'step': 17813, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:38.507521', 'step': 17813, 'epoch': 2}
{'type': 'loss', 'content': 0.08332100510597229, 'timestamp': '2025-10-02 00:42:38.517732', 'step': 17814, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:42:38.581292', 'step': 17814, 'epoch': 2}
{'type': 'loss', 'content': 0.09945672750473022, 'timestamp': '2025-10-02 00:42:38.593978', 'step': 17815, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:38.650879', 'step': 17815, 'epoch': 2}
{'type': 'loss', 'content': 0.12016317993402481, 'timestamp': '2025-10-02 00:42:38.657873', 'step': 17816, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:38.730033', 'step': 17816, 'epoch': 2}
{'type': 'loss', 'content': 0.09323707222938538, 'timestamp': '2025-10-02 00:42:38.733395', 'step': 17817, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:38.802293', 'step': 17817, 'epoch': 2}
{'type': 'loss', 'content': 0.10150442272424698, 'timestamp': '2025-10-02 00:42:38.812259', 'step': 17818, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:38.880379', 'step': 17818, 'epoch': 2}
{'type': 'loss', 'content': 0.028898902237415314, 'timestamp': '2025-10-02 00:42:38.889860', 'step': 17819, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:42:38.947137', 'step': 17819, 'epoch': 2}
{'type': 'loss', 'content': 0.09016681462526321, 'timestamp': '2025-10-02 00:42:38.961709', 'step': 17820, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:42:39.034400', 'step': 17820, 'epoch': 2}
{'type': 'loss', 'content': 0.05717853829264641, 'timestamp': '2025-10-02 00:42:39.041900', 'step': 17821, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:39.105623', 'step': 17821, 'epoch': 2}
{'type': 'loss', 'content': 0.03413024917244911, 'timestamp': '2025-10-02 00:42:39.115818', 'step': 17822, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:42:39.181628', 'step': 17822, 'epoch': 2}
{'type': 'loss', 'content': 0.15723292529582977, 'timestamp': '2025-10-02 00:42:39.184973', 'step': 17823, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:39.263611', 'step': 17823, 'epoch': 2}
{'type': 'loss', 'content': 0.10219567269086838, 'timestamp': '2025-10-02 00:42:39.292745', 'step': 17824, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:39.362468', 'step': 17824, 'epoch': 2}
{'type': 'loss', 'content': 0.10468198359012604, 'timestamp': '2025-10-02 00:42:39.367670', 'step': 17825, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:39.461366', 'step': 17825, 'epoch': 2}
{'type': 'loss', 'content': 0.09411003440618515, 'timestamp': '2025-10-02 00:42:39.481826', 'step': 17826, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:39.545218', 'step': 17826, 'epoch': 2}
{'type': 'loss', 'content': 0.04102884605526924, 'timestamp': '2025-10-02 00:42:39.550863', 'step': 17827, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:39.614607', 'step': 17827, 'epoch': 2}
{'type': 'loss', 'content': 0.020846834406256676, 'timestamp': '2025-10-02 00:42:39.624290', 'step': 17828, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:42:39.685988', 'step': 17828, 'epoch': 2}
{'type': 'loss', 'content': 0.10340730845928192, 'timestamp': '2025-10-02 00:42:39.696998', 'step': 17829, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:39.763599', 'step': 17829, 'epoch': 2}
{'type': 'loss', 'content': 0.051501866430044174, 'timestamp': '2025-10-02 00:42:39.773773', 'step': 17830, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:39.836227', 'step': 17830, 'epoch': 2}
{'type': 'loss', 'content': 0.053534120321273804, 'timestamp': '2025-10-02 00:42:39.842879', 'step': 17831, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:39.902769', 'step': 17831, 'epoch': 2}
{'type': 'loss', 'content': 0.08546849340200424, 'timestamp': '2025-10-02 00:42:39.910294', 'step': 17832, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:39.970494', 'step': 17832, 'epoch': 2}
{'type': 'loss', 'content': 0.03743252158164978, 'timestamp': '2025-10-02 00:42:39.982286', 'step': 17833, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:40.041618', 'step': 17833, 'epoch': 2}
{'type': 'loss', 'content': 0.020035097375512123, 'timestamp': '2025-10-02 00:42:40.045711', 'step': 17834, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:40.107727', 'step': 17834, 'epoch': 2}
{'type': 'loss', 'content': 0.05254992097616196, 'timestamp': '2025-10-02 00:42:40.117162', 'step': 17835, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:40.177422', 'step': 17835, 'epoch': 2}
{'type': 'loss', 'content': 0.039226703345775604, 'timestamp': '2025-10-02 00:42:40.185482', 'step': 17836, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:40.249814', 'step': 17836, 'epoch': 2}
{'type': 'loss', 'content': 0.07959021627902985, 'timestamp': '2025-10-02 00:42:40.259512', 'step': 17837, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:40.326848', 'step': 17837, 'epoch': 2}
{'type': 'loss', 'content': 0.0251456405967474, 'timestamp': '2025-10-02 00:42:40.330369', 'step': 17838, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:42:40.395330', 'step': 17838, 'epoch': 2}
{'type': 'loss', 'content': 0.024848787114024162, 'timestamp': '2025-10-02 00:42:40.405917', 'step': 17839, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:42:40.463502', 'step': 17839, 'epoch': 2}
{'type': 'loss', 'content': 0.13745582103729248, 'timestamp': '2025-10-02 00:42:40.473959', 'step': 17840, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:40.539983', 'step': 17840, 'epoch': 2}
{'type': 'loss', 'content': 0.08515475690364838, 'timestamp': '2025-10-02 00:42:40.542870', 'step': 17841, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:40.599423', 'step': 17841, 'epoch': 2}
{'type': 'loss', 'content': 0.04588386416435242, 'timestamp': '2025-10-02 00:42:40.603119', 'step': 17842, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:40.661714', 'step': 17842, 'epoch': 2}
{'type': 'loss', 'content': 0.11890187114477158, 'timestamp': '2025-10-02 00:42:40.667652', 'step': 17843, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:40.729712', 'step': 17843, 'epoch': 2}
{'type': 'loss', 'content': 0.030236948281526566, 'timestamp': '2025-10-02 00:42:40.735908', 'step': 17844, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:40.793337', 'step': 17844, 'epoch': 2}
{'type': 'loss', 'content': 0.0877651646733284, 'timestamp': '2025-10-02 00:42:40.795792', 'step': 17845, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:42:40.851510', 'step': 17845, 'epoch': 2}
{'type': 'loss', 'content': 0.12725277245044708, 'timestamp': '2025-10-02 00:42:40.856851', 'step': 17846, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:40.920053', 'step': 17846, 'epoch': 2}
{'type': 'loss', 'content': 0.071907639503479, 'timestamp': '2025-10-02 00:42:40.926915', 'step': 17847, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:40.986038', 'step': 17847, 'epoch': 2}
{'type': 'loss', 'content': 0.02715129591524601, 'timestamp': '2025-10-02 00:42:40.996048', 'step': 17848, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:41.052887', 'step': 17848, 'epoch': 2}
{'type': 'loss', 'content': 0.09253120422363281, 'timestamp': '2025-10-02 00:42:41.058658', 'step': 17849, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:41.118059', 'step': 17849, 'epoch': 2}
{'type': 'loss', 'content': 0.0655074343085289, 'timestamp': '2025-10-02 00:42:41.124983', 'step': 17850, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:41.186894', 'step': 17850, 'epoch': 2}
{'type': 'loss', 'content': 0.14931422472000122, 'timestamp': '2025-10-02 00:42:41.189117', 'step': 17851, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:41.249285', 'step': 17851, 'epoch': 2}
{'type': 'loss', 'content': 0.05029655992984772, 'timestamp': '2025-10-02 00:42:41.256157', 'step': 17852, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:42:41.315788', 'step': 17852, 'epoch': 2}
{'type': 'loss', 'content': 0.21659469604492188, 'timestamp': '2025-10-02 00:42:41.320121', 'step': 17853, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:41.377212', 'step': 17853, 'epoch': 2}
{'type': 'loss', 'content': 0.04947614297270775, 'timestamp': '2025-10-02 00:42:41.379918', 'step': 17854, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:41.437418', 'step': 17854, 'epoch': 2}
{'type': 'loss', 'content': 0.07145044207572937, 'timestamp': '2025-10-02 00:42:41.441352', 'step': 17855, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:41.504941', 'step': 17855, 'epoch': 2}
{'type': 'loss', 'content': 0.12978117167949677, 'timestamp': '2025-10-02 00:42:41.513004', 'step': 17856, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:41.572258', 'step': 17856, 'epoch': 2}
{'type': 'loss', 'content': 0.060667265206575394, 'timestamp': '2025-10-02 00:42:41.577563', 'step': 17857, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:41.640781', 'step': 17857, 'epoch': 2}
{'type': 'loss', 'content': 0.057384658604860306, 'timestamp': '2025-10-02 00:42:41.650985', 'step': 17858, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:41.710512', 'step': 17858, 'epoch': 2}
{'type': 'loss', 'content': 0.042242590337991714, 'timestamp': '2025-10-02 00:42:41.714412', 'step': 17859, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:41.772232', 'step': 17859, 'epoch': 2}
{'type': 'loss', 'content': 0.04539226368069649, 'timestamp': '2025-10-02 00:42:41.779938', 'step': 17860, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:42:41.844356', 'step': 17860, 'epoch': 2}
{'type': 'loss', 'content': 0.03930947929620743, 'timestamp': '2025-10-02 00:42:41.856157', 'step': 17861, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:41.913212', 'step': 17861, 'epoch': 2}
{'type': 'loss', 'content': 0.13197839260101318, 'timestamp': '2025-10-02 00:42:41.916524', 'step': 17862, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:41.981265', 'step': 17862, 'epoch': 2}
{'type': 'loss', 'content': 0.10485079139471054, 'timestamp': '2025-10-02 00:42:41.987285', 'step': 17863, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:42.054307', 'step': 17863, 'epoch': 2}
{'type': 'loss', 'content': 0.0694185197353363, 'timestamp': '2025-10-02 00:42:42.061953', 'step': 17864, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:42.120465', 'step': 17864, 'epoch': 2}
{'type': 'loss', 'content': 0.08906196057796478, 'timestamp': '2025-10-02 00:42:42.127638', 'step': 17865, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:42.185998', 'step': 17865, 'epoch': 2}
{'type': 'loss', 'content': 0.022451629862189293, 'timestamp': '2025-10-02 00:42:42.195134', 'step': 17866, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:42.262102', 'step': 17866, 'epoch': 2}
{'type': 'loss', 'content': 0.07848287373781204, 'timestamp': '2025-10-02 00:42:42.265434', 'step': 17867, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:42.323224', 'step': 17867, 'epoch': 2}
{'type': 'loss', 'content': 0.04027462750673294, 'timestamp': '2025-10-02 00:42:42.333311', 'step': 17868, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:42.391113', 'step': 17868, 'epoch': 2}
{'type': 'loss', 'content': 0.018238164484500885, 'timestamp': '2025-10-02 00:42:42.396837', 'step': 17869, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:42.474341', 'step': 17869, 'epoch': 2}
{'type': 'loss', 'content': 0.03630228340625763, 'timestamp': '2025-10-02 00:42:42.481718', 'step': 17870, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:42.554000', 'step': 17870, 'epoch': 2}
{'type': 'loss', 'content': 0.14370301365852356, 'timestamp': '2025-10-02 00:42:42.558088', 'step': 17871, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:42.629529', 'step': 17871, 'epoch': 2}
{'type': 'loss', 'content': 0.06730756908655167, 'timestamp': '2025-10-02 00:42:42.643210', 'step': 17872, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:42.706030', 'step': 17872, 'epoch': 2}
{'type': 'loss', 'content': 0.08990997076034546, 'timestamp': '2025-10-02 00:42:42.709391', 'step': 17873, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:42.771380', 'step': 17873, 'epoch': 2}
{'type': 'loss', 'content': 0.07524982839822769, 'timestamp': '2025-10-02 00:42:42.775116', 'step': 17874, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:42.846070', 'step': 17874, 'epoch': 2}
{'type': 'loss', 'content': 0.06078029423952103, 'timestamp': '2025-10-02 00:42:42.849197', 'step': 17875, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:42:42.920446', 'step': 17875, 'epoch': 2}
{'type': 'loss', 'content': 0.016029782593250275, 'timestamp': '2025-10-02 00:42:42.933485', 'step': 17876, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:42:43.010157', 'step': 17876, 'epoch': 2}
{'type': 'loss', 'content': 0.009031840600073338, 'timestamp': '2025-10-02 00:42:43.021890', 'step': 17877, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:43.082389', 'step': 17877, 'epoch': 2}
{'type': 'loss', 'content': 0.11189904063940048, 'timestamp': '2025-10-02 00:42:43.085497', 'step': 17878, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:43.146772', 'step': 17878, 'epoch': 2}
{'type': 'loss', 'content': 0.16008366644382477, 'timestamp': '2025-10-02 00:42:43.150005', 'step': 17879, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:42:43.212732', 'step': 17879, 'epoch': 2}
{'type': 'loss', 'content': 0.028438901528716087, 'timestamp': '2025-10-02 00:42:43.223990', 'step': 17880, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:43.288984', 'step': 17880, 'epoch': 2}
{'type': 'loss', 'content': 0.03147285059094429, 'timestamp': '2025-10-02 00:42:43.292715', 'step': 17881, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:43.349781', 'step': 17881, 'epoch': 2}
{'type': 'loss', 'content': 0.07243471592664719, 'timestamp': '2025-10-02 00:42:43.356247', 'step': 17882, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:43.418912', 'step': 17882, 'epoch': 2}
{'type': 'loss', 'content': 0.019931182265281677, 'timestamp': '2025-10-02 00:42:43.428439', 'step': 17883, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:43.485665', 'step': 17883, 'epoch': 2}
{'type': 'loss', 'content': 0.016249356791377068, 'timestamp': '2025-10-02 00:42:43.495963', 'step': 17884, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:43.554579', 'step': 17884, 'epoch': 2}
{'type': 'loss', 'content': 0.05379283055663109, 'timestamp': '2025-10-02 00:42:43.557406', 'step': 17885, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:43.613666', 'step': 17885, 'epoch': 2}
{'type': 'loss', 'content': 0.09977815300226212, 'timestamp': '2025-10-02 00:42:43.619277', 'step': 17886, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:43.678011', 'step': 17886, 'epoch': 2}
{'type': 'loss', 'content': 0.07729006558656693, 'timestamp': '2025-10-02 00:42:43.681945', 'step': 17887, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:43.745068', 'step': 17887, 'epoch': 2}
{'type': 'loss', 'content': 0.13125444948673248, 'timestamp': '2025-10-02 00:42:43.755774', 'step': 17888, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:43.812313', 'step': 17888, 'epoch': 2}
{'type': 'loss', 'content': 0.05736610293388367, 'timestamp': '2025-10-02 00:42:43.815535', 'step': 17889, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:43.873752', 'step': 17889, 'epoch': 2}
{'type': 'loss', 'content': 0.08636474609375, 'timestamp': '2025-10-02 00:42:43.876160', 'step': 17890, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:43.932414', 'step': 17890, 'epoch': 2}
{'type': 'loss', 'content': 0.05078265070915222, 'timestamp': '2025-10-02 00:42:43.935609', 'step': 17891, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:43.997610', 'step': 17891, 'epoch': 2}
{'type': 'loss', 'content': 0.012835332192480564, 'timestamp': '2025-10-02 00:42:44.008529', 'step': 17892, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:44.065824', 'step': 17892, 'epoch': 2}
{'type': 'loss', 'content': 0.09312137961387634, 'timestamp': '2025-10-02 00:42:44.068379', 'step': 17893, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:44.126622', 'step': 17893, 'epoch': 2}
{'type': 'loss', 'content': 0.06877611577510834, 'timestamp': '2025-10-02 00:42:44.135978', 'step': 17894, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:42:44.194757', 'step': 17894, 'epoch': 2}
{'type': 'loss', 'content': 0.056328874081373215, 'timestamp': '2025-10-02 00:42:44.197290', 'step': 17895, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:44.253508', 'step': 17895, 'epoch': 2}
{'type': 'loss', 'content': 0.025811990723013878, 'timestamp': '2025-10-02 00:42:44.263792', 'step': 17896, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:44.320823', 'step': 17896, 'epoch': 2}
{'type': 'loss', 'content': 0.016285832971334457, 'timestamp': '2025-10-02 00:42:44.330051', 'step': 17897, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:44.387622', 'step': 17897, 'epoch': 2}
{'type': 'loss', 'content': 0.1309940665960312, 'timestamp': '2025-10-02 00:42:44.390346', 'step': 17898, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:44.449877', 'step': 17898, 'epoch': 2}
{'type': 'loss', 'content': 0.027923956513404846, 'timestamp': '2025-10-02 00:42:44.452531', 'step': 17899, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:44.513402', 'step': 17899, 'epoch': 2}
{'type': 'loss', 'content': 0.017921267077326775, 'timestamp': '2025-10-02 00:42:44.523731', 'step': 17900, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:42:44.609280', 'step': 17900, 'epoch': 2}
{'type': 'loss', 'content': 0.12388978153467178, 'timestamp': '2025-10-02 00:42:44.616712', 'step': 17901, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:44.689191', 'step': 17901, 'epoch': 2}
{'type': 'loss', 'content': 0.04318491742014885, 'timestamp': '2025-10-02 00:42:44.699298', 'step': 17902, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:44.757098', 'step': 17902, 'epoch': 2}
{'type': 'loss', 'content': 0.04041087627410889, 'timestamp': '2025-10-02 00:42:44.766655', 'step': 17903, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:44.823443', 'step': 17903, 'epoch': 2}
{'type': 'loss', 'content': 0.020065762102603912, 'timestamp': '2025-10-02 00:42:44.830088', 'step': 17904, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:44.886898', 'step': 17904, 'epoch': 2}
{'type': 'loss', 'content': 0.09164408594369888, 'timestamp': '2025-10-02 00:42:44.889667', 'step': 17905, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:44.946666', 'step': 17905, 'epoch': 2}
{'type': 'loss', 'content': 0.07933585345745087, 'timestamp': '2025-10-02 00:42:44.951354', 'step': 17906, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:45.009100', 'step': 17906, 'epoch': 2}
{'type': 'loss', 'content': 0.00909911934286356, 'timestamp': '2025-10-02 00:42:45.016157', 'step': 17907, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:42:45.070980', 'step': 17907, 'epoch': 2}
{'type': 'loss', 'content': 0.09512369334697723, 'timestamp': '2025-10-02 00:42:45.077163', 'step': 17908, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:42:45.131761', 'step': 17908, 'epoch': 2}
{'type': 'loss', 'content': 0.09582652151584625, 'timestamp': '2025-10-02 00:42:45.134428', 'step': 17909, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:42:45.208253', 'step': 17909, 'epoch': 2}
{'type': 'loss', 'content': 0.03102230280637741, 'timestamp': '2025-10-02 00:42:45.221460', 'step': 17910, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:45.276754', 'step': 17910, 'epoch': 2}
{'type': 'loss', 'content': 0.14299099147319794, 'timestamp': '2025-10-02 00:42:45.279857', 'step': 17911, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:45.335398', 'step': 17911, 'epoch': 2}
{'type': 'loss', 'content': 0.09201809018850327, 'timestamp': '2025-10-02 00:42:45.341683', 'step': 17912, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:45.396081', 'step': 17912, 'epoch': 2}
{'type': 'loss', 'content': 0.08360636979341507, 'timestamp': '2025-10-02 00:42:45.398856', 'step': 17913, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:45.454416', 'step': 17913, 'epoch': 2}
{'type': 'loss', 'content': 0.013797571882605553, 'timestamp': '2025-10-02 00:42:45.457239', 'step': 17914, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:45.512628', 'step': 17914, 'epoch': 2}
{'type': 'loss', 'content': 0.033243753015995026, 'timestamp': '2025-10-02 00:42:45.515459', 'step': 17915, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:45.570523', 'step': 17915, 'epoch': 2}
{'type': 'loss', 'content': 0.03611164912581444, 'timestamp': '2025-10-02 00:42:45.576608', 'step': 17916, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:45.634306', 'step': 17916, 'epoch': 2}
{'type': 'loss', 'content': 0.08571833372116089, 'timestamp': '2025-10-02 00:42:45.645272', 'step': 17917, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:45.700147', 'step': 17917, 'epoch': 2}
{'type': 'loss', 'content': 0.08187714219093323, 'timestamp': '2025-10-02 00:42:45.706510', 'step': 17918, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:45.767259', 'step': 17918, 'epoch': 2}
{'type': 'loss', 'content': 0.05390894040465355, 'timestamp': '2025-10-02 00:42:45.779412', 'step': 17919, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:45.846604', 'step': 17919, 'epoch': 2}
{'type': 'loss', 'content': 0.0602990947663784, 'timestamp': '2025-10-02 00:42:45.855791', 'step': 17920, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:45.929673', 'step': 17920, 'epoch': 2}
{'type': 'loss', 'content': 0.04099179431796074, 'timestamp': '2025-10-02 00:42:45.939855', 'step': 17921, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:42:46.009196', 'step': 17921, 'epoch': 2}
{'type': 'loss', 'content': 0.024455294013023376, 'timestamp': '2025-10-02 00:42:46.021476', 'step': 17922, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:46.077810', 'step': 17922, 'epoch': 2}
{'type': 'loss', 'content': 0.044797658920288086, 'timestamp': '2025-10-02 00:42:46.080232', 'step': 17923, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:46.134619', 'step': 17923, 'epoch': 2}
{'type': 'loss', 'content': 0.09917943924665451, 'timestamp': '2025-10-02 00:42:46.140659', 'step': 17924, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:46.195120', 'step': 17924, 'epoch': 2}
{'type': 'loss', 'content': 0.05180053412914276, 'timestamp': '2025-10-02 00:42:46.205361', 'step': 17925, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:46.264917', 'step': 17925, 'epoch': 2}
{'type': 'loss', 'content': 0.07704685628414154, 'timestamp': '2025-10-02 00:42:46.275108', 'step': 17926, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:46.330699', 'step': 17926, 'epoch': 2}
{'type': 'loss', 'content': 0.0303431898355484, 'timestamp': '2025-10-02 00:42:46.333334', 'step': 17927, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:46.387870', 'step': 17927, 'epoch': 2}
{'type': 'loss', 'content': 0.08368102461099625, 'timestamp': '2025-10-02 00:42:46.393788', 'step': 17928, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:46.448618', 'step': 17928, 'epoch': 2}
{'type': 'loss', 'content': 0.027961326763033867, 'timestamp': '2025-10-02 00:42:46.451605', 'step': 17929, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:46.506885', 'step': 17929, 'epoch': 2}
{'type': 'loss', 'content': 0.054960332810878754, 'timestamp': '2025-10-02 00:42:46.509534', 'step': 17930, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:46.569104', 'step': 17930, 'epoch': 2}
{'type': 'loss', 'content': 0.05889187380671501, 'timestamp': '2025-10-02 00:42:46.579249', 'step': 17931, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:46.638615', 'step': 17931, 'epoch': 2}
{'type': 'loss', 'content': 0.021516188979148865, 'timestamp': '2025-10-02 00:42:46.644420', 'step': 17932, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:46.698369', 'step': 17932, 'epoch': 2}
{'type': 'loss', 'content': 0.10188166797161102, 'timestamp': '2025-10-02 00:42:46.703981', 'step': 17933, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:46.759177', 'step': 17933, 'epoch': 2}
{'type': 'loss', 'content': 0.01915108785033226, 'timestamp': '2025-10-02 00:42:46.766281', 'step': 17934, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:46.822534', 'step': 17934, 'epoch': 2}
{'type': 'loss', 'content': 0.08329214155673981, 'timestamp': '2025-10-02 00:42:46.824961', 'step': 17935, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:42:46.892418', 'step': 17935, 'epoch': 2}
{'type': 'loss', 'content': 0.010136022232472897, 'timestamp': '2025-10-02 00:42:46.905147', 'step': 17936, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:46.967308', 'step': 17936, 'epoch': 2}
{'type': 'loss', 'content': 0.17918828129768372, 'timestamp': '2025-10-02 00:42:46.979862', 'step': 17937, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:42:47.042426', 'step': 17937, 'epoch': 2}
{'type': 'loss', 'content': 0.10255681723356247, 'timestamp': '2025-10-02 00:42:47.045198', 'step': 17938, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:47.100999', 'step': 17938, 'epoch': 2}
{'type': 'loss', 'content': 0.0498371459543705, 'timestamp': '2025-10-02 00:42:47.105414', 'step': 17939, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:47.159877', 'step': 17939, 'epoch': 2}
{'type': 'loss', 'content': 0.08045785129070282, 'timestamp': '2025-10-02 00:42:47.165767', 'step': 17940, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:47.219609', 'step': 17940, 'epoch': 2}
{'type': 'loss', 'content': 0.04576418921351433, 'timestamp': '2025-10-02 00:42:47.222017', 'step': 17941, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:47.277805', 'step': 17941, 'epoch': 2}
{'type': 'loss', 'content': 0.0511460117995739, 'timestamp': '2025-10-02 00:42:47.280769', 'step': 17942, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:47.335870', 'step': 17942, 'epoch': 2}
{'type': 'loss', 'content': 0.07233741134405136, 'timestamp': '2025-10-02 00:42:47.338620', 'step': 17943, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:47.392756', 'step': 17943, 'epoch': 2}
{'type': 'loss', 'content': 0.12195494771003723, 'timestamp': '2025-10-02 00:42:47.398767', 'step': 17944, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:47.452408', 'step': 17944, 'epoch': 2}
{'type': 'loss', 'content': 0.1191248968243599, 'timestamp': '2025-10-02 00:42:47.454891', 'step': 17945, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:47.509397', 'step': 17945, 'epoch': 2}
{'type': 'loss', 'content': 0.17516964673995972, 'timestamp': '2025-10-02 00:42:47.512014', 'step': 17946, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:47.566684', 'step': 17946, 'epoch': 2}
{'type': 'loss', 'content': 0.09719294309616089, 'timestamp': '2025-10-02 00:42:47.568923', 'step': 17947, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:47.623645', 'step': 17947, 'epoch': 2}
{'type': 'loss', 'content': 0.052877120673656464, 'timestamp': '2025-10-02 00:42:47.630252', 'step': 17948, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:47.685193', 'step': 17948, 'epoch': 2}
{'type': 'loss', 'content': 0.021526234224438667, 'timestamp': '2025-10-02 00:42:47.687517', 'step': 17949, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:47.742629', 'step': 17949, 'epoch': 2}
{'type': 'loss', 'content': 0.16027149558067322, 'timestamp': '2025-10-02 00:42:47.745400', 'step': 17950, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:47.801072', 'step': 17950, 'epoch': 2}
{'type': 'loss', 'content': 0.0334656648337841, 'timestamp': '2025-10-02 00:42:47.803434', 'step': 17951, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:47.862367', 'step': 17951, 'epoch': 2}
{'type': 'loss', 'content': 0.027670547366142273, 'timestamp': '2025-10-02 00:42:47.873336', 'step': 17952, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:47.928453', 'step': 17952, 'epoch': 2}
{'type': 'loss', 'content': 0.010549371130764484, 'timestamp': '2025-10-02 00:42:47.935795', 'step': 17953, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:47.990849', 'step': 17953, 'epoch': 2}
{'type': 'loss', 'content': 0.026965491473674774, 'timestamp': '2025-10-02 00:42:47.997938', 'step': 17954, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:48.052782', 'step': 17954, 'epoch': 2}
{'type': 'loss', 'content': 0.12740547955036163, 'timestamp': '2025-10-02 00:42:48.055415', 'step': 17955, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:48.110711', 'step': 17955, 'epoch': 2}
{'type': 'loss', 'content': 0.09276042878627777, 'timestamp': '2025-10-02 00:42:48.116907', 'step': 17956, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:48.173498', 'step': 17956, 'epoch': 2}
{'type': 'loss', 'content': 0.05093558877706528, 'timestamp': '2025-10-02 00:42:48.179069', 'step': 17957, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:48.248994', 'step': 17957, 'epoch': 2}
{'type': 'loss', 'content': 0.09318900853395462, 'timestamp': '2025-10-02 00:42:48.254941', 'step': 17958, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:48.320727', 'step': 17958, 'epoch': 2}
{'type': 'loss', 'content': 0.15518733859062195, 'timestamp': '2025-10-02 00:42:48.324222', 'step': 17959, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:48.378742', 'step': 17959, 'epoch': 2}
{'type': 'loss', 'content': 0.030812829732894897, 'timestamp': '2025-10-02 00:42:48.384645', 'step': 17960, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:48.438977', 'step': 17960, 'epoch': 2}
{'type': 'loss', 'content': 0.1504008024930954, 'timestamp': '2025-10-02 00:42:48.441187', 'step': 17961, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:48.496550', 'step': 17961, 'epoch': 2}
{'type': 'loss', 'content': 0.018970338627696037, 'timestamp': '2025-10-02 00:42:48.505888', 'step': 17962, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:48.561663', 'step': 17962, 'epoch': 2}
{'type': 'loss', 'content': 0.02526177279651165, 'timestamp': '2025-10-02 00:42:48.564144', 'step': 17963, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:48.618590', 'step': 17963, 'epoch': 2}
{'type': 'loss', 'content': 0.026865702122449875, 'timestamp': '2025-10-02 00:42:48.624955', 'step': 17964, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:48.679578', 'step': 17964, 'epoch': 2}
{'type': 'loss', 'content': 0.05083972215652466, 'timestamp': '2025-10-02 00:42:48.685122', 'step': 17965, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:48.739673', 'step': 17965, 'epoch': 2}
{'type': 'loss', 'content': 0.03642532229423523, 'timestamp': '2025-10-02 00:42:48.742183', 'step': 17966, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:48.797141', 'step': 17966, 'epoch': 2}
{'type': 'loss', 'content': 0.07288847863674164, 'timestamp': '2025-10-02 00:42:48.806143', 'step': 17967, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:48.861326', 'step': 17967, 'epoch': 2}
{'type': 'loss', 'content': 0.14884419739246368, 'timestamp': '2025-10-02 00:42:48.867199', 'step': 17968, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:48.921188', 'step': 17968, 'epoch': 2}
{'type': 'loss', 'content': 0.1167697086930275, 'timestamp': '2025-10-02 00:42:48.923410', 'step': 17969, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:48.977228', 'step': 17969, 'epoch': 2}
{'type': 'loss', 'content': 0.09983114898204803, 'timestamp': '2025-10-02 00:42:48.979565', 'step': 17970, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:49.034156', 'step': 17970, 'epoch': 2}
{'type': 'loss', 'content': 0.06918623298406601, 'timestamp': '2025-10-02 00:42:49.036828', 'step': 17971, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:49.091398', 'step': 17971, 'epoch': 2}
{'type': 'loss', 'content': 0.05524884909391403, 'timestamp': '2025-10-02 00:42:49.097821', 'step': 17972, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:49.152387', 'step': 17972, 'epoch': 2}
{'type': 'loss', 'content': 0.028804752975702286, 'timestamp': '2025-10-02 00:42:49.159861', 'step': 17973, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:49.217711', 'step': 17973, 'epoch': 2}
{'type': 'loss', 'content': 0.07686682045459747, 'timestamp': '2025-10-02 00:42:49.220211', 'step': 17974, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:49.275960', 'step': 17974, 'epoch': 2}
{'type': 'loss', 'content': 0.011373471468687057, 'timestamp': '2025-10-02 00:42:49.285526', 'step': 17975, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:42:49.347720', 'step': 17975, 'epoch': 2}
{'type': 'loss', 'content': 0.014245082624256611, 'timestamp': '2025-10-02 00:42:49.359181', 'step': 17976, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:49.421956', 'step': 17976, 'epoch': 2}
{'type': 'loss', 'content': 0.11217482388019562, 'timestamp': '2025-10-02 00:42:49.424724', 'step': 17977, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:49.485825', 'step': 17977, 'epoch': 2}
{'type': 'loss', 'content': 0.023218397051095963, 'timestamp': '2025-10-02 00:42:49.496013', 'step': 17978, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:49.550382', 'step': 17978, 'epoch': 2}
{'type': 'loss', 'content': 0.1026720330119133, 'timestamp': '2025-10-02 00:42:49.552785', 'step': 17979, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:49.608529', 'step': 17979, 'epoch': 2}
{'type': 'loss', 'content': 0.04610379785299301, 'timestamp': '2025-10-02 00:42:49.618804', 'step': 17980, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:49.673169', 'step': 17980, 'epoch': 2}
{'type': 'loss', 'content': 0.047860488295555115, 'timestamp': '2025-10-02 00:42:49.675739', 'step': 17981, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:42:49.731098', 'step': 17981, 'epoch': 2}
{'type': 'loss', 'content': 0.055938512086868286, 'timestamp': '2025-10-02 00:42:49.733640', 'step': 17982, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:49.788582', 'step': 17982, 'epoch': 2}
{'type': 'loss', 'content': 0.0971231609582901, 'timestamp': '2025-10-02 00:42:49.790865', 'step': 17983, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:49.844980', 'step': 17983, 'epoch': 2}
{'type': 'loss', 'content': 0.08677971363067627, 'timestamp': '2025-10-02 00:42:49.854960', 'step': 17984, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:49.910787', 'step': 17984, 'epoch': 2}
{'type': 'loss', 'content': 0.051723919808864594, 'timestamp': '2025-10-02 00:42:49.913049', 'step': 17985, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:49.967470', 'step': 17985, 'epoch': 2}
{'type': 'loss', 'content': 0.08613381534814835, 'timestamp': '2025-10-02 00:42:49.970103', 'step': 17986, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:50.025277', 'step': 17986, 'epoch': 2}
{'type': 'loss', 'content': 0.08490338921546936, 'timestamp': '2025-10-02 00:42:50.027890', 'step': 17987, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:50.082633', 'step': 17987, 'epoch': 2}
{'type': 'loss', 'content': 0.045138973742723465, 'timestamp': '2025-10-02 00:42:50.088708', 'step': 17988, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:50.142151', 'step': 17988, 'epoch': 2}
{'type': 'loss', 'content': 0.08984781056642532, 'timestamp': '2025-10-02 00:42:50.144801', 'step': 17989, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:50.201549', 'step': 17989, 'epoch': 2}
{'type': 'loss', 'content': 0.08475308120250702, 'timestamp': '2025-10-02 00:42:50.203798', 'step': 17990, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:50.258610', 'step': 17990, 'epoch': 2}
{'type': 'loss', 'content': 0.07154087722301483, 'timestamp': '2025-10-02 00:42:50.261151', 'step': 17991, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:50.316410', 'step': 17991, 'epoch': 2}
{'type': 'loss', 'content': 0.0035591935738921165, 'timestamp': '2025-10-02 00:42:50.326923', 'step': 17992, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:50.383114', 'step': 17992, 'epoch': 2}
{'type': 'loss', 'content': 0.10551313310861588, 'timestamp': '2025-10-02 00:42:50.385637', 'step': 17993, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:50.440757', 'step': 17993, 'epoch': 2}
{'type': 'loss', 'content': 0.035695794969797134, 'timestamp': '2025-10-02 00:42:50.448180', 'step': 17994, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:50.503396', 'step': 17994, 'epoch': 2}
{'type': 'loss', 'content': 0.05353209003806114, 'timestamp': '2025-10-02 00:42:50.509775', 'step': 17995, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:50.572006', 'step': 17995, 'epoch': 2}
{'type': 'loss', 'content': 0.025532716885209084, 'timestamp': '2025-10-02 00:42:50.586865', 'step': 17996, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:50.643102', 'step': 17996, 'epoch': 2}
{'type': 'loss', 'content': 0.030070971697568893, 'timestamp': '2025-10-02 00:42:50.646437', 'step': 17997, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:50.707161', 'step': 17997, 'epoch': 2}
{'type': 'loss', 'content': 0.07169710844755173, 'timestamp': '2025-10-02 00:42:50.716356', 'step': 17998, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:50.771879', 'step': 17998, 'epoch': 2}
{'type': 'loss', 'content': 0.0313325934112072, 'timestamp': '2025-10-02 00:42:50.780874', 'step': 17999, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:50.835606', 'step': 17999, 'epoch': 2}
{'type': 'loss', 'content': 0.07443619519472122, 'timestamp': '2025-10-02 00:42:50.842074', 'step': 18000, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 18000', 'timestamp': '2025-10-02 00:42:51.291184', 'step': 18000, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:42:51.375455', 'step': 18000, 'epoch': 2}
{'type': 'loss', 'content': 0.00847876537591219, 'timestamp': '2025-10-02 00:42:51.390117', 'step': 18001, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:51.468404', 'step': 18001, 'epoch': 2}
{'type': 'loss', 'content': 0.04392057657241821, 'timestamp': '2025-10-02 00:42:51.477912', 'step': 18002, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:51.556279', 'step': 18002, 'epoch': 2}
{'type': 'loss', 'content': 0.09697052836418152, 'timestamp': '2025-10-02 00:42:51.561795', 'step': 18003, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:42:51.629045', 'step': 18003, 'epoch': 2}
{'type': 'loss', 'content': 0.03181873634457588, 'timestamp': '2025-10-02 00:42:51.643381', 'step': 18004, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:51.716576', 'step': 18004, 'epoch': 2}
{'type': 'loss', 'content': 0.022353466600179672, 'timestamp': '2025-10-02 00:42:51.726239', 'step': 18005, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:51.805426', 'step': 18005, 'epoch': 2}
{'type': 'loss', 'content': 0.013989850878715515, 'timestamp': '2025-10-02 00:42:51.819141', 'step': 18006, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:51.889879', 'step': 18006, 'epoch': 2}
{'type': 'loss', 'content': 0.04302404820919037, 'timestamp': '2025-10-02 00:42:51.911393', 'step': 18007, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:42:52.003731', 'step': 18007, 'epoch': 2}
{'type': 'loss', 'content': 0.08045147359371185, 'timestamp': '2025-10-02 00:42:52.023655', 'step': 18008, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:52.107661', 'step': 18008, 'epoch': 2}
{'type': 'loss', 'content': 0.03444121405482292, 'timestamp': '2025-10-02 00:42:52.110676', 'step': 18009, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:42:52.169135', 'step': 18009, 'epoch': 2}
{'type': 'loss', 'content': 0.1433888077735901, 'timestamp': '2025-10-02 00:42:52.171801', 'step': 18010, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:42:52.230123', 'step': 18010, 'epoch': 2}
{'type': 'loss', 'content': 0.04508063197135925, 'timestamp': '2025-10-02 00:42:52.233441', 'step': 18011, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:52.290523', 'step': 18011, 'epoch': 2}
{'type': 'loss', 'content': 0.10303692519664764, 'timestamp': '2025-10-02 00:42:52.300270', 'step': 18012, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:42:52.374292', 'step': 18012, 'epoch': 2}
{'type': 'loss', 'content': 0.04488852992653847, 'timestamp': '2025-10-02 00:42:52.388710', 'step': 18013, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:52.445049', 'step': 18013, 'epoch': 2}
{'type': 'loss', 'content': 0.10425771027803421, 'timestamp': '2025-10-02 00:42:52.452239', 'step': 18014, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:52.509879', 'step': 18014, 'epoch': 2}
{'type': 'loss', 'content': 0.11779309809207916, 'timestamp': '2025-10-02 00:42:52.512618', 'step': 18015, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:52.569043', 'step': 18015, 'epoch': 2}
{'type': 'loss', 'content': 0.05279604718089104, 'timestamp': '2025-10-02 00:42:52.575844', 'step': 18016, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:52.632645', 'step': 18016, 'epoch': 2}
{'type': 'loss', 'content': 0.11062343418598175, 'timestamp': '2025-10-02 00:42:52.641774', 'step': 18017, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:52.698267', 'step': 18017, 'epoch': 2}
{'type': 'loss', 'content': 0.12985819578170776, 'timestamp': '2025-10-02 00:42:52.701319', 'step': 18018, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:52.756606', 'step': 18018, 'epoch': 2}
{'type': 'loss', 'content': 0.09559120237827301, 'timestamp': '2025-10-02 00:42:52.762216', 'step': 18019, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:52.823494', 'step': 18019, 'epoch': 2}
{'type': 'loss', 'content': 0.08269385993480682, 'timestamp': '2025-10-02 00:42:52.830120', 'step': 18020, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:52.885147', 'step': 18020, 'epoch': 2}
{'type': 'loss', 'content': 0.04018344357609749, 'timestamp': '2025-10-02 00:42:52.892394', 'step': 18021, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:42:52.961161', 'step': 18021, 'epoch': 2}
{'type': 'loss', 'content': 0.008341054432094097, 'timestamp': '2025-10-02 00:42:52.972007', 'step': 18022, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:53.030522', 'step': 18022, 'epoch': 2}
{'type': 'loss', 'content': 0.04536467418074608, 'timestamp': '2025-10-02 00:42:53.036092', 'step': 18023, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:42:53.101696', 'step': 18023, 'epoch': 2}
{'type': 'loss', 'content': 0.026593344286084175, 'timestamp': '2025-10-02 00:42:53.112920', 'step': 18024, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:42:53.173623', 'step': 18024, 'epoch': 2}
{'type': 'loss', 'content': 0.051289670169353485, 'timestamp': '2025-10-02 00:42:53.176983', 'step': 18025, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:53.245516', 'step': 18025, 'epoch': 2}
{'type': 'loss', 'content': 0.03425107151269913, 'timestamp': '2025-10-02 00:42:53.249226', 'step': 18026, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:53.313067', 'step': 18026, 'epoch': 2}
{'type': 'loss', 'content': 0.0900859385728836, 'timestamp': '2025-10-02 00:42:53.320206', 'step': 18027, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:42:53.381123', 'step': 18027, 'epoch': 2}
{'type': 'loss', 'content': 0.0211520716547966, 'timestamp': '2025-10-02 00:42:53.392059', 'step': 18028, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:53.448116', 'step': 18028, 'epoch': 2}
{'type': 'loss', 'content': 0.06006607413291931, 'timestamp': '2025-10-02 00:42:53.455506', 'step': 18029, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:53.514490', 'step': 18029, 'epoch': 2}
{'type': 'loss', 'content': 0.0252715814858675, 'timestamp': '2025-10-02 00:42:53.523604', 'step': 18030, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:53.579339', 'step': 18030, 'epoch': 2}
{'type': 'loss', 'content': 0.04775484651327133, 'timestamp': '2025-10-02 00:42:53.581672', 'step': 18031, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:53.636558', 'step': 18031, 'epoch': 2}
{'type': 'loss', 'content': 0.028164830058813095, 'timestamp': '2025-10-02 00:42:53.643412', 'step': 18032, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:42:53.697387', 'step': 18032, 'epoch': 2}
{'type': 'loss', 'content': 0.22498777508735657, 'timestamp': '2025-10-02 00:42:53.699762', 'step': 18033, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:53.754314', 'step': 18033, 'epoch': 2}
{'type': 'loss', 'content': 0.0838000625371933, 'timestamp': '2025-10-02 00:42:53.759898', 'step': 18034, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:53.815771', 'step': 18034, 'epoch': 2}
{'type': 'loss', 'content': 0.10201577842235565, 'timestamp': '2025-10-02 00:42:53.818291', 'step': 18035, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:53.873071', 'step': 18035, 'epoch': 2}
{'type': 'loss', 'content': 0.08776717633008957, 'timestamp': '2025-10-02 00:42:53.882761', 'step': 18036, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:53.937426', 'step': 18036, 'epoch': 2}
{'type': 'loss', 'content': 0.022962355986237526, 'timestamp': '2025-10-02 00:42:53.939902', 'step': 18037, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:53.995270', 'step': 18037, 'epoch': 2}
{'type': 'loss', 'content': 0.02565515600144863, 'timestamp': '2025-10-02 00:42:54.002628', 'step': 18038, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:54.057501', 'step': 18038, 'epoch': 2}
{'type': 'loss', 'content': 0.10806657373905182, 'timestamp': '2025-10-02 00:42:54.059966', 'step': 18039, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:54.115065', 'step': 18039, 'epoch': 2}
{'type': 'loss', 'content': 0.058311305940151215, 'timestamp': '2025-10-02 00:42:54.122940', 'step': 18040, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:54.176988', 'step': 18040, 'epoch': 2}
{'type': 'loss', 'content': 0.09555136412382126, 'timestamp': '2025-10-02 00:42:54.179547', 'step': 18041, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:54.235296', 'step': 18041, 'epoch': 2}
{'type': 'loss', 'content': 0.01022881455719471, 'timestamp': '2025-10-02 00:42:54.244831', 'step': 18042, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:54.300178', 'step': 18042, 'epoch': 2}
{'type': 'loss', 'content': 0.12949877977371216, 'timestamp': '2025-10-02 00:42:54.302785', 'step': 18043, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:54.357391', 'step': 18043, 'epoch': 2}
{'type': 'loss', 'content': 0.05546925216913223, 'timestamp': '2025-10-02 00:42:54.363410', 'step': 18044, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:54.418078', 'step': 18044, 'epoch': 2}
{'type': 'loss', 'content': 0.0971546322107315, 'timestamp': '2025-10-02 00:42:54.420466', 'step': 18045, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:54.475617', 'step': 18045, 'epoch': 2}
{'type': 'loss', 'content': 0.03197662904858589, 'timestamp': '2025-10-02 00:42:54.481252', 'step': 18046, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:54.538678', 'step': 18046, 'epoch': 2}
{'type': 'loss', 'content': 0.06579715013504028, 'timestamp': '2025-10-02 00:42:54.541850', 'step': 18047, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:42:54.606730', 'step': 18047, 'epoch': 2}
{'type': 'loss', 'content': 0.09072849899530411, 'timestamp': '2025-10-02 00:42:54.617974', 'step': 18048, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:54.672477', 'step': 18048, 'epoch': 2}
{'type': 'loss', 'content': 0.10571642965078354, 'timestamp': '2025-10-02 00:42:54.687556', 'step': 18049, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:42:54.755318', 'step': 18049, 'epoch': 2}
{'type': 'loss', 'content': 0.03394074738025665, 'timestamp': '2025-10-02 00:42:54.765789', 'step': 18050, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:42:54.840944', 'step': 18050, 'epoch': 2}
{'type': 'loss', 'content': 0.0052866810001432896, 'timestamp': '2025-10-02 00:42:54.854425', 'step': 18051, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:54.909333', 'step': 18051, 'epoch': 2}
{'type': 'loss', 'content': 0.044832952320575714, 'timestamp': '2025-10-02 00:42:54.917237', 'step': 18052, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:54.971334', 'step': 18052, 'epoch': 2}
{'type': 'loss', 'content': 0.05875347927212715, 'timestamp': '2025-10-02 00:42:54.973888', 'step': 18053, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:55.028729', 'step': 18053, 'epoch': 2}
{'type': 'loss', 'content': 0.043790608644485474, 'timestamp': '2025-10-02 00:42:55.031108', 'step': 18054, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:55.085575', 'step': 18054, 'epoch': 2}
{'type': 'loss', 'content': 0.06402313709259033, 'timestamp': '2025-10-02 00:42:55.094647', 'step': 18055, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:55.149824', 'step': 18055, 'epoch': 2}
{'type': 'loss', 'content': 0.029724398627877235, 'timestamp': '2025-10-02 00:42:55.156637', 'step': 18056, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:55.211654', 'step': 18056, 'epoch': 2}
{'type': 'loss', 'content': 0.015689345076680183, 'timestamp': '2025-10-02 00:42:55.221904', 'step': 18057, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:55.276848', 'step': 18057, 'epoch': 2}
{'type': 'loss', 'content': 0.03238305449485779, 'timestamp': '2025-10-02 00:42:55.279230', 'step': 18058, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:55.333300', 'step': 18058, 'epoch': 2}
{'type': 'loss', 'content': 0.12172224372625351, 'timestamp': '2025-10-02 00:42:55.335743', 'step': 18059, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:55.391045', 'step': 18059, 'epoch': 2}
{'type': 'loss', 'content': 0.04535887390375137, 'timestamp': '2025-10-02 00:42:55.401165', 'step': 18060, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:55.455655', 'step': 18060, 'epoch': 2}
{'type': 'loss', 'content': 0.03127831220626831, 'timestamp': '2025-10-02 00:42:55.458566', 'step': 18061, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:55.513409', 'step': 18061, 'epoch': 2}
{'type': 'loss', 'content': 0.09804616868495941, 'timestamp': '2025-10-02 00:42:55.516413', 'step': 18062, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:55.571957', 'step': 18062, 'epoch': 2}
{'type': 'loss', 'content': 0.03996696695685387, 'timestamp': '2025-10-02 00:42:55.574325', 'step': 18063, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:42:55.629607', 'step': 18063, 'epoch': 2}
{'type': 'loss', 'content': 0.09801848232746124, 'timestamp': '2025-10-02 00:42:55.636832', 'step': 18064, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:55.690719', 'step': 18064, 'epoch': 2}
{'type': 'loss', 'content': 0.17476138472557068, 'timestamp': '2025-10-02 00:42:55.694216', 'step': 18065, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:55.750430', 'step': 18065, 'epoch': 2}
{'type': 'loss', 'content': 0.01690894551575184, 'timestamp': '2025-10-02 00:42:55.759591', 'step': 18066, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:55.816424', 'step': 18066, 'epoch': 2}
{'type': 'loss', 'content': 0.05877496674656868, 'timestamp': '2025-10-02 00:42:55.822015', 'step': 18067, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:42:55.896269', 'step': 18067, 'epoch': 2}
{'type': 'loss', 'content': 0.009687722660601139, 'timestamp': '2025-10-02 00:42:55.906578', 'step': 18068, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:55.961659', 'step': 18068, 'epoch': 2}
{'type': 'loss', 'content': 0.05356923118233681, 'timestamp': '2025-10-02 00:42:55.964640', 'step': 18069, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:56.020868', 'step': 18069, 'epoch': 2}
{'type': 'loss', 'content': 0.040189821273088455, 'timestamp': '2025-10-02 00:42:56.031776', 'step': 18070, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:42:56.088044', 'step': 18070, 'epoch': 2}
{'type': 'loss', 'content': 0.1430087834596634, 'timestamp': '2025-10-02 00:42:56.090869', 'step': 18071, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:56.146587', 'step': 18071, 'epoch': 2}
{'type': 'loss', 'content': 0.03881341964006424, 'timestamp': '2025-10-02 00:42:56.154563', 'step': 18072, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:56.207794', 'step': 18072, 'epoch': 2}
{'type': 'loss', 'content': 0.06640450656414032, 'timestamp': '2025-10-02 00:42:56.210503', 'step': 18073, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:56.265031', 'step': 18073, 'epoch': 2}
{'type': 'loss', 'content': 0.011234940961003304, 'timestamp': '2025-10-02 00:42:56.267360', 'step': 18074, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:56.322236', 'step': 18074, 'epoch': 2}
{'type': 'loss', 'content': 0.041624829173088074, 'timestamp': '2025-10-02 00:42:56.324834', 'step': 18075, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:56.379428', 'step': 18075, 'epoch': 2}
{'type': 'loss', 'content': 0.05985032021999359, 'timestamp': '2025-10-02 00:42:56.385457', 'step': 18076, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:42:56.440821', 'step': 18076, 'epoch': 2}
{'type': 'loss', 'content': 0.09232117235660553, 'timestamp': '2025-10-02 00:42:56.446375', 'step': 18077, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:56.500763', 'step': 18077, 'epoch': 2}
{'type': 'loss', 'content': 0.05802476406097412, 'timestamp': '2025-10-02 00:42:56.509811', 'step': 18078, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:42:56.565313', 'step': 18078, 'epoch': 2}
{'type': 'loss', 'content': 0.03821021690964699, 'timestamp': '2025-10-02 00:42:56.574498', 'step': 18079, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:56.629743', 'step': 18079, 'epoch': 2}
{'type': 'loss', 'content': 0.0190451480448246, 'timestamp': '2025-10-02 00:42:56.635740', 'step': 18080, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:56.689732', 'step': 18080, 'epoch': 2}
{'type': 'loss', 'content': 0.0812123492360115, 'timestamp': '2025-10-02 00:42:56.692305', 'step': 18081, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:56.747526', 'step': 18081, 'epoch': 2}
{'type': 'loss', 'content': 0.04760728031396866, 'timestamp': '2025-10-02 00:42:56.754807', 'step': 18082, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:56.809818', 'step': 18082, 'epoch': 2}
{'type': 'loss', 'content': 0.09398816525936127, 'timestamp': '2025-10-02 00:42:56.811999', 'step': 18083, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:56.866980', 'step': 18083, 'epoch': 2}
{'type': 'loss', 'content': 0.09045352786779404, 'timestamp': '2025-10-02 00:42:56.873178', 'step': 18084, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:56.928001', 'step': 18084, 'epoch': 2}
{'type': 'loss', 'content': 0.1539943665266037, 'timestamp': '2025-10-02 00:42:56.930585', 'step': 18085, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:42:56.985074', 'step': 18085, 'epoch': 2}
{'type': 'loss', 'content': 0.03627451881766319, 'timestamp': '2025-10-02 00:42:56.988299', 'step': 18086, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:42:57.043652', 'step': 18086, 'epoch': 2}
{'type': 'loss', 'content': 0.08529819548130035, 'timestamp': '2025-10-02 00:42:57.047503', 'step': 18087, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:57.106421', 'step': 18087, 'epoch': 2}
{'type': 'loss', 'content': 0.041644152253866196, 'timestamp': '2025-10-02 00:42:57.112831', 'step': 18088, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:42:57.188221', 'step': 18088, 'epoch': 2}
{'type': 'loss', 'content': 0.053166553378105164, 'timestamp': '2025-10-02 00:42:57.191150', 'step': 18089, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:42:57.259890', 'step': 18089, 'epoch': 2}
{'type': 'loss', 'content': 0.027803199365735054, 'timestamp': '2025-10-02 00:42:57.267021', 'step': 18090, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:57.322919', 'step': 18090, 'epoch': 2}
{'type': 'loss', 'content': 0.03598981723189354, 'timestamp': '2025-10-02 00:42:57.325351', 'step': 18091, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:42:57.381350', 'step': 18091, 'epoch': 2}
{'type': 'loss', 'content': 0.060534168034791946, 'timestamp': '2025-10-02 00:42:57.387336', 'step': 18092, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:42:57.441820', 'step': 18092, 'epoch': 2}
{'type': 'loss', 'content': 0.07116631418466568, 'timestamp': '2025-10-02 00:42:57.444067', 'step': 18093, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:42:57.499497', 'step': 18093, 'epoch': 2}
{'type': 'loss', 'content': 0.1175178736448288, 'timestamp': '2025-10-02 00:42:57.501678', 'step': 18094, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:42:57.556672', 'step': 18094, 'epoch': 2}
{'type': 'loss', 'content': 0.03134627640247345, 'timestamp': '2025-10-02 00:42:57.559324', 'step': 18095, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:42:57.615577', 'step': 18095, 'epoch': 2}
{'type': 'loss', 'content': 0.09212010353803635, 'timestamp': '2025-10-02 00:42:57.622181', 'step': 18096, 'epoch': 2}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:43:25.207958', 'step': 18096, 'epoch': 2}
{'type': 'pplx', 'content': 99.92365811760332, 'timestamp': '2025-10-02 00:43:25.216187', 'step': 18096, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:25.276825', 'step': 18096, 'epoch': 2}
{'type': 'loss', 'content': 0.07356849312782288, 'timestamp': '2025-10-02 00:43:25.281303', 'step': 18097, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:25.346395', 'step': 18097, 'epoch': 2}
{'type': 'loss', 'content': 0.10245230048894882, 'timestamp': '2025-10-02 00:43:25.353540', 'step': 18098, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:25.417988', 'step': 18098, 'epoch': 2}
{'type': 'loss', 'content': 0.039888426661491394, 'timestamp': '2025-10-02 00:43:25.426107', 'step': 18099, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:25.483675', 'step': 18099, 'epoch': 2}
{'type': 'loss', 'content': 0.13481204211711884, 'timestamp': '2025-10-02 00:43:25.490463', 'step': 18100, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:43:25.553902', 'step': 18100, 'epoch': 2}
{'type': 'loss', 'content': 0.14653028547763824, 'timestamp': '2025-10-02 00:43:25.558327', 'step': 18101, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:25.616329', 'step': 18101, 'epoch': 2}
{'type': 'loss', 'content': 0.028532562777400017, 'timestamp': '2025-10-02 00:43:25.625916', 'step': 18102, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:43:25.691822', 'step': 18102, 'epoch': 2}
{'type': 'loss', 'content': 0.07976642996072769, 'timestamp': '2025-10-02 00:43:25.701974', 'step': 18103, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:25.759874', 'step': 18103, 'epoch': 2}
{'type': 'loss', 'content': 0.018224291503429413, 'timestamp': '2025-10-02 00:43:25.769632', 'step': 18104, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:25.830560', 'step': 18104, 'epoch': 2}
{'type': 'loss', 'content': 0.019780145958065987, 'timestamp': '2025-10-02 00:43:25.834953', 'step': 18105, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:25.899715', 'step': 18105, 'epoch': 2}
{'type': 'loss', 'content': 0.05470843240618706, 'timestamp': '2025-10-02 00:43:25.903926', 'step': 18106, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:25.964333', 'step': 18106, 'epoch': 2}
{'type': 'loss', 'content': 0.08921440690755844, 'timestamp': '2025-10-02 00:43:25.968361', 'step': 18107, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:43:26.028607', 'step': 18107, 'epoch': 2}
{'type': 'loss', 'content': 0.02980457805097103, 'timestamp': '2025-10-02 00:43:26.036422', 'step': 18108, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:43:26.098666', 'step': 18108, 'epoch': 2}
{'type': 'loss', 'content': 0.10977742820978165, 'timestamp': '2025-10-02 00:43:26.103356', 'step': 18109, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:26.166441', 'step': 18109, 'epoch': 2}
{'type': 'loss', 'content': 0.0513322539627552, 'timestamp': '2025-10-02 00:43:26.173330', 'step': 18110, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:26.238449', 'step': 18110, 'epoch': 2}
{'type': 'loss', 'content': 0.09612955898046494, 'timestamp': '2025-10-02 00:43:26.245485', 'step': 18111, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:43:26.305504', 'step': 18111, 'epoch': 2}
{'type': 'loss', 'content': 0.06566115468740463, 'timestamp': '2025-10-02 00:43:26.313939', 'step': 18112, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:26.371912', 'step': 18112, 'epoch': 2}
{'type': 'loss', 'content': 0.07898514717817307, 'timestamp': '2025-10-02 00:43:26.374793', 'step': 18113, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:26.441433', 'step': 18113, 'epoch': 2}
{'type': 'loss', 'content': 0.1248612254858017, 'timestamp': '2025-10-02 00:43:26.444737', 'step': 18114, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:26.503754', 'step': 18114, 'epoch': 2}
{'type': 'loss', 'content': 0.019493913277983665, 'timestamp': '2025-10-02 00:43:26.509316', 'step': 18115, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:26.573407', 'step': 18115, 'epoch': 2}
{'type': 'loss', 'content': 0.10993874818086624, 'timestamp': '2025-10-02 00:43:26.580030', 'step': 18116, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:26.638021', 'step': 18116, 'epoch': 2}
{'type': 'loss', 'content': 0.007671816274523735, 'timestamp': '2025-10-02 00:43:26.643690', 'step': 18117, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:26.700535', 'step': 18117, 'epoch': 2}
{'type': 'loss', 'content': 0.045270077884197235, 'timestamp': '2025-10-02 00:43:26.707962', 'step': 18118, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:26.773552', 'step': 18118, 'epoch': 2}
{'type': 'loss', 'content': 0.088991180062294, 'timestamp': '2025-10-02 00:43:26.783807', 'step': 18119, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:26.850103', 'step': 18119, 'epoch': 2}
{'type': 'loss', 'content': 0.03597184270620346, 'timestamp': '2025-10-02 00:43:26.863992', 'step': 18120, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:26.932771', 'step': 18120, 'epoch': 2}
{'type': 'loss', 'content': 0.002360131125897169, 'timestamp': '2025-10-02 00:43:26.940129', 'step': 18121, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:26.996962', 'step': 18121, 'epoch': 2}
{'type': 'loss', 'content': 0.037324871867895126, 'timestamp': '2025-10-02 00:43:27.002592', 'step': 18122, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:27.059804', 'step': 18122, 'epoch': 2}
{'type': 'loss', 'content': 0.0124849583953619, 'timestamp': '2025-10-02 00:43:27.065157', 'step': 18123, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:27.140074', 'step': 18123, 'epoch': 2}
{'type': 'loss', 'content': 0.029767854139208794, 'timestamp': '2025-10-02 00:43:27.150358', 'step': 18124, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:27.228147', 'step': 18124, 'epoch': 2}
{'type': 'loss', 'content': 0.029006756842136383, 'timestamp': '2025-10-02 00:43:27.237620', 'step': 18125, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:27.295606', 'step': 18125, 'epoch': 2}
{'type': 'loss', 'content': 0.011134138330817223, 'timestamp': '2025-10-02 00:43:27.304995', 'step': 18126, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:43:27.377502', 'step': 18126, 'epoch': 2}
{'type': 'loss', 'content': 0.003205670742318034, 'timestamp': '2025-10-02 00:43:27.388182', 'step': 18127, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:27.453670', 'step': 18127, 'epoch': 2}
{'type': 'loss', 'content': 0.08788544684648514, 'timestamp': '2025-10-02 00:43:27.468955', 'step': 18128, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:27.545216', 'step': 18128, 'epoch': 2}
{'type': 'loss', 'content': 0.049255818128585815, 'timestamp': '2025-10-02 00:43:27.548172', 'step': 18129, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:27.608675', 'step': 18129, 'epoch': 2}
{'type': 'loss', 'content': 0.01927286945283413, 'timestamp': '2025-10-02 00:43:27.616082', 'step': 18130, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:27.684278', 'step': 18130, 'epoch': 2}
{'type': 'loss', 'content': 0.18365703523159027, 'timestamp': '2025-10-02 00:43:27.696126', 'step': 18131, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:27.753614', 'step': 18131, 'epoch': 2}
{'type': 'loss', 'content': 0.07186631858348846, 'timestamp': '2025-10-02 00:43:27.761731', 'step': 18132, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:27.826803', 'step': 18132, 'epoch': 2}
{'type': 'loss', 'content': 0.05908072739839554, 'timestamp': '2025-10-02 00:43:27.831836', 'step': 18133, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:27.890866', 'step': 18133, 'epoch': 2}
{'type': 'loss', 'content': 0.014927486889064312, 'timestamp': '2025-10-02 00:43:27.900396', 'step': 18134, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:27.962596', 'step': 18134, 'epoch': 2}
{'type': 'loss', 'content': 0.14515161514282227, 'timestamp': '2025-10-02 00:43:27.970445', 'step': 18135, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:28.045789', 'step': 18135, 'epoch': 2}
{'type': 'loss', 'content': 0.06262729316949844, 'timestamp': '2025-10-02 00:43:28.054587', 'step': 18136, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:28.121023', 'step': 18136, 'epoch': 2}
{'type': 'loss', 'content': 0.013941720128059387, 'timestamp': '2025-10-02 00:43:28.124121', 'step': 18137, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:28.185674', 'step': 18137, 'epoch': 2}
{'type': 'loss', 'content': 0.06653033196926117, 'timestamp': '2025-10-02 00:43:28.191658', 'step': 18138, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:28.252162', 'step': 18138, 'epoch': 2}
{'type': 'loss', 'content': 0.07410091161727905, 'timestamp': '2025-10-02 00:43:28.259371', 'step': 18139, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:28.317428', 'step': 18139, 'epoch': 2}
{'type': 'loss', 'content': 0.04389657452702522, 'timestamp': '2025-10-02 00:43:28.327754', 'step': 18140, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:28.392816', 'step': 18140, 'epoch': 2}
{'type': 'loss', 'content': 0.05733662471175194, 'timestamp': '2025-10-02 00:43:28.396082', 'step': 18141, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:28.455991', 'step': 18141, 'epoch': 2}
{'type': 'loss', 'content': 0.06178940087556839, 'timestamp': '2025-10-02 00:43:28.459441', 'step': 18142, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:28.528157', 'step': 18142, 'epoch': 2}
{'type': 'loss', 'content': 0.023398103192448616, 'timestamp': '2025-10-02 00:43:28.537685', 'step': 18143, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:28.601270', 'step': 18143, 'epoch': 2}
{'type': 'loss', 'content': 0.002804831136018038, 'timestamp': '2025-10-02 00:43:28.608193', 'step': 18144, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:28.663708', 'step': 18144, 'epoch': 2}
{'type': 'loss', 'content': 0.08309127390384674, 'timestamp': '2025-10-02 00:43:28.667134', 'step': 18145, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:28.726535', 'step': 18145, 'epoch': 2}
{'type': 'loss', 'content': 0.050491634756326675, 'timestamp': '2025-10-02 00:43:28.730237', 'step': 18146, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:28.793343', 'step': 18146, 'epoch': 2}
{'type': 'loss', 'content': 0.0414983406662941, 'timestamp': '2025-10-02 00:43:28.798955', 'step': 18147, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:43:28.859626', 'step': 18147, 'epoch': 2}
{'type': 'loss', 'content': 0.15131527185440063, 'timestamp': '2025-10-02 00:43:28.868769', 'step': 18148, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:43:28.948056', 'step': 18148, 'epoch': 2}
{'type': 'loss', 'content': 0.05806306004524231, 'timestamp': '2025-10-02 00:43:28.961033', 'step': 18149, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:29.022444', 'step': 18149, 'epoch': 2}
{'type': 'loss', 'content': 0.025478173047304153, 'timestamp': '2025-10-02 00:43:29.031596', 'step': 18150, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:29.087905', 'step': 18150, 'epoch': 2}
{'type': 'loss', 'content': 0.02126096375286579, 'timestamp': '2025-10-02 00:43:29.090831', 'step': 18151, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:29.147665', 'step': 18151, 'epoch': 2}
{'type': 'loss', 'content': 0.1081036776304245, 'timestamp': '2025-10-02 00:43:29.155738', 'step': 18152, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:43:29.215163', 'step': 18152, 'epoch': 2}
{'type': 'loss', 'content': 0.04652596637606621, 'timestamp': '2025-10-02 00:43:29.217570', 'step': 18153, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:29.276113', 'step': 18153, 'epoch': 2}
{'type': 'loss', 'content': 0.04842931777238846, 'timestamp': '2025-10-02 00:43:29.285349', 'step': 18154, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:43:29.347300', 'step': 18154, 'epoch': 2}
{'type': 'loss', 'content': 0.101292185485363, 'timestamp': '2025-10-02 00:43:29.354633', 'step': 18155, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:29.418267', 'step': 18155, 'epoch': 2}
{'type': 'loss', 'content': 0.04814523831009865, 'timestamp': '2025-10-02 00:43:29.426285', 'step': 18156, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:43:29.484962', 'step': 18156, 'epoch': 2}
{'type': 'loss', 'content': 0.05988197401165962, 'timestamp': '2025-10-02 00:43:29.488326', 'step': 18157, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:29.550961', 'step': 18157, 'epoch': 2}
{'type': 'loss', 'content': 0.11331496387720108, 'timestamp': '2025-10-02 00:43:29.556687', 'step': 18158, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:29.623695', 'step': 18158, 'epoch': 2}
{'type': 'loss', 'content': 0.04755772650241852, 'timestamp': '2025-10-02 00:43:29.632667', 'step': 18159, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:29.693433', 'step': 18159, 'epoch': 2}
{'type': 'loss', 'content': 0.022308163344860077, 'timestamp': '2025-10-02 00:43:29.701867', 'step': 18160, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:29.763294', 'step': 18160, 'epoch': 2}
{'type': 'loss', 'content': 0.10715068131685257, 'timestamp': '2025-10-02 00:43:29.769103', 'step': 18161, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:43:29.827815', 'step': 18161, 'epoch': 2}
{'type': 'loss', 'content': 0.09308060258626938, 'timestamp': '2025-10-02 00:43:29.830973', 'step': 18162, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:29.892880', 'step': 18162, 'epoch': 2}
{'type': 'loss', 'content': 0.016300087794661522, 'timestamp': '2025-10-02 00:43:29.895749', 'step': 18163, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:29.951721', 'step': 18163, 'epoch': 2}
{'type': 'loss', 'content': 0.10251779109239578, 'timestamp': '2025-10-02 00:43:29.958899', 'step': 18164, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:43:30.026169', 'step': 18164, 'epoch': 2}
{'type': 'loss', 'content': 0.09847652167081833, 'timestamp': '2025-10-02 00:43:30.028910', 'step': 18165, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:30.087942', 'step': 18165, 'epoch': 2}
{'type': 'loss', 'content': 0.04564043506979942, 'timestamp': '2025-10-02 00:43:30.095263', 'step': 18166, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:30.153634', 'step': 18166, 'epoch': 2}
{'type': 'loss', 'content': 0.08378206193447113, 'timestamp': '2025-10-02 00:43:30.156274', 'step': 18167, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:30.214694', 'step': 18167, 'epoch': 2}
{'type': 'loss', 'content': 0.024977151304483414, 'timestamp': '2025-10-02 00:43:30.225024', 'step': 18168, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:30.283472', 'step': 18168, 'epoch': 2}
{'type': 'loss', 'content': 0.06119387596845627, 'timestamp': '2025-10-02 00:43:30.286219', 'step': 18169, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:30.350461', 'step': 18169, 'epoch': 2}
{'type': 'loss', 'content': 0.05548805370926857, 'timestamp': '2025-10-02 00:43:30.358357', 'step': 18170, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:30.423815', 'step': 18170, 'epoch': 2}
{'type': 'loss', 'content': 0.06382950395345688, 'timestamp': '2025-10-02 00:43:30.433393', 'step': 18171, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:30.499674', 'step': 18171, 'epoch': 2}
{'type': 'loss', 'content': 0.07698746770620346, 'timestamp': '2025-10-02 00:43:30.507223', 'step': 18172, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:30.569351', 'step': 18172, 'epoch': 2}
{'type': 'loss', 'content': 0.03815321996808052, 'timestamp': '2025-10-02 00:43:30.576654', 'step': 18173, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:30.633809', 'step': 18173, 'epoch': 2}
{'type': 'loss', 'content': 0.05538136139512062, 'timestamp': '2025-10-02 00:43:30.636747', 'step': 18174, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:30.712170', 'step': 18174, 'epoch': 2}
{'type': 'loss', 'content': 0.051970623433589935, 'timestamp': '2025-10-02 00:43:30.715506', 'step': 18175, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:43:30.773813', 'step': 18175, 'epoch': 2}
{'type': 'loss', 'content': 0.09641751646995544, 'timestamp': '2025-10-02 00:43:30.780370', 'step': 18176, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:30.849638', 'step': 18176, 'epoch': 2}
{'type': 'loss', 'content': 0.03743759170174599, 'timestamp': '2025-10-02 00:43:30.854763', 'step': 18177, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:30.910519', 'step': 18177, 'epoch': 2}
{'type': 'loss', 'content': 0.06558211892843246, 'timestamp': '2025-10-02 00:43:30.913273', 'step': 18178, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:30.969688', 'step': 18178, 'epoch': 2}
{'type': 'loss', 'content': 0.01766517199575901, 'timestamp': '2025-10-02 00:43:30.975359', 'step': 18179, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:31.033579', 'step': 18179, 'epoch': 2}
{'type': 'loss', 'content': 0.0739063173532486, 'timestamp': '2025-10-02 00:43:31.040126', 'step': 18180, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:31.102380', 'step': 18180, 'epoch': 2}
{'type': 'loss', 'content': 0.05169399455189705, 'timestamp': '2025-10-02 00:43:31.105232', 'step': 18181, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:31.162231', 'step': 18181, 'epoch': 2}
{'type': 'loss', 'content': 0.020841166377067566, 'timestamp': '2025-10-02 00:43:31.171293', 'step': 18182, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:31.232856', 'step': 18182, 'epoch': 2}
{'type': 'loss', 'content': 0.02722933329641819, 'timestamp': '2025-10-02 00:43:31.236238', 'step': 18183, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:43:31.318822', 'step': 18183, 'epoch': 2}
{'type': 'loss', 'content': 0.044527776539325714, 'timestamp': '2025-10-02 00:43:31.333033', 'step': 18184, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:31.388051', 'step': 18184, 'epoch': 2}
{'type': 'loss', 'content': 0.10121658444404602, 'timestamp': '2025-10-02 00:43:31.398725', 'step': 18185, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:31.466124', 'step': 18185, 'epoch': 2}
{'type': 'loss', 'content': 0.04276791214942932, 'timestamp': '2025-10-02 00:43:31.469289', 'step': 18186, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:31.526115', 'step': 18186, 'epoch': 2}
{'type': 'loss', 'content': 0.14030620455741882, 'timestamp': '2025-10-02 00:43:31.530746', 'step': 18187, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:31.593370', 'step': 18187, 'epoch': 2}
{'type': 'loss', 'content': 0.028477070853114128, 'timestamp': '2025-10-02 00:43:31.599942', 'step': 18188, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:31.655736', 'step': 18188, 'epoch': 2}
{'type': 'loss', 'content': 0.03148888424038887, 'timestamp': '2025-10-02 00:43:31.666570', 'step': 18189, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:31.734242', 'step': 18189, 'epoch': 2}
{'type': 'loss', 'content': 0.18570618331432343, 'timestamp': '2025-10-02 00:43:31.740382', 'step': 18190, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:43:31.809391', 'step': 18190, 'epoch': 2}
{'type': 'loss', 'content': 0.1202600747346878, 'timestamp': '2025-10-02 00:43:31.812957', 'step': 18191, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:31.868339', 'step': 18191, 'epoch': 2}
{'type': 'loss', 'content': 0.08138889819383621, 'timestamp': '2025-10-02 00:43:31.874604', 'step': 18192, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:43:31.929987', 'step': 18192, 'epoch': 2}
{'type': 'loss', 'content': 0.11271750181913376, 'timestamp': '2025-10-02 00:43:31.933262', 'step': 18193, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:31.991361', 'step': 18193, 'epoch': 2}
{'type': 'loss', 'content': 0.05711914598941803, 'timestamp': '2025-10-02 00:43:31.997456', 'step': 18194, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:32.058404', 'step': 18194, 'epoch': 2}
{'type': 'loss', 'content': 0.055088188499212265, 'timestamp': '2025-10-02 00:43:32.064732', 'step': 18195, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:32.119495', 'step': 18195, 'epoch': 2}
{'type': 'loss', 'content': 0.023561734706163406, 'timestamp': '2025-10-02 00:43:32.129560', 'step': 18196, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:32.184822', 'step': 18196, 'epoch': 2}
{'type': 'loss', 'content': 0.08901692181825638, 'timestamp': '2025-10-02 00:43:32.190821', 'step': 18197, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:32.250723', 'step': 18197, 'epoch': 2}
{'type': 'loss', 'content': 0.009785963222384453, 'timestamp': '2025-10-02 00:43:32.258051', 'step': 18198, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:32.319283', 'step': 18198, 'epoch': 2}
{'type': 'loss', 'content': 0.06061150133609772, 'timestamp': '2025-10-02 00:43:32.322354', 'step': 18199, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:32.384805', 'step': 18199, 'epoch': 2}
{'type': 'loss', 'content': 0.09738440811634064, 'timestamp': '2025-10-02 00:43:32.391117', 'step': 18200, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:43:32.461413', 'step': 18200, 'epoch': 2}
{'type': 'loss', 'content': 0.02561904676258564, 'timestamp': '2025-10-02 00:43:32.474333', 'step': 18201, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:32.536137', 'step': 18201, 'epoch': 2}
{'type': 'loss', 'content': 0.030058400705456734, 'timestamp': '2025-10-02 00:43:32.544659', 'step': 18202, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:32.614693', 'step': 18202, 'epoch': 2}
{'type': 'loss', 'content': 0.0401570126414299, 'timestamp': '2025-10-02 00:43:32.621330', 'step': 18203, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:32.684651', 'step': 18203, 'epoch': 2}
{'type': 'loss', 'content': 0.006166014354676008, 'timestamp': '2025-10-02 00:43:32.691017', 'step': 18204, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:32.754410', 'step': 18204, 'epoch': 2}
{'type': 'loss', 'content': 0.066117063164711, 'timestamp': '2025-10-02 00:43:32.760008', 'step': 18205, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:32.819395', 'step': 18205, 'epoch': 2}
{'type': 'loss', 'content': 0.08588908612728119, 'timestamp': '2025-10-02 00:43:32.824659', 'step': 18206, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:32.884332', 'step': 18206, 'epoch': 2}
{'type': 'loss', 'content': 0.056877996772527695, 'timestamp': '2025-10-02 00:43:32.889959', 'step': 18207, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:32.955945', 'step': 18207, 'epoch': 2}
{'type': 'loss', 'content': 0.040012016892433167, 'timestamp': '2025-10-02 00:43:32.962644', 'step': 18208, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:33.019637', 'step': 18208, 'epoch': 2}
{'type': 'loss', 'content': 0.055405162274837494, 'timestamp': '2025-10-02 00:43:33.027014', 'step': 18209, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:33.084681', 'step': 18209, 'epoch': 2}
{'type': 'loss', 'content': 0.09475558996200562, 'timestamp': '2025-10-02 00:43:33.096486', 'step': 18210, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:33.173274', 'step': 18210, 'epoch': 2}
{'type': 'loss', 'content': 0.04265787824988365, 'timestamp': '2025-10-02 00:43:33.176291', 'step': 18211, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:33.236070', 'step': 18211, 'epoch': 2}
{'type': 'loss', 'content': 0.05482997000217438, 'timestamp': '2025-10-02 00:43:33.246400', 'step': 18212, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:33.304795', 'step': 18212, 'epoch': 2}
{'type': 'loss', 'content': 0.14000967144966125, 'timestamp': '2025-10-02 00:43:33.308223', 'step': 18213, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:33.374193', 'step': 18213, 'epoch': 2}
{'type': 'loss', 'content': 0.015018428675830364, 'timestamp': '2025-10-02 00:43:33.378290', 'step': 18214, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:33.440357', 'step': 18214, 'epoch': 2}
{'type': 'loss', 'content': 0.0677030012011528, 'timestamp': '2025-10-02 00:43:33.447159', 'step': 18215, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:33.506745', 'step': 18215, 'epoch': 2}
{'type': 'loss', 'content': 0.06366530060768127, 'timestamp': '2025-10-02 00:43:33.516454', 'step': 18216, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:33.577093', 'step': 18216, 'epoch': 2}
{'type': 'loss', 'content': 0.005682081915438175, 'timestamp': '2025-10-02 00:43:33.586067', 'step': 18217, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:43:33.671884', 'step': 18217, 'epoch': 2}
{'type': 'loss', 'content': 0.020209679380059242, 'timestamp': '2025-10-02 00:43:33.684526', 'step': 18218, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:33.743692', 'step': 18218, 'epoch': 2}
{'type': 'loss', 'content': 0.12082051485776901, 'timestamp': '2025-10-02 00:43:33.746841', 'step': 18219, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:33.809519', 'step': 18219, 'epoch': 2}
{'type': 'loss', 'content': 0.0783509612083435, 'timestamp': '2025-10-02 00:43:33.819925', 'step': 18220, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:43:33.885426', 'step': 18220, 'epoch': 2}
{'type': 'loss', 'content': 0.13328830897808075, 'timestamp': '2025-10-02 00:43:33.888082', 'step': 18221, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:33.947941', 'step': 18221, 'epoch': 2}
{'type': 'loss', 'content': 0.09861548244953156, 'timestamp': '2025-10-02 00:43:33.955265', 'step': 18222, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:34.015550', 'step': 18222, 'epoch': 2}
{'type': 'loss', 'content': 0.026075338944792747, 'timestamp': '2025-10-02 00:43:34.019161', 'step': 18223, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:34.079841', 'step': 18223, 'epoch': 2}
{'type': 'loss', 'content': 0.021361814811825752, 'timestamp': '2025-10-02 00:43:34.086276', 'step': 18224, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:34.142271', 'step': 18224, 'epoch': 2}
{'type': 'loss', 'content': 0.06320485472679138, 'timestamp': '2025-10-02 00:43:34.144777', 'step': 18225, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:34.213940', 'step': 18225, 'epoch': 2}
{'type': 'loss', 'content': 0.039923202246427536, 'timestamp': '2025-10-02 00:43:34.216643', 'step': 18226, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:34.272755', 'step': 18226, 'epoch': 2}
{'type': 'loss', 'content': 0.05427396297454834, 'timestamp': '2025-10-02 00:43:34.276225', 'step': 18227, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:34.332634', 'step': 18227, 'epoch': 2}
{'type': 'loss', 'content': 0.02848973125219345, 'timestamp': '2025-10-02 00:43:34.344255', 'step': 18228, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:34.403085', 'step': 18228, 'epoch': 2}
{'type': 'loss', 'content': 0.06926129758358002, 'timestamp': '2025-10-02 00:43:34.411813', 'step': 18229, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:34.483242', 'step': 18229, 'epoch': 2}
{'type': 'loss', 'content': 0.05220918357372284, 'timestamp': '2025-10-02 00:43:34.488738', 'step': 18230, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:34.546645', 'step': 18230, 'epoch': 2}
{'type': 'loss', 'content': 0.07892181724309921, 'timestamp': '2025-10-02 00:43:34.549538', 'step': 18231, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:34.604332', 'step': 18231, 'epoch': 2}
{'type': 'loss', 'content': 0.06832533329725266, 'timestamp': '2025-10-02 00:43:34.614101', 'step': 18232, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:34.674011', 'step': 18232, 'epoch': 2}
{'type': 'loss', 'content': 0.0903644785284996, 'timestamp': '2025-10-02 00:43:34.676909', 'step': 18233, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:34.733078', 'step': 18233, 'epoch': 2}
{'type': 'loss', 'content': 0.018663346767425537, 'timestamp': '2025-10-02 00:43:34.738609', 'step': 18234, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:34.798547', 'step': 18234, 'epoch': 2}
{'type': 'loss', 'content': 0.023443011566996574, 'timestamp': '2025-10-02 00:43:34.803926', 'step': 18235, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:34.863582', 'step': 18235, 'epoch': 2}
{'type': 'loss', 'content': 0.030104828998446465, 'timestamp': '2025-10-02 00:43:34.874545', 'step': 18236, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:34.936938', 'step': 18236, 'epoch': 2}
{'type': 'loss', 'content': 0.025174343958497047, 'timestamp': '2025-10-02 00:43:34.945970', 'step': 18237, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:35.010583', 'step': 18237, 'epoch': 2}
{'type': 'loss', 'content': 0.06102652847766876, 'timestamp': '2025-10-02 00:43:35.019479', 'step': 18238, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:35.087833', 'step': 18238, 'epoch': 2}
{'type': 'loss', 'content': 0.06547803431749344, 'timestamp': '2025-10-02 00:43:35.094842', 'step': 18239, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:35.151896', 'step': 18239, 'epoch': 2}
{'type': 'loss', 'content': 0.02950683794915676, 'timestamp': '2025-10-02 00:43:35.160059', 'step': 18240, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:35.218437', 'step': 18240, 'epoch': 2}
{'type': 'loss', 'content': 0.026260897517204285, 'timestamp': '2025-10-02 00:43:35.225366', 'step': 18241, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:35.281939', 'step': 18241, 'epoch': 2}
{'type': 'loss', 'content': 0.16891901195049286, 'timestamp': '2025-10-02 00:43:35.286604', 'step': 18242, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:35.349869', 'step': 18242, 'epoch': 2}
{'type': 'loss', 'content': 0.012712283991277218, 'timestamp': '2025-10-02 00:43:35.354254', 'step': 18243, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:35.411569', 'step': 18243, 'epoch': 2}
{'type': 'loss', 'content': 0.08639699220657349, 'timestamp': '2025-10-02 00:43:35.418873', 'step': 18244, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:35.479453', 'step': 18244, 'epoch': 2}
{'type': 'loss', 'content': 0.008776522241532803, 'timestamp': '2025-10-02 00:43:35.489523', 'step': 18245, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:35.559016', 'step': 18245, 'epoch': 2}
{'type': 'loss', 'content': 0.014938119798898697, 'timestamp': '2025-10-02 00:43:35.564072', 'step': 18246, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:43:35.637354', 'step': 18246, 'epoch': 2}
{'type': 'loss', 'content': 0.05631386116147041, 'timestamp': '2025-10-02 00:43:35.649278', 'step': 18247, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:35.714804', 'step': 18247, 'epoch': 2}
{'type': 'loss', 'content': 0.10791979730129242, 'timestamp': '2025-10-02 00:43:35.728495', 'step': 18248, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:43:35.788875', 'step': 18248, 'epoch': 2}
{'type': 'loss', 'content': 0.14657177031040192, 'timestamp': '2025-10-02 00:43:35.792465', 'step': 18249, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:35.856924', 'step': 18249, 'epoch': 2}
{'type': 'loss', 'content': 0.10156891494989395, 'timestamp': '2025-10-02 00:43:35.862381', 'step': 18250, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:35.928772', 'step': 18250, 'epoch': 2}
{'type': 'loss', 'content': 0.013564513064920902, 'timestamp': '2025-10-02 00:43:35.933486', 'step': 18251, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:35.994857', 'step': 18251, 'epoch': 2}
{'type': 'loss', 'content': 0.028334932401776314, 'timestamp': '2025-10-02 00:43:36.002741', 'step': 18252, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:36.080474', 'step': 18252, 'epoch': 2}
{'type': 'loss', 'content': 0.05323582515120506, 'timestamp': '2025-10-02 00:43:36.085724', 'step': 18253, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:43:36.160524', 'step': 18253, 'epoch': 2}
{'type': 'loss', 'content': 0.024033986032009125, 'timestamp': '2025-10-02 00:43:36.170911', 'step': 18254, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:36.240623', 'step': 18254, 'epoch': 2}
{'type': 'loss', 'content': 0.021254457533359528, 'timestamp': '2025-10-02 00:43:36.247498', 'step': 18255, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:36.307028', 'step': 18255, 'epoch': 2}
{'type': 'loss', 'content': 0.1468818187713623, 'timestamp': '2025-10-02 00:43:36.316833', 'step': 18256, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:36.379805', 'step': 18256, 'epoch': 2}
{'type': 'loss', 'content': 0.11752527207136154, 'timestamp': '2025-10-02 00:43:36.382555', 'step': 18257, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:36.454719', 'step': 18257, 'epoch': 2}
{'type': 'loss', 'content': 0.15289652347564697, 'timestamp': '2025-10-02 00:43:36.457816', 'step': 18258, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:36.516685', 'step': 18258, 'epoch': 2}
{'type': 'loss', 'content': 0.021024031564593315, 'timestamp': '2025-10-02 00:43:36.519973', 'step': 18259, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:36.581319', 'step': 18259, 'epoch': 2}
{'type': 'loss', 'content': 0.023025857284665108, 'timestamp': '2025-10-02 00:43:36.587503', 'step': 18260, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:36.654103', 'step': 18260, 'epoch': 2}
{'type': 'loss', 'content': 0.05154675990343094, 'timestamp': '2025-10-02 00:43:36.656843', 'step': 18261, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:36.715111', 'step': 18261, 'epoch': 2}
{'type': 'loss', 'content': 0.054149970412254333, 'timestamp': '2025-10-02 00:43:36.717747', 'step': 18262, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:36.780070', 'step': 18262, 'epoch': 2}
{'type': 'loss', 'content': 0.014501373283565044, 'timestamp': '2025-10-02 00:43:36.782784', 'step': 18263, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:36.851983', 'step': 18263, 'epoch': 2}
{'type': 'loss', 'content': 0.07934856414794922, 'timestamp': '2025-10-02 00:43:36.859178', 'step': 18264, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:43:36.918939', 'step': 18264, 'epoch': 2}
{'type': 'loss', 'content': 0.02276340126991272, 'timestamp': '2025-10-02 00:43:36.929895', 'step': 18265, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:36.989137', 'step': 18265, 'epoch': 2}
{'type': 'loss', 'content': 0.039552196860313416, 'timestamp': '2025-10-02 00:43:36.992095', 'step': 18266, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:37.050280', 'step': 18266, 'epoch': 2}
{'type': 'loss', 'content': 0.03082604892551899, 'timestamp': '2025-10-02 00:43:37.057083', 'step': 18267, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:37.113759', 'step': 18267, 'epoch': 2}
{'type': 'loss', 'content': 0.05003993958234787, 'timestamp': '2025-10-02 00:43:37.120790', 'step': 18268, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:43:37.184852', 'step': 18268, 'epoch': 2}
{'type': 'loss', 'content': 0.0769101157784462, 'timestamp': '2025-10-02 00:43:37.195747', 'step': 18269, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:37.258097', 'step': 18269, 'epoch': 2}
{'type': 'loss', 'content': 0.033366281539201736, 'timestamp': '2025-10-02 00:43:37.266341', 'step': 18270, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:37.324487', 'step': 18270, 'epoch': 2}
{'type': 'loss', 'content': 0.09463459998369217, 'timestamp': '2025-10-02 00:43:37.328382', 'step': 18271, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:37.398735', 'step': 18271, 'epoch': 2}
{'type': 'loss', 'content': 0.025025727227330208, 'timestamp': '2025-10-02 00:43:37.411496', 'step': 18272, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:43:37.494687', 'step': 18272, 'epoch': 2}
{'type': 'loss', 'content': 0.0469997264444828, 'timestamp': '2025-10-02 00:43:37.505996', 'step': 18273, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:37.580839', 'step': 18273, 'epoch': 2}
{'type': 'loss', 'content': 0.06255180388689041, 'timestamp': '2025-10-02 00:43:37.584760', 'step': 18274, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:43:37.671280', 'step': 18274, 'epoch': 2}
{'type': 'loss', 'content': 0.027111174538731575, 'timestamp': '2025-10-02 00:43:37.686289', 'step': 18275, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:37.757567', 'step': 18275, 'epoch': 2}
{'type': 'loss', 'content': 0.08112078905105591, 'timestamp': '2025-10-02 00:43:37.764667', 'step': 18276, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:37.834615', 'step': 18276, 'epoch': 2}
{'type': 'loss', 'content': 0.03379862755537033, 'timestamp': '2025-10-02 00:43:37.845888', 'step': 18277, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:37.915642', 'step': 18277, 'epoch': 2}
{'type': 'loss', 'content': 0.005356652196496725, 'timestamp': '2025-10-02 00:43:37.919553', 'step': 18278, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:37.979256', 'step': 18278, 'epoch': 2}
{'type': 'loss', 'content': 0.10065609216690063, 'timestamp': '2025-10-02 00:43:37.982566', 'step': 18279, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:38.040424', 'step': 18279, 'epoch': 2}
{'type': 'loss', 'content': 0.060622554272413254, 'timestamp': '2025-10-02 00:43:38.048176', 'step': 18280, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:38.104861', 'step': 18280, 'epoch': 2}
{'type': 'loss', 'content': 0.06746276468038559, 'timestamp': '2025-10-02 00:43:38.107535', 'step': 18281, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:38.168107', 'step': 18281, 'epoch': 2}
{'type': 'loss', 'content': 0.08657122403383255, 'timestamp': '2025-10-02 00:43:38.171174', 'step': 18282, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:38.227402', 'step': 18282, 'epoch': 2}
{'type': 'loss', 'content': 0.006984899286180735, 'timestamp': '2025-10-02 00:43:38.236103', 'step': 18283, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:38.291371', 'step': 18283, 'epoch': 2}
{'type': 'loss', 'content': 0.13537457585334778, 'timestamp': '2025-10-02 00:43:38.299540', 'step': 18284, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:43:38.357610', 'step': 18284, 'epoch': 2}
{'type': 'loss', 'content': 0.13115249574184418, 'timestamp': '2025-10-02 00:43:38.360680', 'step': 18285, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:43:38.420958', 'step': 18285, 'epoch': 2}
{'type': 'loss', 'content': 0.05524621903896332, 'timestamp': '2025-10-02 00:43:38.423786', 'step': 18286, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:43:38.492032', 'step': 18286, 'epoch': 2}
{'type': 'loss', 'content': 0.03301380202174187, 'timestamp': '2025-10-02 00:43:38.502465', 'step': 18287, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:38.562650', 'step': 18287, 'epoch': 2}
{'type': 'loss', 'content': 0.017948320135474205, 'timestamp': '2025-10-02 00:43:38.569874', 'step': 18288, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:38.629011', 'step': 18288, 'epoch': 2}
{'type': 'loss', 'content': 0.015448400750756264, 'timestamp': '2025-10-02 00:43:38.638782', 'step': 18289, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:43:38.700075', 'step': 18289, 'epoch': 2}
{'type': 'loss', 'content': 0.1579548567533493, 'timestamp': '2025-10-02 00:43:38.710308', 'step': 18290, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:38.771568', 'step': 18290, 'epoch': 2}
{'type': 'loss', 'content': 0.07339687645435333, 'timestamp': '2025-10-02 00:43:38.774440', 'step': 18291, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:38.830521', 'step': 18291, 'epoch': 2}
{'type': 'loss', 'content': 0.06325432658195496, 'timestamp': '2025-10-02 00:43:38.840134', 'step': 18292, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:38.897613', 'step': 18292, 'epoch': 2}
{'type': 'loss', 'content': 0.06536554545164108, 'timestamp': '2025-10-02 00:43:38.901481', 'step': 18293, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:38.959872', 'step': 18293, 'epoch': 2}
{'type': 'loss', 'content': 0.004795952700078487, 'timestamp': '2025-10-02 00:43:38.969404', 'step': 18294, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:39.026683', 'step': 18294, 'epoch': 2}
{'type': 'loss', 'content': 0.17559340596199036, 'timestamp': '2025-10-02 00:43:39.029622', 'step': 18295, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:39.094769', 'step': 18295, 'epoch': 2}
{'type': 'loss', 'content': 0.04957285150885582, 'timestamp': '2025-10-02 00:43:39.103579', 'step': 18296, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:39.168987', 'step': 18296, 'epoch': 2}
{'type': 'loss', 'content': 0.039367545396089554, 'timestamp': '2025-10-02 00:43:39.175926', 'step': 18297, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:39.246587', 'step': 18297, 'epoch': 2}
{'type': 'loss', 'content': 0.08202970027923584, 'timestamp': '2025-10-02 00:43:39.252827', 'step': 18298, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:39.312339', 'step': 18298, 'epoch': 2}
{'type': 'loss', 'content': 0.050670087337493896, 'timestamp': '2025-10-02 00:43:39.317608', 'step': 18299, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:39.383515', 'step': 18299, 'epoch': 2}
{'type': 'loss', 'content': 0.0454598106443882, 'timestamp': '2025-10-02 00:43:39.390070', 'step': 18300, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:39.451992', 'step': 18300, 'epoch': 2}
{'type': 'loss', 'content': 0.0038797901943325996, 'timestamp': '2025-10-02 00:43:39.456980', 'step': 18301, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:43:39.527091', 'step': 18301, 'epoch': 2}
{'type': 'loss', 'content': 0.06519697606563568, 'timestamp': '2025-10-02 00:43:39.537899', 'step': 18302, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:39.597909', 'step': 18302, 'epoch': 2}
{'type': 'loss', 'content': 0.13551124930381775, 'timestamp': '2025-10-02 00:43:39.600619', 'step': 18303, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:39.664673', 'step': 18303, 'epoch': 2}
{'type': 'loss', 'content': 0.010549790225923061, 'timestamp': '2025-10-02 00:43:39.674511', 'step': 18304, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:43:39.754238', 'step': 18304, 'epoch': 2}
{'type': 'loss', 'content': 0.023918146267533302, 'timestamp': '2025-10-02 00:43:39.760765', 'step': 18305, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:39.832312', 'step': 18305, 'epoch': 2}
{'type': 'loss', 'content': 0.04207804799079895, 'timestamp': '2025-10-02 00:43:39.835794', 'step': 18306, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:39.895996', 'step': 18306, 'epoch': 2}
{'type': 'loss', 'content': 0.019907955080270767, 'timestamp': '2025-10-02 00:43:39.899464', 'step': 18307, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:39.962883', 'step': 18307, 'epoch': 2}
{'type': 'loss', 'content': 0.022360194474458694, 'timestamp': '2025-10-02 00:43:39.970180', 'step': 18308, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:40.030523', 'step': 18308, 'epoch': 2}
{'type': 'loss', 'content': 0.027031295001506805, 'timestamp': '2025-10-02 00:43:40.036493', 'step': 18309, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:43:40.108508', 'step': 18309, 'epoch': 2}
{'type': 'loss', 'content': 0.026575516909360886, 'timestamp': '2025-10-02 00:43:40.119350', 'step': 18310, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:43:40.184398', 'step': 18310, 'epoch': 2}
{'type': 'loss', 'content': 0.21382597088813782, 'timestamp': '2025-10-02 00:43:40.197356', 'step': 18311, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:43:40.270084', 'step': 18311, 'epoch': 2}
{'type': 'loss', 'content': 0.0806337222456932, 'timestamp': '2025-10-02 00:43:40.276450', 'step': 18312, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:40.345953', 'step': 18312, 'epoch': 2}
{'type': 'loss', 'content': 0.11408077925443649, 'timestamp': '2025-10-02 00:43:40.349556', 'step': 18313, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:40.406832', 'step': 18313, 'epoch': 2}
{'type': 'loss', 'content': 0.05930963158607483, 'timestamp': '2025-10-02 00:43:40.410874', 'step': 18314, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:40.471084', 'step': 18314, 'epoch': 2}
{'type': 'loss', 'content': 0.035365838557481766, 'timestamp': '2025-10-02 00:43:40.480591', 'step': 18315, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:40.544206', 'step': 18315, 'epoch': 2}
{'type': 'loss', 'content': 0.0498492456972599, 'timestamp': '2025-10-02 00:43:40.553129', 'step': 18316, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:40.615028', 'step': 18316, 'epoch': 2}
{'type': 'loss', 'content': 0.1690373718738556, 'timestamp': '2025-10-02 00:43:40.620215', 'step': 18317, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:40.681455', 'step': 18317, 'epoch': 2}
{'type': 'loss', 'content': 0.050738390535116196, 'timestamp': '2025-10-02 00:43:40.688481', 'step': 18318, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:40.746834', 'step': 18318, 'epoch': 2}
{'type': 'loss', 'content': 0.07808063924312592, 'timestamp': '2025-10-02 00:43:40.752048', 'step': 18319, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:40.813158', 'step': 18319, 'epoch': 2}
{'type': 'loss', 'content': 0.05811473727226257, 'timestamp': '2025-10-02 00:43:40.819679', 'step': 18320, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:40.879674', 'step': 18320, 'epoch': 2}
{'type': 'loss', 'content': 0.1389591246843338, 'timestamp': '2025-10-02 00:43:40.890010', 'step': 18321, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:40.946809', 'step': 18321, 'epoch': 2}
{'type': 'loss', 'content': 0.035473719239234924, 'timestamp': '2025-10-02 00:43:40.950498', 'step': 18322, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:41.013202', 'step': 18322, 'epoch': 2}
{'type': 'loss', 'content': 0.036457519978284836, 'timestamp': '2025-10-02 00:43:41.019873', 'step': 18323, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:41.075806', 'step': 18323, 'epoch': 2}
{'type': 'loss', 'content': 0.04395969212055206, 'timestamp': '2025-10-02 00:43:41.082733', 'step': 18324, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:41.142075', 'step': 18324, 'epoch': 2}
{'type': 'loss', 'content': 0.08120623230934143, 'timestamp': '2025-10-02 00:43:41.145691', 'step': 18325, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:41.201346', 'step': 18325, 'epoch': 2}
{'type': 'loss', 'content': 0.09696457535028458, 'timestamp': '2025-10-02 00:43:41.207267', 'step': 18326, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:41.277981', 'step': 18326, 'epoch': 2}
{'type': 'loss', 'content': 0.02854260802268982, 'timestamp': '2025-10-02 00:43:41.286618', 'step': 18327, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:41.362524', 'step': 18327, 'epoch': 2}
{'type': 'loss', 'content': 0.0204164981842041, 'timestamp': '2025-10-02 00:43:41.376099', 'step': 18328, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:41.439503', 'step': 18328, 'epoch': 2}
{'type': 'loss', 'content': 0.023761307820677757, 'timestamp': '2025-10-02 00:43:41.446571', 'step': 18329, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:41.511949', 'step': 18329, 'epoch': 2}
{'type': 'loss', 'content': 0.0157272107899189, 'timestamp': '2025-10-02 00:43:41.517675', 'step': 18330, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:41.585881', 'step': 18330, 'epoch': 2}
{'type': 'loss', 'content': 0.06577258557081223, 'timestamp': '2025-10-02 00:43:41.594586', 'step': 18331, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:41.659672', 'step': 18331, 'epoch': 2}
{'type': 'loss', 'content': 0.018477663397789, 'timestamp': '2025-10-02 00:43:41.667822', 'step': 18332, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:41.726763', 'step': 18332, 'epoch': 2}
{'type': 'loss', 'content': 0.06369485706090927, 'timestamp': '2025-10-02 00:43:41.731214', 'step': 18333, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:41.794729', 'step': 18333, 'epoch': 2}
{'type': 'loss', 'content': 0.0059982528910040855, 'timestamp': '2025-10-02 00:43:41.803739', 'step': 18334, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:43:41.872349', 'step': 18334, 'epoch': 2}
{'type': 'loss', 'content': 0.11042992770671844, 'timestamp': '2025-10-02 00:43:41.876239', 'step': 18335, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:41.936042', 'step': 18335, 'epoch': 2}
{'type': 'loss', 'content': 0.16076388955116272, 'timestamp': '2025-10-02 00:43:41.952771', 'step': 18336, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:43:42.052212', 'step': 18336, 'epoch': 2}
{'type': 'loss', 'content': 0.0321301594376564, 'timestamp': '2025-10-02 00:43:42.065572', 'step': 18337, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:42.136698', 'step': 18337, 'epoch': 2}
{'type': 'loss', 'content': 0.09098634123802185, 'timestamp': '2025-10-02 00:43:42.139917', 'step': 18338, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:43:42.204899', 'step': 18338, 'epoch': 2}
{'type': 'loss', 'content': 0.011662999168038368, 'timestamp': '2025-10-02 00:43:42.215349', 'step': 18339, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:43:42.276529', 'step': 18339, 'epoch': 2}
{'type': 'loss', 'content': 0.11653251200914383, 'timestamp': '2025-10-02 00:43:42.288842', 'step': 18340, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:42.351442', 'step': 18340, 'epoch': 2}
{'type': 'loss', 'content': 0.04597818851470947, 'timestamp': '2025-10-02 00:43:42.357257', 'step': 18341, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:42.425489', 'step': 18341, 'epoch': 2}
{'type': 'loss', 'content': 0.09529958665370941, 'timestamp': '2025-10-02 00:43:42.432146', 'step': 18342, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:42.492913', 'step': 18342, 'epoch': 2}
{'type': 'loss', 'content': 0.03434675931930542, 'timestamp': '2025-10-02 00:43:42.501680', 'step': 18343, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:42.581981', 'step': 18343, 'epoch': 2}
{'type': 'loss', 'content': 0.008601085282862186, 'timestamp': '2025-10-02 00:43:42.597560', 'step': 18344, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:42.676322', 'step': 18344, 'epoch': 2}
{'type': 'loss', 'content': 0.024313753470778465, 'timestamp': '2025-10-02 00:43:42.685154', 'step': 18345, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:42.742095', 'step': 18345, 'epoch': 2}
{'type': 'loss', 'content': 0.04172805696725845, 'timestamp': '2025-10-02 00:43:42.749114', 'step': 18346, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:42.817778', 'step': 18346, 'epoch': 2}
{'type': 'loss', 'content': 0.018205158412456512, 'timestamp': '2025-10-02 00:43:42.828331', 'step': 18347, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:42.900672', 'step': 18347, 'epoch': 2}
{'type': 'loss', 'content': 0.1646496206521988, 'timestamp': '2025-10-02 00:43:42.914172', 'step': 18348, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:42.990768', 'step': 18348, 'epoch': 2}
{'type': 'loss', 'content': 0.12009373307228088, 'timestamp': '2025-10-02 00:43:42.996397', 'step': 18349, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:43.055714', 'step': 18349, 'epoch': 2}
{'type': 'loss', 'content': 0.0312271099537611, 'timestamp': '2025-10-02 00:43:43.060412', 'step': 18350, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:43.125536', 'step': 18350, 'epoch': 2}
{'type': 'loss', 'content': 0.07910636812448502, 'timestamp': '2025-10-02 00:43:43.132930', 'step': 18351, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:43.192804', 'step': 18351, 'epoch': 2}
{'type': 'loss', 'content': 0.08087070286273956, 'timestamp': '2025-10-02 00:43:43.200306', 'step': 18352, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:43.259301', 'step': 18352, 'epoch': 2}
{'type': 'loss', 'content': 0.12333769351243973, 'timestamp': '2025-10-02 00:43:43.265988', 'step': 18353, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:43.326288', 'step': 18353, 'epoch': 2}
{'type': 'loss', 'content': 0.026285143569111824, 'timestamp': '2025-10-02 00:43:43.333275', 'step': 18354, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:43.392224', 'step': 18354, 'epoch': 2}
{'type': 'loss', 'content': 0.08362525701522827, 'timestamp': '2025-10-02 00:43:43.395020', 'step': 18355, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:43.453476', 'step': 18355, 'epoch': 2}
{'type': 'loss', 'content': 0.10580507665872574, 'timestamp': '2025-10-02 00:43:43.461051', 'step': 18356, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:43.519605', 'step': 18356, 'epoch': 2}
{'type': 'loss', 'content': 0.0390719473361969, 'timestamp': '2025-10-02 00:43:43.526357', 'step': 18357, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:43.582242', 'step': 18357, 'epoch': 2}
{'type': 'loss', 'content': 0.008321414701640606, 'timestamp': '2025-10-02 00:43:43.591080', 'step': 18358, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:43.648301', 'step': 18358, 'epoch': 2}
{'type': 'loss', 'content': 0.01441181916743517, 'timestamp': '2025-10-02 00:43:43.651992', 'step': 18359, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:43.710937', 'step': 18359, 'epoch': 2}
{'type': 'loss', 'content': 0.023803019896149635, 'timestamp': '2025-10-02 00:43:43.719356', 'step': 18360, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:43.776988', 'step': 18360, 'epoch': 2}
{'type': 'loss', 'content': 0.20371441543102264, 'timestamp': '2025-10-02 00:43:43.781120', 'step': 18361, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:43:43.840839', 'step': 18361, 'epoch': 2}
{'type': 'loss', 'content': 0.015685493126511574, 'timestamp': '2025-10-02 00:43:43.851154', 'step': 18362, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:43.920289', 'step': 18362, 'epoch': 2}
{'type': 'loss', 'content': 0.01870470494031906, 'timestamp': '2025-10-02 00:43:43.926106', 'step': 18363, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:43.992828', 'step': 18363, 'epoch': 2}
{'type': 'loss', 'content': 0.03354020044207573, 'timestamp': '2025-10-02 00:43:44.004014', 'step': 18364, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:44.074039', 'step': 18364, 'epoch': 2}
{'type': 'loss', 'content': 0.04349232092499733, 'timestamp': '2025-10-02 00:43:44.079623', 'step': 18365, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:44.155705', 'step': 18365, 'epoch': 2}
{'type': 'loss', 'content': 0.016458993777632713, 'timestamp': '2025-10-02 00:43:44.162856', 'step': 18366, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:44.231551', 'step': 18366, 'epoch': 2}
{'type': 'loss', 'content': 0.04027711600065231, 'timestamp': '2025-10-02 00:43:44.235367', 'step': 18367, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:44.291873', 'step': 18367, 'epoch': 2}
{'type': 'loss', 'content': 0.0680735856294632, 'timestamp': '2025-10-02 00:43:44.298735', 'step': 18368, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:44.368277', 'step': 18368, 'epoch': 2}
{'type': 'loss', 'content': 0.05820242688059807, 'timestamp': '2025-10-02 00:43:44.373812', 'step': 18369, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:44.436591', 'step': 18369, 'epoch': 2}
{'type': 'loss', 'content': 0.016648218035697937, 'timestamp': '2025-10-02 00:43:44.451386', 'step': 18370, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:44.515347', 'step': 18370, 'epoch': 2}
{'type': 'loss', 'content': 0.06513851881027222, 'timestamp': '2025-10-02 00:43:44.519239', 'step': 18371, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:44.579429', 'step': 18371, 'epoch': 2}
{'type': 'loss', 'content': 0.07518472522497177, 'timestamp': '2025-10-02 00:43:44.592880', 'step': 18372, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:44.653589', 'step': 18372, 'epoch': 2}
{'type': 'loss', 'content': 0.058867502957582474, 'timestamp': '2025-10-02 00:43:44.660696', 'step': 18373, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:43:44.734459', 'step': 18373, 'epoch': 2}
{'type': 'loss', 'content': 0.0014582984149456024, 'timestamp': '2025-10-02 00:43:44.745344', 'step': 18374, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:44.802196', 'step': 18374, 'epoch': 2}
{'type': 'loss', 'content': 0.05256228521466255, 'timestamp': '2025-10-02 00:43:44.806128', 'step': 18375, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:44.866914', 'step': 18375, 'epoch': 2}
{'type': 'loss', 'content': 0.036433592438697815, 'timestamp': '2025-10-02 00:43:44.874769', 'step': 18376, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:44.943149', 'step': 18376, 'epoch': 2}
{'type': 'loss', 'content': 0.03192705288529396, 'timestamp': '2025-10-02 00:43:44.947327', 'step': 18377, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:43:45.017015', 'step': 18377, 'epoch': 2}
{'type': 'loss', 'content': 0.010973677970468998, 'timestamp': '2025-10-02 00:43:45.027168', 'step': 18378, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:45.094635', 'step': 18378, 'epoch': 2}
{'type': 'loss', 'content': 0.016644073650240898, 'timestamp': '2025-10-02 00:43:45.102489', 'step': 18379, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:45.167482', 'step': 18379, 'epoch': 2}
{'type': 'loss', 'content': 0.05742203816771507, 'timestamp': '2025-10-02 00:43:45.177918', 'step': 18380, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:43:45.240039', 'step': 18380, 'epoch': 2}
{'type': 'loss', 'content': 0.01311101857572794, 'timestamp': '2025-10-02 00:43:45.246080', 'step': 18381, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:45.319253', 'step': 18381, 'epoch': 2}
{'type': 'loss', 'content': 0.01868223026394844, 'timestamp': '2025-10-02 00:43:45.328838', 'step': 18382, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:45.388661', 'step': 18382, 'epoch': 2}
{'type': 'loss', 'content': 0.01437169685959816, 'timestamp': '2025-10-02 00:43:45.392550', 'step': 18383, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:45.457698', 'step': 18383, 'epoch': 2}
{'type': 'loss', 'content': 0.058170709758996964, 'timestamp': '2025-10-02 00:43:45.464935', 'step': 18384, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:45.532191', 'step': 18384, 'epoch': 2}
{'type': 'loss', 'content': 0.026443298906087875, 'timestamp': '2025-10-02 00:43:45.535586', 'step': 18385, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:45.598522', 'step': 18385, 'epoch': 2}
{'type': 'loss', 'content': 0.10468023270368576, 'timestamp': '2025-10-02 00:43:45.605100', 'step': 18386, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:45.668881', 'step': 18386, 'epoch': 2}
{'type': 'loss', 'content': 0.09947793930768967, 'timestamp': '2025-10-02 00:43:45.675351', 'step': 18387, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:45.734327', 'step': 18387, 'epoch': 2}
{'type': 'loss', 'content': 0.12819413840770721, 'timestamp': '2025-10-02 00:43:45.741101', 'step': 18388, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:45.796896', 'step': 18388, 'epoch': 2}
{'type': 'loss', 'content': 0.061682917177677155, 'timestamp': '2025-10-02 00:43:45.803454', 'step': 18389, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:45.868222', 'step': 18389, 'epoch': 2}
{'type': 'loss', 'content': 0.0360897071659565, 'timestamp': '2025-10-02 00:43:45.871847', 'step': 18390, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:45.928923', 'step': 18390, 'epoch': 2}
{'type': 'loss', 'content': 0.060085635632276535, 'timestamp': '2025-10-02 00:43:45.938789', 'step': 18391, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:46.014062', 'step': 18391, 'epoch': 2}
{'type': 'loss', 'content': 0.05305388569831848, 'timestamp': '2025-10-02 00:43:46.021319', 'step': 18392, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:46.107029', 'step': 18392, 'epoch': 2}
{'type': 'loss', 'content': 0.05625148117542267, 'timestamp': '2025-10-02 00:43:46.116208', 'step': 18393, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:43:46.187645', 'step': 18393, 'epoch': 2}
{'type': 'loss', 'content': 0.012560800649225712, 'timestamp': '2025-10-02 00:43:46.197829', 'step': 18394, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:46.257522', 'step': 18394, 'epoch': 2}
{'type': 'loss', 'content': 0.13568826019763947, 'timestamp': '2025-10-02 00:43:46.260501', 'step': 18395, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:46.317289', 'step': 18395, 'epoch': 2}
{'type': 'loss', 'content': 0.1414574384689331, 'timestamp': '2025-10-02 00:43:46.323789', 'step': 18396, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:43:46.379259', 'step': 18396, 'epoch': 2}
{'type': 'loss', 'content': 0.09408243000507355, 'timestamp': '2025-10-02 00:43:46.382795', 'step': 18397, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:43:46.443676', 'step': 18397, 'epoch': 2}
{'type': 'loss', 'content': 0.024581337347626686, 'timestamp': '2025-10-02 00:43:46.453836', 'step': 18398, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:43:46.516894', 'step': 18398, 'epoch': 2}
{'type': 'loss', 'content': 0.01617184467613697, 'timestamp': '2025-10-02 00:43:46.527540', 'step': 18399, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:46.591212', 'step': 18399, 'epoch': 2}
{'type': 'loss', 'content': 0.0345330573618412, 'timestamp': '2025-10-02 00:43:46.601506', 'step': 18400, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:46.658482', 'step': 18400, 'epoch': 2}
{'type': 'loss', 'content': 0.14445631206035614, 'timestamp': '2025-10-02 00:43:46.660900', 'step': 18401, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:43:46.719183', 'step': 18401, 'epoch': 2}
{'type': 'loss', 'content': 0.16767637431621552, 'timestamp': '2025-10-02 00:43:46.721567', 'step': 18402, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:46.777280', 'step': 18402, 'epoch': 2}
{'type': 'loss', 'content': 0.011888832785189152, 'timestamp': '2025-10-02 00:43:46.786794', 'step': 18403, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:46.847791', 'step': 18403, 'epoch': 2}
{'type': 'loss', 'content': 0.01699560135602951, 'timestamp': '2025-10-02 00:43:46.858139', 'step': 18404, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:46.917129', 'step': 18404, 'epoch': 2}
{'type': 'loss', 'content': 0.02341368794441223, 'timestamp': '2025-10-02 00:43:46.920952', 'step': 18405, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:46.977235', 'step': 18405, 'epoch': 2}
{'type': 'loss', 'content': 0.12403003126382828, 'timestamp': '2025-10-02 00:43:46.979748', 'step': 18406, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:47.035340', 'step': 18406, 'epoch': 2}
{'type': 'loss', 'content': 0.040091048926115036, 'timestamp': '2025-10-02 00:43:47.040612', 'step': 18407, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:43:47.103725', 'step': 18407, 'epoch': 2}
{'type': 'loss', 'content': 0.04413428157567978, 'timestamp': '2025-10-02 00:43:47.115151', 'step': 18408, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:47.169404', 'step': 18408, 'epoch': 2}
{'type': 'loss', 'content': 0.15289443731307983, 'timestamp': '2025-10-02 00:43:47.172031', 'step': 18409, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:43:47.226575', 'step': 18409, 'epoch': 2}
{'type': 'loss', 'content': 0.0788978561758995, 'timestamp': '2025-10-02 00:43:47.229638', 'step': 18410, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:47.286692', 'step': 18410, 'epoch': 2}
{'type': 'loss', 'content': 0.05874945595860481, 'timestamp': '2025-10-02 00:43:47.289347', 'step': 18411, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:47.353218', 'step': 18411, 'epoch': 2}
{'type': 'loss', 'content': 0.09411122649908066, 'timestamp': '2025-10-02 00:43:47.359711', 'step': 18412, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:47.421108', 'step': 18412, 'epoch': 2}
{'type': 'loss', 'content': 0.10078928619623184, 'timestamp': '2025-10-02 00:43:47.424823', 'step': 18413, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:43:47.488394', 'step': 18413, 'epoch': 2}
{'type': 'loss', 'content': 0.013366121798753738, 'timestamp': '2025-10-02 00:43:47.499047', 'step': 18414, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:47.559169', 'step': 18414, 'epoch': 2}
{'type': 'loss', 'content': 0.12795531749725342, 'timestamp': '2025-10-02 00:43:47.566168', 'step': 18415, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:43:47.627227', 'step': 18415, 'epoch': 2}
{'type': 'loss', 'content': 0.10152440518140793, 'timestamp': '2025-10-02 00:43:47.634248', 'step': 18416, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:43:47.688762', 'step': 18416, 'epoch': 2}
{'type': 'loss', 'content': 0.1272871345281601, 'timestamp': '2025-10-02 00:43:47.692094', 'step': 18417, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:47.746774', 'step': 18417, 'epoch': 2}
{'type': 'loss', 'content': 0.08376497030258179, 'timestamp': '2025-10-02 00:43:47.749980', 'step': 18418, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:47.818685', 'step': 18418, 'epoch': 2}
{'type': 'loss', 'content': 0.031054601073265076, 'timestamp': '2025-10-02 00:43:47.826484', 'step': 18419, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:47.883416', 'step': 18419, 'epoch': 2}
{'type': 'loss', 'content': 0.12044275552034378, 'timestamp': '2025-10-02 00:43:47.892260', 'step': 18420, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:47.946931', 'step': 18420, 'epoch': 2}
{'type': 'loss', 'content': 0.13515456020832062, 'timestamp': '2025-10-02 00:43:47.954554', 'step': 18421, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:48.022521', 'step': 18421, 'epoch': 2}
{'type': 'loss', 'content': 0.018693851307034492, 'timestamp': '2025-10-02 00:43:48.032075', 'step': 18422, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:48.098498', 'step': 18422, 'epoch': 2}
{'type': 'loss', 'content': 0.02687351405620575, 'timestamp': '2025-10-02 00:43:48.107717', 'step': 18423, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:48.164855', 'step': 18423, 'epoch': 2}
{'type': 'loss', 'content': 0.10511464625597, 'timestamp': '2025-10-02 00:43:48.175744', 'step': 18424, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:43:48.243593', 'step': 18424, 'epoch': 2}
{'type': 'loss', 'content': 0.033184267580509186, 'timestamp': '2025-10-02 00:43:48.255336', 'step': 18425, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:43:48.318867', 'step': 18425, 'epoch': 2}
{'type': 'loss', 'content': 0.08872407674789429, 'timestamp': '2025-10-02 00:43:48.329338', 'step': 18426, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:48.389515', 'step': 18426, 'epoch': 2}
{'type': 'loss', 'content': 0.15025797486305237, 'timestamp': '2025-10-02 00:43:48.392595', 'step': 18427, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:43:48.448356', 'step': 18427, 'epoch': 2}
{'type': 'loss', 'content': 0.06190144643187523, 'timestamp': '2025-10-02 00:43:48.456344', 'step': 18428, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:48.512681', 'step': 18428, 'epoch': 2}
{'type': 'loss', 'content': 0.07342695444822311, 'timestamp': '2025-10-02 00:43:48.515597', 'step': 18429, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:48.570132', 'step': 18429, 'epoch': 2}
{'type': 'loss', 'content': 0.049420036375522614, 'timestamp': '2025-10-02 00:43:48.573520', 'step': 18430, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:48.628941', 'step': 18430, 'epoch': 2}
{'type': 'loss', 'content': 0.07353401184082031, 'timestamp': '2025-10-02 00:43:48.631260', 'step': 18431, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:48.685430', 'step': 18431, 'epoch': 2}
{'type': 'loss', 'content': 0.08776763826608658, 'timestamp': '2025-10-02 00:43:48.691964', 'step': 18432, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:48.745411', 'step': 18432, 'epoch': 2}
{'type': 'loss', 'content': 0.03310612961649895, 'timestamp': '2025-10-02 00:43:48.747683', 'step': 18433, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:43:48.801497', 'step': 18433, 'epoch': 2}
{'type': 'loss', 'content': 0.08080317080020905, 'timestamp': '2025-10-02 00:43:48.803904', 'step': 18434, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:48.858874', 'step': 18434, 'epoch': 2}
{'type': 'loss', 'content': 0.05541779845952988, 'timestamp': '2025-10-02 00:43:48.867578', 'step': 18435, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:43:48.931808', 'step': 18435, 'epoch': 2}
{'type': 'loss', 'content': 0.053812526166439056, 'timestamp': '2025-10-02 00:43:48.943230', 'step': 18436, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:43:49.004991', 'step': 18436, 'epoch': 2}
{'type': 'loss', 'content': 0.03506535291671753, 'timestamp': '2025-10-02 00:43:49.016299', 'step': 18437, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:49.070530', 'step': 18437, 'epoch': 2}
{'type': 'loss', 'content': 0.08643738180398941, 'timestamp': '2025-10-02 00:43:49.072891', 'step': 18438, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:49.127775', 'step': 18438, 'epoch': 2}
{'type': 'loss', 'content': 0.009942996315658092, 'timestamp': '2025-10-02 00:43:49.130275', 'step': 18439, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:49.184944', 'step': 18439, 'epoch': 2}
{'type': 'loss', 'content': 0.04483511671423912, 'timestamp': '2025-10-02 00:43:49.193030', 'step': 18440, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:49.246400', 'step': 18440, 'epoch': 2}
{'type': 'loss', 'content': 0.07111665606498718, 'timestamp': '2025-10-02 00:43:49.248810', 'step': 18441, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:49.304724', 'step': 18441, 'epoch': 2}
{'type': 'loss', 'content': 0.03743341937661171, 'timestamp': '2025-10-02 00:43:49.307340', 'step': 18442, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:49.362742', 'step': 18442, 'epoch': 2}
{'type': 'loss', 'content': 0.045949243009090424, 'timestamp': '2025-10-02 00:43:49.372229', 'step': 18443, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:43:49.440669', 'step': 18443, 'epoch': 2}
{'type': 'loss', 'content': 0.01300374697893858, 'timestamp': '2025-10-02 00:43:49.453421', 'step': 18444, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:49.507794', 'step': 18444, 'epoch': 2}
{'type': 'loss', 'content': 0.017335936427116394, 'timestamp': '2025-10-02 00:43:49.510069', 'step': 18445, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:49.564651', 'step': 18445, 'epoch': 2}
{'type': 'loss', 'content': 0.035347383469343185, 'timestamp': '2025-10-02 00:43:49.567117', 'step': 18446, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:49.622808', 'step': 18446, 'epoch': 2}
{'type': 'loss', 'content': 0.02483706921339035, 'timestamp': '2025-10-02 00:43:49.625540', 'step': 18447, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:49.679659', 'step': 18447, 'epoch': 2}
{'type': 'loss', 'content': 0.0404294915497303, 'timestamp': '2025-10-02 00:43:49.686030', 'step': 18448, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:43:49.740083', 'step': 18448, 'epoch': 2}
{'type': 'loss', 'content': 0.046094153076410294, 'timestamp': '2025-10-02 00:43:49.742740', 'step': 18449, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:49.797475', 'step': 18449, 'epoch': 2}
{'type': 'loss', 'content': 0.05702872574329376, 'timestamp': '2025-10-02 00:43:49.800005', 'step': 18450, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:49.855364', 'step': 18450, 'epoch': 2}
{'type': 'loss', 'content': 0.06311530619859695, 'timestamp': '2025-10-02 00:43:49.860937', 'step': 18451, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:43:49.919759', 'step': 18451, 'epoch': 2}
{'type': 'loss', 'content': 0.07751300185918808, 'timestamp': '2025-10-02 00:43:49.925811', 'step': 18452, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:43:49.984641', 'step': 18452, 'epoch': 2}
{'type': 'loss', 'content': 0.02078917995095253, 'timestamp': '2025-10-02 00:43:49.995652', 'step': 18453, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:50.051035', 'step': 18453, 'epoch': 2}
{'type': 'loss', 'content': 0.10109970718622208, 'timestamp': '2025-10-02 00:43:50.056562', 'step': 18454, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:50.111077', 'step': 18454, 'epoch': 2}
{'type': 'loss', 'content': 0.1813407987356186, 'timestamp': '2025-10-02 00:43:50.113743', 'step': 18455, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:50.168966', 'step': 18455, 'epoch': 2}
{'type': 'loss', 'content': 0.06306829303503036, 'timestamp': '2025-10-02 00:43:50.175201', 'step': 18456, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:50.229467', 'step': 18456, 'epoch': 2}
{'type': 'loss', 'content': 0.024721436202526093, 'timestamp': '2025-10-02 00:43:50.235088', 'step': 18457, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:50.291075', 'step': 18457, 'epoch': 2}
{'type': 'loss', 'content': 0.015302153304219246, 'timestamp': '2025-10-02 00:43:50.298298', 'step': 18458, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:50.353993', 'step': 18458, 'epoch': 2}
{'type': 'loss', 'content': 0.05566056817770004, 'timestamp': '2025-10-02 00:43:50.356239', 'step': 18459, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:50.410469', 'step': 18459, 'epoch': 2}
{'type': 'loss', 'content': 0.050940241664648056, 'timestamp': '2025-10-02 00:43:50.418367', 'step': 18460, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:50.471998', 'step': 18460, 'epoch': 2}
{'type': 'loss', 'content': 0.029903089627623558, 'timestamp': '2025-10-02 00:43:50.474321', 'step': 18461, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:50.529039', 'step': 18461, 'epoch': 2}
{'type': 'loss', 'content': 0.10239455848932266, 'timestamp': '2025-10-02 00:43:50.531520', 'step': 18462, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:50.585955', 'step': 18462, 'epoch': 2}
{'type': 'loss', 'content': 0.031490348279476166, 'timestamp': '2025-10-02 00:43:50.588769', 'step': 18463, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:50.643726', 'step': 18463, 'epoch': 2}
{'type': 'loss', 'content': 0.016512636095285416, 'timestamp': '2025-10-02 00:43:50.649783', 'step': 18464, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:50.703963', 'step': 18464, 'epoch': 2}
{'type': 'loss', 'content': 0.02511114440858364, 'timestamp': '2025-10-02 00:43:50.706443', 'step': 18465, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:50.760185', 'step': 18465, 'epoch': 2}
{'type': 'loss', 'content': 0.1182146891951561, 'timestamp': '2025-10-02 00:43:50.762774', 'step': 18466, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:50.817317', 'step': 18466, 'epoch': 2}
{'type': 'loss', 'content': 0.04330764338374138, 'timestamp': '2025-10-02 00:43:50.819544', 'step': 18467, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:43:50.873662', 'step': 18467, 'epoch': 2}
{'type': 'loss', 'content': 0.15365362167358398, 'timestamp': '2025-10-02 00:43:50.879872', 'step': 18468, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:50.934499', 'step': 18468, 'epoch': 2}
{'type': 'loss', 'content': 0.03465878963470459, 'timestamp': '2025-10-02 00:43:50.936977', 'step': 18469, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:43:51.004370', 'step': 18469, 'epoch': 2}
{'type': 'loss', 'content': 0.06052043288946152, 'timestamp': '2025-10-02 00:43:51.016345', 'step': 18470, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:51.072169', 'step': 18470, 'epoch': 2}
{'type': 'loss', 'content': 0.05346548184752464, 'timestamp': '2025-10-02 00:43:51.081513', 'step': 18471, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:51.136993', 'step': 18471, 'epoch': 2}
{'type': 'loss', 'content': 0.06397287547588348, 'timestamp': '2025-10-02 00:43:51.147289', 'step': 18472, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:43:51.207765', 'step': 18472, 'epoch': 2}
{'type': 'loss', 'content': 0.03287120535969734, 'timestamp': '2025-10-02 00:43:51.219112', 'step': 18473, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:51.273054', 'step': 18473, 'epoch': 2}
{'type': 'loss', 'content': 0.10856065154075623, 'timestamp': '2025-10-02 00:43:51.275499', 'step': 18474, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:51.331217', 'step': 18474, 'epoch': 2}
{'type': 'loss', 'content': 0.10455648601055145, 'timestamp': '2025-10-02 00:43:51.333545', 'step': 18475, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:43:51.387626', 'step': 18475, 'epoch': 2}
{'type': 'loss', 'content': 0.03459319472312927, 'timestamp': '2025-10-02 00:43:51.393895', 'step': 18476, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:43:51.448156', 'step': 18476, 'epoch': 2}
{'type': 'loss', 'content': 0.09467750042676926, 'timestamp': '2025-10-02 00:43:51.450412', 'step': 18477, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:51.504484', 'step': 18477, 'epoch': 2}
{'type': 'loss', 'content': 0.07884646952152252, 'timestamp': '2025-10-02 00:43:51.507449', 'step': 18478, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:51.561666', 'step': 18478, 'epoch': 2}
{'type': 'loss', 'content': 0.014397213235497475, 'timestamp': '2025-10-02 00:43:51.577528', 'step': 18479, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:51.632348', 'step': 18479, 'epoch': 2}
{'type': 'loss', 'content': 0.09115009009838104, 'timestamp': '2025-10-02 00:43:51.639947', 'step': 18480, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:51.694291', 'step': 18480, 'epoch': 2}
{'type': 'loss', 'content': 0.08455865830183029, 'timestamp': '2025-10-02 00:43:51.696871', 'step': 18481, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:51.751312', 'step': 18481, 'epoch': 2}
{'type': 'loss', 'content': 0.01548268087208271, 'timestamp': '2025-10-02 00:43:51.753965', 'step': 18482, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:51.809428', 'step': 18482, 'epoch': 2}
{'type': 'loss', 'content': 0.0985158309340477, 'timestamp': '2025-10-02 00:43:51.812048', 'step': 18483, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:51.866342', 'step': 18483, 'epoch': 2}
{'type': 'loss', 'content': 0.07283882796764374, 'timestamp': '2025-10-02 00:43:51.872346', 'step': 18484, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:43:51.934064', 'step': 18484, 'epoch': 2}
{'type': 'loss', 'content': 0.05856073275208473, 'timestamp': '2025-10-02 00:43:51.945577', 'step': 18485, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-10-02 00:43:52.022801', 'step': 18485, 'epoch': 2}
{'type': 'loss', 'content': 0.02457907237112522, 'timestamp': '2025-10-02 00:43:52.036452', 'step': 18486, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:52.091683', 'step': 18486, 'epoch': 2}
{'type': 'loss', 'content': 0.030317114666104317, 'timestamp': '2025-10-02 00:43:52.094580', 'step': 18487, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:52.149321', 'step': 18487, 'epoch': 2}
{'type': 'loss', 'content': 0.02505320869386196, 'timestamp': '2025-10-02 00:43:52.155225', 'step': 18488, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:52.209263', 'step': 18488, 'epoch': 2}
{'type': 'loss', 'content': 0.14163699746131897, 'timestamp': '2025-10-02 00:43:52.211818', 'step': 18489, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:43:52.275032', 'step': 18489, 'epoch': 2}
{'type': 'loss', 'content': 0.013086335733532906, 'timestamp': '2025-10-02 00:43:52.285656', 'step': 18490, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:52.341617', 'step': 18490, 'epoch': 2}
{'type': 'loss', 'content': 0.005766328424215317, 'timestamp': '2025-10-02 00:43:52.344230', 'step': 18491, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:43:52.398607', 'step': 18491, 'epoch': 2}
{'type': 'loss', 'content': 0.13705742359161377, 'timestamp': '2025-10-02 00:43:52.404469', 'step': 18492, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:43:52.458773', 'step': 18492, 'epoch': 2}
{'type': 'loss', 'content': 0.06900295615196228, 'timestamp': '2025-10-02 00:43:52.461142', 'step': 18493, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:52.515837', 'step': 18493, 'epoch': 2}
{'type': 'loss', 'content': 0.11273904144763947, 'timestamp': '2025-10-02 00:43:52.518364', 'step': 18494, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:52.573412', 'step': 18494, 'epoch': 2}
{'type': 'loss', 'content': 0.02400105632841587, 'timestamp': '2025-10-02 00:43:52.579082', 'step': 18495, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:52.634057', 'step': 18495, 'epoch': 2}
{'type': 'loss', 'content': 0.03075053170323372, 'timestamp': '2025-10-02 00:43:52.640572', 'step': 18496, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:52.694714', 'step': 18496, 'epoch': 2}
{'type': 'loss', 'content': 0.07451488822698593, 'timestamp': '2025-10-02 00:43:52.697329', 'step': 18497, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:43:52.756137', 'step': 18497, 'epoch': 2}
{'type': 'loss', 'content': 0.009881780482828617, 'timestamp': '2025-10-02 00:43:52.766308', 'step': 18498, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:43:52.835900', 'step': 18498, 'epoch': 2}
{'type': 'loss', 'content': 0.046607308089733124, 'timestamp': '2025-10-02 00:43:52.848196', 'step': 18499, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:52.902852', 'step': 18499, 'epoch': 2}
{'type': 'loss', 'content': 0.09909816086292267, 'timestamp': '2025-10-02 00:43:52.909162', 'step': 18500, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 18500', 'timestamp': '2025-10-02 00:43:53.316362', 'step': 18500, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:53.373505', 'step': 18500, 'epoch': 2}
{'type': 'loss', 'content': 0.035540904849767685, 'timestamp': '2025-10-02 00:43:53.376116', 'step': 18501, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:53.433513', 'step': 18501, 'epoch': 2}
{'type': 'loss', 'content': 0.010058179497718811, 'timestamp': '2025-10-02 00:43:53.436139', 'step': 18502, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:53.495576', 'step': 18502, 'epoch': 2}
{'type': 'loss', 'content': 0.05225040763616562, 'timestamp': '2025-10-02 00:43:53.498714', 'step': 18503, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:43:53.554954', 'step': 18503, 'epoch': 2}
{'type': 'loss', 'content': 0.06687010079622269, 'timestamp': '2025-10-02 00:43:53.562369', 'step': 18504, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:53.617384', 'step': 18504, 'epoch': 2}
{'type': 'loss', 'content': 0.030999043956398964, 'timestamp': '2025-10-02 00:43:53.620430', 'step': 18505, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:53.675987', 'step': 18505, 'epoch': 2}
{'type': 'loss', 'content': 0.12238715589046478, 'timestamp': '2025-10-02 00:43:53.679357', 'step': 18506, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:53.738478', 'step': 18506, 'epoch': 2}
{'type': 'loss', 'content': 0.015638627111911774, 'timestamp': '2025-10-02 00:43:53.743685', 'step': 18507, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:53.799818', 'step': 18507, 'epoch': 2}
{'type': 'loss', 'content': 0.04207635670900345, 'timestamp': '2025-10-02 00:43:53.810072', 'step': 18508, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:53.874333', 'step': 18508, 'epoch': 2}
{'type': 'loss', 'content': 0.030149148777127266, 'timestamp': '2025-10-02 00:43:53.877236', 'step': 18509, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:43:53.940934', 'step': 18509, 'epoch': 2}
{'type': 'loss', 'content': 0.03884327784180641, 'timestamp': '2025-10-02 00:43:53.951602', 'step': 18510, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:54.008049', 'step': 18510, 'epoch': 2}
{'type': 'loss', 'content': 0.10040024667978287, 'timestamp': '2025-10-02 00:43:54.010612', 'step': 18511, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:54.066240', 'step': 18511, 'epoch': 2}
{'type': 'loss', 'content': 0.09350206702947617, 'timestamp': '2025-10-02 00:43:54.072796', 'step': 18512, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:43:54.133031', 'step': 18512, 'epoch': 2}
{'type': 'loss', 'content': 0.025533728301525116, 'timestamp': '2025-10-02 00:43:54.144039', 'step': 18513, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:43:54.200729', 'step': 18513, 'epoch': 2}
{'type': 'loss', 'content': 0.045876394957304, 'timestamp': '2025-10-02 00:43:54.203685', 'step': 18514, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:43:54.263687', 'step': 18514, 'epoch': 2}
{'type': 'loss', 'content': 0.03404773771762848, 'timestamp': '2025-10-02 00:43:54.273863', 'step': 18515, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:43:54.336033', 'step': 18515, 'epoch': 2}
{'type': 'loss', 'content': 0.04142916947603226, 'timestamp': '2025-10-02 00:43:54.347253', 'step': 18516, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:54.403049', 'step': 18516, 'epoch': 2}
{'type': 'loss', 'content': 0.08933381736278534, 'timestamp': '2025-10-02 00:43:54.412219', 'step': 18517, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:54.471941', 'step': 18517, 'epoch': 2}
{'type': 'loss', 'content': 0.056025877594947815, 'timestamp': '2025-10-02 00:43:54.476147', 'step': 18518, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:54.533285', 'step': 18518, 'epoch': 2}
{'type': 'loss', 'content': 0.021831179037690163, 'timestamp': '2025-10-02 00:43:54.536648', 'step': 18519, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:54.591983', 'step': 18519, 'epoch': 2}
{'type': 'loss', 'content': 0.08423814177513123, 'timestamp': '2025-10-02 00:43:54.598737', 'step': 18520, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:54.654863', 'step': 18520, 'epoch': 2}
{'type': 'loss', 'content': 0.017678335309028625, 'timestamp': '2025-10-02 00:43:54.662184', 'step': 18521, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:54.720500', 'step': 18521, 'epoch': 2}
{'type': 'loss', 'content': 0.0406821146607399, 'timestamp': '2025-10-02 00:43:54.729617', 'step': 18522, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:54.784750', 'step': 18522, 'epoch': 2}
{'type': 'loss', 'content': 0.04028649255633354, 'timestamp': '2025-10-02 00:43:54.794060', 'step': 18523, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:43:54.855125', 'step': 18523, 'epoch': 2}
{'type': 'loss', 'content': 0.019692810252308846, 'timestamp': '2025-10-02 00:43:54.866136', 'step': 18524, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:54.921508', 'step': 18524, 'epoch': 2}
{'type': 'loss', 'content': 0.1088365763425827, 'timestamp': '2025-10-02 00:43:54.931794', 'step': 18525, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:54.987141', 'step': 18525, 'epoch': 2}
{'type': 'loss', 'content': 0.1313188076019287, 'timestamp': '2025-10-02 00:43:54.989851', 'step': 18526, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:43:55.051407', 'step': 18526, 'epoch': 2}
{'type': 'loss', 'content': 0.01882428303360939, 'timestamp': '2025-10-02 00:43:55.061918', 'step': 18527, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:55.118674', 'step': 18527, 'epoch': 2}
{'type': 'loss', 'content': 0.061539147049188614, 'timestamp': '2025-10-02 00:43:55.125014', 'step': 18528, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:55.179894', 'step': 18528, 'epoch': 2}
{'type': 'loss', 'content': 0.028302889317274094, 'timestamp': '2025-10-02 00:43:55.182302', 'step': 18529, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:55.236925', 'step': 18529, 'epoch': 2}
{'type': 'loss', 'content': 0.01202624011784792, 'timestamp': '2025-10-02 00:43:55.239649', 'step': 18530, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:55.294096', 'step': 18530, 'epoch': 2}
{'type': 'loss', 'content': 0.016808755695819855, 'timestamp': '2025-10-02 00:43:55.296244', 'step': 18531, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:55.350340', 'step': 18531, 'epoch': 2}
{'type': 'loss', 'content': 0.04114939272403717, 'timestamp': '2025-10-02 00:43:55.357407', 'step': 18532, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:55.410558', 'step': 18532, 'epoch': 2}
{'type': 'loss', 'content': 0.14415515959262848, 'timestamp': '2025-10-02 00:43:55.412687', 'step': 18533, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:55.467708', 'step': 18533, 'epoch': 2}
{'type': 'loss', 'content': 0.048710908740758896, 'timestamp': '2025-10-02 00:43:55.473250', 'step': 18534, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:55.527926', 'step': 18534, 'epoch': 2}
{'type': 'loss', 'content': 0.04341617971658707, 'timestamp': '2025-10-02 00:43:55.537030', 'step': 18535, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:55.591231', 'step': 18535, 'epoch': 2}
{'type': 'loss', 'content': 0.07511692494153976, 'timestamp': '2025-10-02 00:43:55.597212', 'step': 18536, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:43:55.656380', 'step': 18536, 'epoch': 2}
{'type': 'loss', 'content': 0.15975168347358704, 'timestamp': '2025-10-02 00:43:55.667668', 'step': 18537, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:55.722452', 'step': 18537, 'epoch': 2}
{'type': 'loss', 'content': 0.06970880180597305, 'timestamp': '2025-10-02 00:43:55.724953', 'step': 18538, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:43:55.784672', 'step': 18538, 'epoch': 2}
{'type': 'loss', 'content': 0.023567717522382736, 'timestamp': '2025-10-02 00:43:55.794852', 'step': 18539, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:55.849962', 'step': 18539, 'epoch': 2}
{'type': 'loss', 'content': 0.07420169562101364, 'timestamp': '2025-10-02 00:43:55.856238', 'step': 18540, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:55.910510', 'step': 18540, 'epoch': 2}
{'type': 'loss', 'content': 0.038642048835754395, 'timestamp': '2025-10-02 00:43:55.917770', 'step': 18541, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:55.974174', 'step': 18541, 'epoch': 2}
{'type': 'loss', 'content': 0.04442816600203514, 'timestamp': '2025-10-02 00:43:55.976719', 'step': 18542, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:56.030800', 'step': 18542, 'epoch': 2}
{'type': 'loss', 'content': 0.07189379632472992, 'timestamp': '2025-10-02 00:43:56.033404', 'step': 18543, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:43:56.088068', 'step': 18543, 'epoch': 2}
{'type': 'loss', 'content': 0.06298627704381943, 'timestamp': '2025-10-02 00:43:56.094091', 'step': 18544, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:56.147777', 'step': 18544, 'epoch': 2}
{'type': 'loss', 'content': 0.10868823528289795, 'timestamp': '2025-10-02 00:43:56.150289', 'step': 18545, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:43:56.204530', 'step': 18545, 'epoch': 2}
{'type': 'loss', 'content': 0.14611977338790894, 'timestamp': '2025-10-02 00:43:56.206984', 'step': 18546, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:56.260935', 'step': 18546, 'epoch': 2}
{'type': 'loss', 'content': 0.11128097772598267, 'timestamp': '2025-10-02 00:43:56.263431', 'step': 18547, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:56.317783', 'step': 18547, 'epoch': 2}
{'type': 'loss', 'content': 0.022934166714549065, 'timestamp': '2025-10-02 00:43:56.327659', 'step': 18548, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:56.380650', 'step': 18548, 'epoch': 2}
{'type': 'loss', 'content': 0.08052828907966614, 'timestamp': '2025-10-02 00:43:56.383021', 'step': 18549, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:43:56.444742', 'step': 18549, 'epoch': 2}
{'type': 'loss', 'content': 0.04568662866950035, 'timestamp': '2025-10-02 00:43:56.455363', 'step': 18550, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:56.511416', 'step': 18550, 'epoch': 2}
{'type': 'loss', 'content': 0.0641823410987854, 'timestamp': '2025-10-02 00:43:56.513777', 'step': 18551, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:56.568200', 'step': 18551, 'epoch': 2}
{'type': 'loss', 'content': 0.02707580476999283, 'timestamp': '2025-10-02 00:43:56.574107', 'step': 18552, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:56.628596', 'step': 18552, 'epoch': 2}
{'type': 'loss', 'content': 0.14853954315185547, 'timestamp': '2025-10-02 00:43:56.631213', 'step': 18553, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:56.685289', 'step': 18553, 'epoch': 2}
{'type': 'loss', 'content': 0.03295467421412468, 'timestamp': '2025-10-02 00:43:56.687892', 'step': 18554, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:56.743021', 'step': 18554, 'epoch': 2}
{'type': 'loss', 'content': 0.12945778667926788, 'timestamp': '2025-10-02 00:43:56.745485', 'step': 18555, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:43:56.800735', 'step': 18555, 'epoch': 2}
{'type': 'loss', 'content': 0.04411612078547478, 'timestamp': '2025-10-02 00:43:56.807057', 'step': 18556, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:56.861218', 'step': 18556, 'epoch': 2}
{'type': 'loss', 'content': 0.07225534319877625, 'timestamp': '2025-10-02 00:43:56.868448', 'step': 18557, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:56.923135', 'step': 18557, 'epoch': 2}
{'type': 'loss', 'content': 0.04580291733145714, 'timestamp': '2025-10-02 00:43:56.925946', 'step': 18558, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:43:56.984832', 'step': 18558, 'epoch': 2}
{'type': 'loss', 'content': 0.007816523313522339, 'timestamp': '2025-10-02 00:43:56.994997', 'step': 18559, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:57.050006', 'step': 18559, 'epoch': 2}
{'type': 'loss', 'content': 0.01598949171602726, 'timestamp': '2025-10-02 00:43:57.056216', 'step': 18560, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:43:57.109394', 'step': 18560, 'epoch': 2}
{'type': 'loss', 'content': 0.041912734508514404, 'timestamp': '2025-10-02 00:43:57.111655', 'step': 18561, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:57.168570', 'step': 18561, 'epoch': 2}
{'type': 'loss', 'content': 0.024337967857718468, 'timestamp': '2025-10-02 00:43:57.178072', 'step': 18562, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:43:57.232270', 'step': 18562, 'epoch': 2}
{'type': 'loss', 'content': 0.03849490359425545, 'timestamp': '2025-10-02 00:43:57.234509', 'step': 18563, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:43:57.288669', 'step': 18563, 'epoch': 2}
{'type': 'loss', 'content': 0.08903630822896957, 'timestamp': '2025-10-02 00:43:57.294765', 'step': 18564, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:43:57.348111', 'step': 18564, 'epoch': 2}
{'type': 'loss', 'content': 0.037659212946891785, 'timestamp': '2025-10-02 00:43:57.350870', 'step': 18565, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:57.405373', 'step': 18565, 'epoch': 2}
{'type': 'loss', 'content': 0.02161935530602932, 'timestamp': '2025-10-02 00:43:57.414533', 'step': 18566, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:57.469190', 'step': 18566, 'epoch': 2}
{'type': 'loss', 'content': 0.09370305389165878, 'timestamp': '2025-10-02 00:43:57.471869', 'step': 18567, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:57.527172', 'step': 18567, 'epoch': 2}
{'type': 'loss', 'content': 0.14889152348041534, 'timestamp': '2025-10-02 00:43:57.534922', 'step': 18568, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:57.593615', 'step': 18568, 'epoch': 2}
{'type': 'loss', 'content': 0.04806216433644295, 'timestamp': '2025-10-02 00:43:57.596194', 'step': 18569, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:43:57.650629', 'step': 18569, 'epoch': 2}
{'type': 'loss', 'content': 0.06735199689865112, 'timestamp': '2025-10-02 00:43:57.653497', 'step': 18570, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:43:57.709039', 'step': 18570, 'epoch': 2}
{'type': 'loss', 'content': 0.09606664627790451, 'timestamp': '2025-10-02 00:43:57.711488', 'step': 18571, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:57.766235', 'step': 18571, 'epoch': 2}
{'type': 'loss', 'content': 0.016987135633826256, 'timestamp': '2025-10-02 00:43:57.772827', 'step': 18572, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:43:57.828853', 'step': 18572, 'epoch': 2}
{'type': 'loss', 'content': 0.08668472617864609, 'timestamp': '2025-10-02 00:43:57.831025', 'step': 18573, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:57.889158', 'step': 18573, 'epoch': 2}
{'type': 'loss', 'content': 0.11862273514270782, 'timestamp': '2025-10-02 00:43:57.898377', 'step': 18574, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:43:57.958913', 'step': 18574, 'epoch': 2}
{'type': 'loss', 'content': 0.007346526253968477, 'timestamp': '2025-10-02 00:43:57.969103', 'step': 18575, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:58.025923', 'step': 18575, 'epoch': 2}
{'type': 'loss', 'content': 0.1029362827539444, 'timestamp': '2025-10-02 00:43:58.031965', 'step': 18576, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:58.085601', 'step': 18576, 'epoch': 2}
{'type': 'loss', 'content': 0.07997956871986389, 'timestamp': '2025-10-02 00:43:58.088129', 'step': 18577, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:58.142956', 'step': 18577, 'epoch': 2}
{'type': 'loss', 'content': 0.003070300444960594, 'timestamp': '2025-10-02 00:43:58.148662', 'step': 18578, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:58.203142', 'step': 18578, 'epoch': 2}
{'type': 'loss', 'content': 0.026528965681791306, 'timestamp': '2025-10-02 00:43:58.208714', 'step': 18579, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:58.264054', 'step': 18579, 'epoch': 2}
{'type': 'loss', 'content': 0.05037207156419754, 'timestamp': '2025-10-02 00:43:58.270514', 'step': 18580, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:43:58.340076', 'step': 18580, 'epoch': 2}
{'type': 'loss', 'content': 0.012816863134503365, 'timestamp': '2025-10-02 00:43:58.350962', 'step': 18581, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:58.407941', 'step': 18581, 'epoch': 2}
{'type': 'loss', 'content': 0.03362888842821121, 'timestamp': '2025-10-02 00:43:58.417456', 'step': 18582, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:58.471427', 'step': 18582, 'epoch': 2}
{'type': 'loss', 'content': 0.11622396856546402, 'timestamp': '2025-10-02 00:43:58.473865', 'step': 18583, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:58.529513', 'step': 18583, 'epoch': 2}
{'type': 'loss', 'content': 0.048596642911434174, 'timestamp': '2025-10-02 00:43:58.535554', 'step': 18584, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:43:58.591613', 'step': 18584, 'epoch': 2}
{'type': 'loss', 'content': 0.15963461995124817, 'timestamp': '2025-10-02 00:43:58.594232', 'step': 18585, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:58.652015', 'step': 18585, 'epoch': 2}
{'type': 'loss', 'content': 0.038417935371398926, 'timestamp': '2025-10-02 00:43:58.661577', 'step': 18586, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:58.721705', 'step': 18586, 'epoch': 2}
{'type': 'loss', 'content': 0.12826988101005554, 'timestamp': '2025-10-02 00:43:58.723887', 'step': 18587, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:58.778573', 'step': 18587, 'epoch': 2}
{'type': 'loss', 'content': 0.08413896709680557, 'timestamp': '2025-10-02 00:43:58.786504', 'step': 18588, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:58.851638', 'step': 18588, 'epoch': 2}
{'type': 'loss', 'content': 0.17595311999320984, 'timestamp': '2025-10-02 00:43:58.853881', 'step': 18589, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:43:58.909028', 'step': 18589, 'epoch': 2}
{'type': 'loss', 'content': 0.09692791104316711, 'timestamp': '2025-10-02 00:43:58.914631', 'step': 18590, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:58.968811', 'step': 18590, 'epoch': 2}
{'type': 'loss', 'content': 0.10983575135469437, 'timestamp': '2025-10-02 00:43:58.971561', 'step': 18591, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:43:59.035390', 'step': 18591, 'epoch': 2}
{'type': 'loss', 'content': 0.012977847829461098, 'timestamp': '2025-10-02 00:43:59.046965', 'step': 18592, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:59.101639', 'step': 18592, 'epoch': 2}
{'type': 'loss', 'content': 0.10428247600793839, 'timestamp': '2025-10-02 00:43:59.104088', 'step': 18593, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:43:59.158871', 'step': 18593, 'epoch': 2}
{'type': 'loss', 'content': 0.14813017845153809, 'timestamp': '2025-10-02 00:43:59.161413', 'step': 18594, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:59.216242', 'step': 18594, 'epoch': 2}
{'type': 'loss', 'content': 0.0295471902936697, 'timestamp': '2025-10-02 00:43:59.218729', 'step': 18595, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:43:59.275338', 'step': 18595, 'epoch': 2}
{'type': 'loss', 'content': 0.10547348856925964, 'timestamp': '2025-10-02 00:43:59.281309', 'step': 18596, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:43:59.337627', 'step': 18596, 'epoch': 2}
{'type': 'loss', 'content': 0.01139053050428629, 'timestamp': '2025-10-02 00:43:59.340173', 'step': 18597, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:59.395417', 'step': 18597, 'epoch': 2}
{'type': 'loss', 'content': 0.0031460197642445564, 'timestamp': '2025-10-02 00:43:59.402740', 'step': 18598, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:43:59.459229', 'step': 18598, 'epoch': 2}
{'type': 'loss', 'content': 0.003260629950091243, 'timestamp': '2025-10-02 00:43:59.468761', 'step': 18599, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:43:59.524080', 'step': 18599, 'epoch': 2}
{'type': 'loss', 'content': 0.03115953505039215, 'timestamp': '2025-10-02 00:43:59.530376', 'step': 18600, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:43:59.584495', 'step': 18600, 'epoch': 2}
{'type': 'loss', 'content': 0.053898781538009644, 'timestamp': '2025-10-02 00:43:59.586960', 'step': 18601, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:43:59.641401', 'step': 18601, 'epoch': 2}
{'type': 'loss', 'content': 0.03461318835616112, 'timestamp': '2025-10-02 00:43:59.644026', 'step': 18602, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:43:59.698901', 'step': 18602, 'epoch': 2}
{'type': 'loss', 'content': 0.10823772847652435, 'timestamp': '2025-10-02 00:43:59.701497', 'step': 18603, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:43:59.757752', 'step': 18603, 'epoch': 2}
{'type': 'loss', 'content': 0.05351341515779495, 'timestamp': '2025-10-02 00:43:59.763670', 'step': 18604, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:43:59.817927', 'step': 18604, 'epoch': 2}
{'type': 'loss', 'content': 0.02749522402882576, 'timestamp': '2025-10-02 00:43:59.825082', 'step': 18605, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:43:59.879609', 'step': 18605, 'epoch': 2}
{'type': 'loss', 'content': 0.05811142921447754, 'timestamp': '2025-10-02 00:43:59.881760', 'step': 18606, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:43:59.936984', 'step': 18606, 'epoch': 2}
{'type': 'loss', 'content': 0.024358974769711494, 'timestamp': '2025-10-02 00:43:59.946100', 'step': 18607, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:00.000479', 'step': 18607, 'epoch': 2}
{'type': 'loss', 'content': 0.10356509685516357, 'timestamp': '2025-10-02 00:44:00.006488', 'step': 18608, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:00.060776', 'step': 18608, 'epoch': 2}
{'type': 'loss', 'content': 0.05732499808073044, 'timestamp': '2025-10-02 00:44:00.063390', 'step': 18609, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:44:00.125092', 'step': 18609, 'epoch': 2}
{'type': 'loss', 'content': 0.07170794159173965, 'timestamp': '2025-10-02 00:44:00.135682', 'step': 18610, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:44:00.190521', 'step': 18610, 'epoch': 2}
{'type': 'loss', 'content': 0.1049305871129036, 'timestamp': '2025-10-02 00:44:00.192884', 'step': 18611, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:00.247172', 'step': 18611, 'epoch': 2}
{'type': 'loss', 'content': 0.1051437109708786, 'timestamp': '2025-10-02 00:44:00.253305', 'step': 18612, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:44:00.310614', 'step': 18612, 'epoch': 2}
{'type': 'loss', 'content': 0.026800192892551422, 'timestamp': '2025-10-02 00:44:00.321555', 'step': 18613, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:00.376642', 'step': 18613, 'epoch': 2}
{'type': 'loss', 'content': 0.006768274120986462, 'timestamp': '2025-10-02 00:44:00.379257', 'step': 18614, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:44:00.449905', 'step': 18614, 'epoch': 2}
{'type': 'loss', 'content': 0.022319631651043892, 'timestamp': '2025-10-02 00:44:00.462541', 'step': 18615, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:00.518359', 'step': 18615, 'epoch': 2}
{'type': 'loss', 'content': 0.09833754599094391, 'timestamp': '2025-10-02 00:44:00.524808', 'step': 18616, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:00.578347', 'step': 18616, 'epoch': 2}
{'type': 'loss', 'content': 0.05512106791138649, 'timestamp': '2025-10-02 00:44:00.580723', 'step': 18617, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:44:00.640414', 'step': 18617, 'epoch': 2}
{'type': 'loss', 'content': 0.01562581956386566, 'timestamp': '2025-10-02 00:44:00.650592', 'step': 18618, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:44:00.713068', 'step': 18618, 'epoch': 2}
{'type': 'loss', 'content': 0.04584367200732231, 'timestamp': '2025-10-02 00:44:00.723874', 'step': 18619, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:00.779341', 'step': 18619, 'epoch': 2}
{'type': 'loss', 'content': 0.03990394249558449, 'timestamp': '2025-10-02 00:44:00.785369', 'step': 18620, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:00.839606', 'step': 18620, 'epoch': 2}
{'type': 'loss', 'content': 0.018876226618885994, 'timestamp': '2025-10-02 00:44:00.846860', 'step': 18621, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:00.903172', 'step': 18621, 'epoch': 2}
{'type': 'loss', 'content': 0.04506240040063858, 'timestamp': '2025-10-02 00:44:00.905580', 'step': 18622, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:00.960444', 'step': 18622, 'epoch': 2}
{'type': 'loss', 'content': 0.030998405069112778, 'timestamp': '2025-10-02 00:44:00.967610', 'step': 18623, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:44:01.041752', 'step': 18623, 'epoch': 2}
{'type': 'loss', 'content': 0.02331637404859066, 'timestamp': '2025-10-02 00:44:01.055696', 'step': 18624, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:44:01.110083', 'step': 18624, 'epoch': 2}
{'type': 'loss', 'content': 0.0761968344449997, 'timestamp': '2025-10-02 00:44:01.112903', 'step': 18625, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:01.168435', 'step': 18625, 'epoch': 2}
{'type': 'loss', 'content': 0.03789860010147095, 'timestamp': '2025-10-02 00:44:01.170784', 'step': 18626, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:01.225255', 'step': 18626, 'epoch': 2}
{'type': 'loss', 'content': 0.053152333945035934, 'timestamp': '2025-10-02 00:44:01.227807', 'step': 18627, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:44:01.284060', 'step': 18627, 'epoch': 2}
{'type': 'loss', 'content': 0.10122521221637726, 'timestamp': '2025-10-02 00:44:01.294369', 'step': 18628, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:01.348337', 'step': 18628, 'epoch': 2}
{'type': 'loss', 'content': 0.106241375207901, 'timestamp': '2025-10-02 00:44:01.350820', 'step': 18629, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:44:01.404962', 'step': 18629, 'epoch': 2}
{'type': 'loss', 'content': 0.122590571641922, 'timestamp': '2025-10-02 00:44:01.407288', 'step': 18630, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:01.461870', 'step': 18630, 'epoch': 2}
{'type': 'loss', 'content': 0.015409684740006924, 'timestamp': '2025-10-02 00:44:01.467347', 'step': 18631, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:44:01.522164', 'step': 18631, 'epoch': 2}
{'type': 'loss', 'content': 0.10599850863218307, 'timestamp': '2025-10-02 00:44:01.527956', 'step': 18632, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:01.582564', 'step': 18632, 'epoch': 2}
{'type': 'loss', 'content': 0.03265347331762314, 'timestamp': '2025-10-02 00:44:01.591699', 'step': 18633, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:44:01.646457', 'step': 18633, 'epoch': 2}
{'type': 'loss', 'content': 0.05962357670068741, 'timestamp': '2025-10-02 00:44:01.648508', 'step': 18634, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:01.703574', 'step': 18634, 'epoch': 2}
{'type': 'loss', 'content': 0.03794631361961365, 'timestamp': '2025-10-02 00:44:01.708955', 'step': 18635, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:44:01.764994', 'step': 18635, 'epoch': 2}
{'type': 'loss', 'content': 0.08590386807918549, 'timestamp': '2025-10-02 00:44:01.775295', 'step': 18636, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:44:01.847260', 'step': 18636, 'epoch': 2}
{'type': 'loss', 'content': 0.017591198906302452, 'timestamp': '2025-10-02 00:44:01.861614', 'step': 18637, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:44:01.929852', 'step': 18637, 'epoch': 2}
{'type': 'loss', 'content': 0.0071548274718225, 'timestamp': '2025-10-02 00:44:01.941846', 'step': 18638, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:44:02.005084', 'step': 18638, 'epoch': 2}
{'type': 'loss', 'content': 0.09193883836269379, 'timestamp': '2025-10-02 00:44:02.014527', 'step': 18639, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:02.069619', 'step': 18639, 'epoch': 2}
{'type': 'loss', 'content': 0.14276154339313507, 'timestamp': '2025-10-02 00:44:02.075673', 'step': 18640, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:02.129948', 'step': 18640, 'epoch': 2}
{'type': 'loss', 'content': 0.04849645122885704, 'timestamp': '2025-10-02 00:44:02.132518', 'step': 18641, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:02.186950', 'step': 18641, 'epoch': 2}
{'type': 'loss', 'content': 0.15576094388961792, 'timestamp': '2025-10-02 00:44:02.189355', 'step': 18642, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:02.245103', 'step': 18642, 'epoch': 2}
{'type': 'loss', 'content': 0.08338820934295654, 'timestamp': '2025-10-02 00:44:02.247684', 'step': 18643, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:02.302828', 'step': 18643, 'epoch': 2}
{'type': 'loss', 'content': 0.045343831181526184, 'timestamp': '2025-10-02 00:44:02.308797', 'step': 18644, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:02.362657', 'step': 18644, 'epoch': 2}
{'type': 'loss', 'content': 0.033634673804044724, 'timestamp': '2025-10-02 00:44:02.370086', 'step': 18645, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:02.424499', 'step': 18645, 'epoch': 2}
{'type': 'loss', 'content': 0.0644366517663002, 'timestamp': '2025-10-02 00:44:02.426955', 'step': 18646, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:02.482223', 'step': 18646, 'epoch': 2}
{'type': 'loss', 'content': 0.17642870545387268, 'timestamp': '2025-10-02 00:44:02.484363', 'step': 18647, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:02.539435', 'step': 18647, 'epoch': 2}
{'type': 'loss', 'content': 0.0389774851500988, 'timestamp': '2025-10-02 00:44:02.545393', 'step': 18648, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:44:02.600032', 'step': 18648, 'epoch': 2}
{'type': 'loss', 'content': 0.05958184599876404, 'timestamp': '2025-10-02 00:44:02.602591', 'step': 18649, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:02.657221', 'step': 18649, 'epoch': 2}
{'type': 'loss', 'content': 0.1263297200202942, 'timestamp': '2025-10-02 00:44:02.659826', 'step': 18650, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:02.715333', 'step': 18650, 'epoch': 2}
{'type': 'loss', 'content': 0.07502663135528564, 'timestamp': '2025-10-02 00:44:02.718368', 'step': 18651, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:02.773872', 'step': 18651, 'epoch': 2}
{'type': 'loss', 'content': 0.12337522208690643, 'timestamp': '2025-10-02 00:44:02.780205', 'step': 18652, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:02.836173', 'step': 18652, 'epoch': 2}
{'type': 'loss', 'content': 0.01935894787311554, 'timestamp': '2025-10-02 00:44:02.838482', 'step': 18653, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:02.893385', 'step': 18653, 'epoch': 2}
{'type': 'loss', 'content': 0.08587078750133514, 'timestamp': '2025-10-02 00:44:02.900488', 'step': 18654, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:02.956202', 'step': 18654, 'epoch': 2}
{'type': 'loss', 'content': 0.09272285550832748, 'timestamp': '2025-10-02 00:44:02.958332', 'step': 18655, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:03.012920', 'step': 18655, 'epoch': 2}
{'type': 'loss', 'content': 0.08244780451059341, 'timestamp': '2025-10-02 00:44:03.019165', 'step': 18656, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:44:03.073962', 'step': 18656, 'epoch': 2}
{'type': 'loss', 'content': 0.031395021826028824, 'timestamp': '2025-10-02 00:44:03.076518', 'step': 18657, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:03.133095', 'step': 18657, 'epoch': 2}
{'type': 'loss', 'content': 0.020513392984867096, 'timestamp': '2025-10-02 00:44:03.138641', 'step': 18658, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:03.197147', 'step': 18658, 'epoch': 2}
{'type': 'loss', 'content': 0.04915013536810875, 'timestamp': '2025-10-02 00:44:03.202569', 'step': 18659, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:03.260221', 'step': 18659, 'epoch': 2}
{'type': 'loss', 'content': 0.05510209873318672, 'timestamp': '2025-10-02 00:44:03.265892', 'step': 18660, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:44:03.338607', 'step': 18660, 'epoch': 2}
{'type': 'loss', 'content': 0.027131197974085808, 'timestamp': '2025-10-02 00:44:03.353236', 'step': 18661, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:03.408445', 'step': 18661, 'epoch': 2}
{'type': 'loss', 'content': 0.049984462559223175, 'timestamp': '2025-10-02 00:44:03.411712', 'step': 18662, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:03.469107', 'step': 18662, 'epoch': 2}
{'type': 'loss', 'content': 0.025993678718805313, 'timestamp': '2025-10-02 00:44:03.472851', 'step': 18663, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:03.531390', 'step': 18663, 'epoch': 2}
{'type': 'loss', 'content': 0.33462122082710266, 'timestamp': '2025-10-02 00:44:03.537519', 'step': 18664, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:03.593838', 'step': 18664, 'epoch': 2}
{'type': 'loss', 'content': 0.039292044937610626, 'timestamp': '2025-10-02 00:44:03.596954', 'step': 18665, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:03.655080', 'step': 18665, 'epoch': 2}
{'type': 'loss', 'content': 0.0292502474039793, 'timestamp': '2025-10-02 00:44:03.658814', 'step': 18666, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:44:03.734837', 'step': 18666, 'epoch': 2}
{'type': 'loss', 'content': 0.00497876200824976, 'timestamp': '2025-10-02 00:44:03.747169', 'step': 18667, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:44:03.803855', 'step': 18667, 'epoch': 2}
{'type': 'loss', 'content': 0.052623726427555084, 'timestamp': '2025-10-02 00:44:03.810479', 'step': 18668, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:03.868182', 'step': 18668, 'epoch': 2}
{'type': 'loss', 'content': 0.0616031251847744, 'timestamp': '2025-10-02 00:44:03.871380', 'step': 18669, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:03.928583', 'step': 18669, 'epoch': 2}
{'type': 'loss', 'content': 0.10961493849754333, 'timestamp': '2025-10-02 00:44:03.931227', 'step': 18670, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:44:03.995441', 'step': 18670, 'epoch': 2}
{'type': 'loss', 'content': 0.005304416175931692, 'timestamp': '2025-10-02 00:44:04.006058', 'step': 18671, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:04.066379', 'step': 18671, 'epoch': 2}
{'type': 'loss', 'content': 0.016162008047103882, 'timestamp': '2025-10-02 00:44:04.073019', 'step': 18672, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:04.126842', 'step': 18672, 'epoch': 2}
{'type': 'loss', 'content': 0.05441676080226898, 'timestamp': '2025-10-02 00:44:04.129242', 'step': 18673, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:04.184472', 'step': 18673, 'epoch': 2}
{'type': 'loss', 'content': 0.07795659452676773, 'timestamp': '2025-10-02 00:44:04.187248', 'step': 18674, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:04.243304', 'step': 18674, 'epoch': 2}
{'type': 'loss', 'content': 0.046793438494205475, 'timestamp': '2025-10-02 00:44:04.250356', 'step': 18675, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:44:04.306958', 'step': 18675, 'epoch': 2}
{'type': 'loss', 'content': 0.20444193482398987, 'timestamp': '2025-10-02 00:44:04.313132', 'step': 18676, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:04.370904', 'step': 18676, 'epoch': 2}
{'type': 'loss', 'content': 0.13783489167690277, 'timestamp': '2025-10-02 00:44:04.373673', 'step': 18677, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:04.429224', 'step': 18677, 'epoch': 2}
{'type': 'loss', 'content': 0.05798676609992981, 'timestamp': '2025-10-02 00:44:04.431654', 'step': 18678, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:04.489163', 'step': 18678, 'epoch': 2}
{'type': 'loss', 'content': 0.0629466325044632, 'timestamp': '2025-10-02 00:44:04.492609', 'step': 18679, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:04.549490', 'step': 18679, 'epoch': 2}
{'type': 'loss', 'content': 0.06131521984934807, 'timestamp': '2025-10-02 00:44:04.556529', 'step': 18680, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:04.614779', 'step': 18680, 'epoch': 2}
{'type': 'loss', 'content': 0.18637381494045258, 'timestamp': '2025-10-02 00:44:04.617904', 'step': 18681, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:44:04.675625', 'step': 18681, 'epoch': 2}
{'type': 'loss', 'content': 0.0814402773976326, 'timestamp': '2025-10-02 00:44:04.679127', 'step': 18682, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:04.736623', 'step': 18682, 'epoch': 2}
{'type': 'loss', 'content': 0.04280634596943855, 'timestamp': '2025-10-02 00:44:04.743793', 'step': 18683, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:04.800709', 'step': 18683, 'epoch': 2}
{'type': 'loss', 'content': 0.037527937442064285, 'timestamp': '2025-10-02 00:44:04.807126', 'step': 18684, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:44:04.862552', 'step': 18684, 'epoch': 2}
{'type': 'loss', 'content': 0.07213909178972244, 'timestamp': '2025-10-02 00:44:04.872732', 'step': 18685, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:44:04.931680', 'step': 18685, 'epoch': 2}
{'type': 'loss', 'content': 0.09897720068693161, 'timestamp': '2025-10-02 00:44:04.934534', 'step': 18686, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:04.992889', 'step': 18686, 'epoch': 2}
{'type': 'loss', 'content': 0.08673758804798126, 'timestamp': '2025-10-02 00:44:04.998307', 'step': 18687, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:05.055089', 'step': 18687, 'epoch': 2}
{'type': 'loss', 'content': 0.036949798464775085, 'timestamp': '2025-10-02 00:44:05.061453', 'step': 18688, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:44:05.123535', 'step': 18688, 'epoch': 2}
{'type': 'loss', 'content': 0.08067280799150467, 'timestamp': '2025-10-02 00:44:05.134854', 'step': 18689, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:44:05.196716', 'step': 18689, 'epoch': 2}
{'type': 'loss', 'content': 0.032643873244524, 'timestamp': '2025-10-02 00:44:05.206869', 'step': 18690, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:05.262685', 'step': 18690, 'epoch': 2}
{'type': 'loss', 'content': 0.05015063285827637, 'timestamp': '2025-10-02 00:44:05.265707', 'step': 18691, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:05.322271', 'step': 18691, 'epoch': 2}
{'type': 'loss', 'content': 0.059869181364774704, 'timestamp': '2025-10-02 00:44:05.328566', 'step': 18692, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:05.385621', 'step': 18692, 'epoch': 2}
{'type': 'loss', 'content': 0.10958440601825714, 'timestamp': '2025-10-02 00:44:05.388303', 'step': 18693, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:05.445075', 'step': 18693, 'epoch': 2}
{'type': 'loss', 'content': 0.055657096207141876, 'timestamp': '2025-10-02 00:44:05.450547', 'step': 18694, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:44:05.508758', 'step': 18694, 'epoch': 2}
{'type': 'loss', 'content': 0.031731296330690384, 'timestamp': '2025-10-02 00:44:05.518289', 'step': 18695, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:05.573795', 'step': 18695, 'epoch': 2}
{'type': 'loss', 'content': 0.0457124225795269, 'timestamp': '2025-10-02 00:44:05.581492', 'step': 18696, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:05.637000', 'step': 18696, 'epoch': 2}
{'type': 'loss', 'content': 0.0950108990073204, 'timestamp': '2025-10-02 00:44:05.640490', 'step': 18697, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:05.694839', 'step': 18697, 'epoch': 2}
{'type': 'loss', 'content': 0.14494669437408447, 'timestamp': '2025-10-02 00:44:05.697283', 'step': 18698, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:44:05.752678', 'step': 18698, 'epoch': 2}
{'type': 'loss', 'content': 0.03770255297422409, 'timestamp': '2025-10-02 00:44:05.755268', 'step': 18699, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:05.810946', 'step': 18699, 'epoch': 2}
{'type': 'loss', 'content': 0.03374447301030159, 'timestamp': '2025-10-02 00:44:05.818895', 'step': 18700, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:05.873674', 'step': 18700, 'epoch': 2}
{'type': 'loss', 'content': 0.03997263312339783, 'timestamp': '2025-10-02 00:44:05.879069', 'step': 18701, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:05.933668', 'step': 18701, 'epoch': 2}
{'type': 'loss', 'content': 0.019344262778759003, 'timestamp': '2025-10-02 00:44:05.942825', 'step': 18702, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:05.999841', 'step': 18702, 'epoch': 2}
{'type': 'loss', 'content': 0.020455943420529366, 'timestamp': '2025-10-02 00:44:06.001625', 'step': 18703, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:44:06.056000', 'step': 18703, 'epoch': 2}
{'type': 'loss', 'content': 0.07331149280071259, 'timestamp': '2025-10-02 00:44:06.061915', 'step': 18704, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:44:06.116690', 'step': 18704, 'epoch': 2}
{'type': 'loss', 'content': 0.05345740169286728, 'timestamp': '2025-10-02 00:44:06.118739', 'step': 18705, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:06.174180', 'step': 18705, 'epoch': 2}
{'type': 'loss', 'content': 0.1581054925918579, 'timestamp': '2025-10-02 00:44:06.176045', 'step': 18706, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:06.230061', 'step': 18706, 'epoch': 2}
{'type': 'loss', 'content': 0.20932897925376892, 'timestamp': '2025-10-02 00:44:06.232191', 'step': 18707, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:06.287696', 'step': 18707, 'epoch': 2}
{'type': 'loss', 'content': 0.06615681946277618, 'timestamp': '2025-10-02 00:44:06.293387', 'step': 18708, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:06.347825', 'step': 18708, 'epoch': 2}
{'type': 'loss', 'content': 0.04120466858148575, 'timestamp': '2025-10-02 00:44:06.357120', 'step': 18709, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:06.413978', 'step': 18709, 'epoch': 2}
{'type': 'loss', 'content': 0.0019428699743002653, 'timestamp': '2025-10-02 00:44:06.416295', 'step': 18710, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:44:06.477907', 'step': 18710, 'epoch': 2}
{'type': 'loss', 'content': 0.059213753789663315, 'timestamp': '2025-10-02 00:44:06.480322', 'step': 18711, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:06.535859', 'step': 18711, 'epoch': 2}
{'type': 'loss', 'content': 0.07516912370920181, 'timestamp': '2025-10-02 00:44:06.541838', 'step': 18712, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:06.596266', 'step': 18712, 'epoch': 2}
{'type': 'loss', 'content': 0.058597564697265625, 'timestamp': '2025-10-02 00:44:06.603377', 'step': 18713, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:44:06.658047', 'step': 18713, 'epoch': 2}
{'type': 'loss', 'content': 0.07538650929927826, 'timestamp': '2025-10-02 00:44:06.660108', 'step': 18714, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:06.715623', 'step': 18714, 'epoch': 2}
{'type': 'loss', 'content': 0.07742921262979507, 'timestamp': '2025-10-02 00:44:06.717902', 'step': 18715, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:44:06.777941', 'step': 18715, 'epoch': 2}
{'type': 'loss', 'content': 0.02318982034921646, 'timestamp': '2025-10-02 00:44:06.788942', 'step': 18716, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:06.843243', 'step': 18716, 'epoch': 2}
{'type': 'loss', 'content': 0.1421940177679062, 'timestamp': '2025-10-02 00:44:06.845565', 'step': 18717, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:06.899614', 'step': 18717, 'epoch': 2}
{'type': 'loss', 'content': 0.07872384786605835, 'timestamp': '2025-10-02 00:44:06.902119', 'step': 18718, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:44:06.957869', 'step': 18718, 'epoch': 2}
{'type': 'loss', 'content': 0.11049734055995941, 'timestamp': '2025-10-02 00:44:06.960232', 'step': 18719, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:44:07.015034', 'step': 18719, 'epoch': 2}
{'type': 'loss', 'content': 0.08805352449417114, 'timestamp': '2025-10-02 00:44:07.020553', 'step': 18720, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:44:07.074939', 'step': 18720, 'epoch': 2}
{'type': 'loss', 'content': 0.04237697646021843, 'timestamp': '2025-10-02 00:44:07.085113', 'step': 18721, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:07.140518', 'step': 18721, 'epoch': 2}
{'type': 'loss', 'content': 0.007838554680347443, 'timestamp': '2025-10-02 00:44:07.149408', 'step': 18722, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:07.204417', 'step': 18722, 'epoch': 2}
{'type': 'loss', 'content': 0.08120513707399368, 'timestamp': '2025-10-02 00:44:07.206597', 'step': 18723, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:07.261486', 'step': 18723, 'epoch': 2}
{'type': 'loss', 'content': 0.06805430352687836, 'timestamp': '2025-10-02 00:44:07.267360', 'step': 18724, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:44:07.325038', 'step': 18724, 'epoch': 2}
{'type': 'loss', 'content': 0.041657835245132446, 'timestamp': '2025-10-02 00:44:07.336009', 'step': 18725, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:44:07.398318', 'step': 18725, 'epoch': 2}
{'type': 'loss', 'content': 0.04301687702536583, 'timestamp': '2025-10-02 00:44:07.408897', 'step': 18726, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:07.464175', 'step': 18726, 'epoch': 2}
{'type': 'loss', 'content': 0.0480976365506649, 'timestamp': '2025-10-02 00:44:07.466072', 'step': 18727, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:07.521125', 'step': 18727, 'epoch': 2}
{'type': 'loss', 'content': 0.032982539385557175, 'timestamp': '2025-10-02 00:44:07.526939', 'step': 18728, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:44:07.587726', 'step': 18728, 'epoch': 2}
{'type': 'loss', 'content': 0.0329156294465065, 'timestamp': '2025-10-02 00:44:07.599106', 'step': 18729, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:07.653881', 'step': 18729, 'epoch': 2}
{'type': 'loss', 'content': 0.03031005710363388, 'timestamp': '2025-10-02 00:44:07.660862', 'step': 18730, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:07.716059', 'step': 18730, 'epoch': 2}
{'type': 'loss', 'content': 0.042415011674165726, 'timestamp': '2025-10-02 00:44:07.718503', 'step': 18731, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:07.773513', 'step': 18731, 'epoch': 2}
{'type': 'loss', 'content': 0.11302928626537323, 'timestamp': '2025-10-02 00:44:07.779423', 'step': 18732, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:07.834027', 'step': 18732, 'epoch': 2}
{'type': 'loss', 'content': 0.07780544459819794, 'timestamp': '2025-10-02 00:44:07.836426', 'step': 18733, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:44:07.898279', 'step': 18733, 'epoch': 2}
{'type': 'loss', 'content': 0.018997620791196823, 'timestamp': '2025-10-02 00:44:07.908731', 'step': 18734, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:44:07.964133', 'step': 18734, 'epoch': 2}
{'type': 'loss', 'content': 0.0815824419260025, 'timestamp': '2025-10-02 00:44:07.966517', 'step': 18735, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:08.023221', 'step': 18735, 'epoch': 2}
{'type': 'loss', 'content': 0.016184836626052856, 'timestamp': '2025-10-02 00:44:08.029165', 'step': 18736, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:08.083799', 'step': 18736, 'epoch': 2}
{'type': 'loss', 'content': 0.06649076193571091, 'timestamp': '2025-10-02 00:44:08.085791', 'step': 18737, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:08.141694', 'step': 18737, 'epoch': 2}
{'type': 'loss', 'content': 0.041798219084739685, 'timestamp': '2025-10-02 00:44:08.143916', 'step': 18738, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:44:08.199064', 'step': 18738, 'epoch': 2}
{'type': 'loss', 'content': 0.201407790184021, 'timestamp': '2025-10-02 00:44:08.202124', 'step': 18739, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:08.257813', 'step': 18739, 'epoch': 2}
{'type': 'loss', 'content': 0.11197730898857117, 'timestamp': '2025-10-02 00:44:08.263716', 'step': 18740, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:08.317784', 'step': 18740, 'epoch': 2}
{'type': 'loss', 'content': 0.023624537512660027, 'timestamp': '2025-10-02 00:44:08.326824', 'step': 18741, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:08.381838', 'step': 18741, 'epoch': 2}
{'type': 'loss', 'content': 0.06831279397010803, 'timestamp': '2025-10-02 00:44:08.387483', 'step': 18742, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:08.443426', 'step': 18742, 'epoch': 2}
{'type': 'loss', 'content': 0.04165009409189224, 'timestamp': '2025-10-02 00:44:08.445806', 'step': 18743, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:08.501003', 'step': 18743, 'epoch': 2}
{'type': 'loss', 'content': 0.1577991247177124, 'timestamp': '2025-10-02 00:44:08.506628', 'step': 18744, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:44:08.567865', 'step': 18744, 'epoch': 2}
{'type': 'loss', 'content': 0.04143195599317551, 'timestamp': '2025-10-02 00:44:08.579401', 'step': 18745, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:08.634357', 'step': 18745, 'epoch': 2}
{'type': 'loss', 'content': 0.04113655537366867, 'timestamp': '2025-10-02 00:44:08.636692', 'step': 18746, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:08.691899', 'step': 18746, 'epoch': 2}
{'type': 'loss', 'content': 0.09256933629512787, 'timestamp': '2025-10-02 00:44:08.694292', 'step': 18747, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:44:08.749018', 'step': 18747, 'epoch': 2}
{'type': 'loss', 'content': 0.08603311330080032, 'timestamp': '2025-10-02 00:44:08.755015', 'step': 18748, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:44:08.809249', 'step': 18748, 'epoch': 2}
{'type': 'loss', 'content': 0.0353691503405571, 'timestamp': '2025-10-02 00:44:08.811580', 'step': 18749, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:08.865805', 'step': 18749, 'epoch': 2}
{'type': 'loss', 'content': 0.04885947331786156, 'timestamp': '2025-10-02 00:44:08.871364', 'step': 18750, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:08.926893', 'step': 18750, 'epoch': 2}
{'type': 'loss', 'content': 0.07537060230970383, 'timestamp': '2025-10-02 00:44:08.932349', 'step': 18751, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:44:08.988962', 'step': 18751, 'epoch': 2}
{'type': 'loss', 'content': 0.020678387954831123, 'timestamp': '2025-10-02 00:44:08.999368', 'step': 18752, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:09.053882', 'step': 18752, 'epoch': 2}
{'type': 'loss', 'content': 0.036799512803554535, 'timestamp': '2025-10-02 00:44:09.055925', 'step': 18753, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:09.111304', 'step': 18753, 'epoch': 2}
{'type': 'loss', 'content': 0.06932521611452103, 'timestamp': '2025-10-02 00:44:09.116741', 'step': 18754, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:09.172136', 'step': 18754, 'epoch': 2}
{'type': 'loss', 'content': 0.059837594628334045, 'timestamp': '2025-10-02 00:44:09.174616', 'step': 18755, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:09.230450', 'step': 18755, 'epoch': 2}
{'type': 'loss', 'content': 0.017062269151210785, 'timestamp': '2025-10-02 00:44:09.236886', 'step': 18756, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:44:09.295478', 'step': 18756, 'epoch': 2}
{'type': 'loss', 'content': 0.06072076037526131, 'timestamp': '2025-10-02 00:44:09.306466', 'step': 18757, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:09.361668', 'step': 18757, 'epoch': 2}
{'type': 'loss', 'content': 0.031505290418863297, 'timestamp': '2025-10-02 00:44:09.363927', 'step': 18758, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:09.418851', 'step': 18758, 'epoch': 2}
{'type': 'loss', 'content': 0.052976589649915695, 'timestamp': '2025-10-02 00:44:09.424168', 'step': 18759, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:09.479456', 'step': 18759, 'epoch': 2}
{'type': 'loss', 'content': 0.048386771231889725, 'timestamp': '2025-10-02 00:44:09.485008', 'step': 18760, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:09.539496', 'step': 18760, 'epoch': 2}
{'type': 'loss', 'content': 0.25519269704818726, 'timestamp': '2025-10-02 00:44:09.541666', 'step': 18761, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:09.596780', 'step': 18761, 'epoch': 2}
{'type': 'loss', 'content': 0.021044449880719185, 'timestamp': '2025-10-02 00:44:09.602054', 'step': 18762, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:44:09.658197', 'step': 18762, 'epoch': 2}
{'type': 'loss', 'content': 0.0791378989815712, 'timestamp': '2025-10-02 00:44:09.660561', 'step': 18763, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:09.715899', 'step': 18763, 'epoch': 2}
{'type': 'loss', 'content': 0.01952992007136345, 'timestamp': '2025-10-02 00:44:09.722156', 'step': 18764, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:09.777312', 'step': 18764, 'epoch': 2}
{'type': 'loss', 'content': 0.0661395713686943, 'timestamp': '2025-10-02 00:44:09.779541', 'step': 18765, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:44:09.834379', 'step': 18765, 'epoch': 2}
{'type': 'loss', 'content': 0.05613561347126961, 'timestamp': '2025-10-02 00:44:09.836661', 'step': 18766, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:09.893847', 'step': 18766, 'epoch': 2}
{'type': 'loss', 'content': 0.012308264151215553, 'timestamp': '2025-10-02 00:44:09.899273', 'step': 18767, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:44:09.954536', 'step': 18767, 'epoch': 2}
{'type': 'loss', 'content': 0.1775742769241333, 'timestamp': '2025-10-02 00:44:09.960090', 'step': 18768, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:10.014726', 'step': 18768, 'epoch': 2}
{'type': 'loss', 'content': 0.09989774972200394, 'timestamp': '2025-10-02 00:44:10.016670', 'step': 18769, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:44:10.072553', 'step': 18769, 'epoch': 2}
{'type': 'loss', 'content': 0.0343148373067379, 'timestamp': '2025-10-02 00:44:10.074950', 'step': 18770, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:10.130678', 'step': 18770, 'epoch': 2}
{'type': 'loss', 'content': 0.008226967416703701, 'timestamp': '2025-10-02 00:44:10.137633', 'step': 18771, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:10.194115', 'step': 18771, 'epoch': 2}
{'type': 'loss', 'content': 0.03860769048333168, 'timestamp': '2025-10-02 00:44:10.200412', 'step': 18772, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:10.254779', 'step': 18772, 'epoch': 2}
{'type': 'loss', 'content': 0.05332038179039955, 'timestamp': '2025-10-02 00:44:10.257392', 'step': 18773, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:10.313692', 'step': 18773, 'epoch': 2}
{'type': 'loss', 'content': 0.051266055554151535, 'timestamp': '2025-10-02 00:44:10.316402', 'step': 18774, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:10.371728', 'step': 18774, 'epoch': 2}
{'type': 'loss', 'content': 0.021365266293287277, 'timestamp': '2025-10-02 00:44:10.377296', 'step': 18775, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:10.431829', 'step': 18775, 'epoch': 2}
{'type': 'loss', 'content': 0.10649711638689041, 'timestamp': '2025-10-02 00:44:10.438063', 'step': 18776, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:44:10.493387', 'step': 18776, 'epoch': 2}
{'type': 'loss', 'content': 0.0976128950715065, 'timestamp': '2025-10-02 00:44:10.495658', 'step': 18777, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:44:10.552783', 'step': 18777, 'epoch': 2}
{'type': 'loss', 'content': 0.07760697603225708, 'timestamp': '2025-10-02 00:44:10.559019', 'step': 18778, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:44:10.619677', 'step': 18778, 'epoch': 2}
{'type': 'loss', 'content': 0.10537517815828323, 'timestamp': '2025-10-02 00:44:10.622120', 'step': 18779, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:10.677810', 'step': 18779, 'epoch': 2}
{'type': 'loss', 'content': 0.005932126194238663, 'timestamp': '2025-10-02 00:44:10.683868', 'step': 18780, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:10.737647', 'step': 18780, 'epoch': 2}
{'type': 'loss', 'content': 0.07487157732248306, 'timestamp': '2025-10-02 00:44:10.740023', 'step': 18781, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:44:10.804622', 'step': 18781, 'epoch': 2}
{'type': 'loss', 'content': 0.0276656337082386, 'timestamp': '2025-10-02 00:44:10.815422', 'step': 18782, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:10.871539', 'step': 18782, 'epoch': 2}
{'type': 'loss', 'content': 0.03216075152158737, 'timestamp': '2025-10-02 00:44:10.873543', 'step': 18783, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:44:10.927744', 'step': 18783, 'epoch': 2}
{'type': 'loss', 'content': 0.09481427073478699, 'timestamp': '2025-10-02 00:44:10.934299', 'step': 18784, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:44:10.990867', 'step': 18784, 'epoch': 2}
{'type': 'loss', 'content': 0.03994474187493324, 'timestamp': '2025-10-02 00:44:11.001098', 'step': 18785, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:11.057616', 'step': 18785, 'epoch': 2}
{'type': 'loss', 'content': 0.10974565148353577, 'timestamp': '2025-10-02 00:44:11.062764', 'step': 18786, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:44:11.125087', 'step': 18786, 'epoch': 2}
{'type': 'loss', 'content': 0.03171340003609657, 'timestamp': '2025-10-02 00:44:11.135272', 'step': 18787, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:11.193350', 'step': 18787, 'epoch': 2}
{'type': 'loss', 'content': 0.13775748014450073, 'timestamp': '2025-10-02 00:44:11.199350', 'step': 18788, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:11.254111', 'step': 18788, 'epoch': 2}
{'type': 'loss', 'content': 0.012227118015289307, 'timestamp': '2025-10-02 00:44:11.259687', 'step': 18789, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:11.314699', 'step': 18789, 'epoch': 2}
{'type': 'loss', 'content': 0.03601511940360069, 'timestamp': '2025-10-02 00:44:11.320189', 'step': 18790, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:11.375679', 'step': 18790, 'epoch': 2}
{'type': 'loss', 'content': 0.0026322598569095135, 'timestamp': '2025-10-02 00:44:11.377934', 'step': 18791, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:11.432830', 'step': 18791, 'epoch': 2}
{'type': 'loss', 'content': 0.06983613222837448, 'timestamp': '2025-10-02 00:44:11.438994', 'step': 18792, 'epoch': 2}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:44:38.753781', 'step': 18792, 'epoch': 2}
{'type': 'pplx', 'content': 87.78403749270308, 'timestamp': '2025-10-02 00:44:38.761370', 'step': 18792, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:38.822293', 'step': 18792, 'epoch': 2}
{'type': 'loss', 'content': 0.025586765259504318, 'timestamp': '2025-10-02 00:44:38.825706', 'step': 18793, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:38.890196', 'step': 18793, 'epoch': 2}
{'type': 'loss', 'content': 0.07912987470626831, 'timestamp': '2025-10-02 00:44:38.894015', 'step': 18794, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:38.950959', 'step': 18794, 'epoch': 2}
{'type': 'loss', 'content': 0.0009431056096218526, 'timestamp': '2025-10-02 00:44:38.958038', 'step': 18795, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:44:39.016961', 'step': 18795, 'epoch': 2}
{'type': 'loss', 'content': 0.05864614248275757, 'timestamp': '2025-10-02 00:44:39.028631', 'step': 18796, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:39.089519', 'step': 18796, 'epoch': 2}
{'type': 'loss', 'content': 0.13031704723834991, 'timestamp': '2025-10-02 00:44:39.095155', 'step': 18797, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:39.166699', 'step': 18797, 'epoch': 2}
{'type': 'loss', 'content': 0.02339845709502697, 'timestamp': '2025-10-02 00:44:39.170031', 'step': 18798, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:44:39.229572', 'step': 18798, 'epoch': 2}
{'type': 'loss', 'content': 0.042948149144649506, 'timestamp': '2025-10-02 00:44:39.235407', 'step': 18799, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:39.300509', 'step': 18799, 'epoch': 2}
{'type': 'loss', 'content': 0.04238375276327133, 'timestamp': '2025-10-02 00:44:39.310849', 'step': 18800, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:39.374187', 'step': 18800, 'epoch': 2}
{'type': 'loss', 'content': 0.046846289187669754, 'timestamp': '2025-10-02 00:44:39.381901', 'step': 18801, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:39.451753', 'step': 18801, 'epoch': 2}
{'type': 'loss', 'content': 0.034306786954402924, 'timestamp': '2025-10-02 00:44:39.458435', 'step': 18802, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:44:39.517291', 'step': 18802, 'epoch': 2}
{'type': 'loss', 'content': 0.03298966959118843, 'timestamp': '2025-10-02 00:44:39.526783', 'step': 18803, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:39.586823', 'step': 18803, 'epoch': 2}
{'type': 'loss', 'content': 0.02567017450928688, 'timestamp': '2025-10-02 00:44:39.593518', 'step': 18804, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:39.648743', 'step': 18804, 'epoch': 2}
{'type': 'loss', 'content': 0.09252822399139404, 'timestamp': '2025-10-02 00:44:39.651353', 'step': 18805, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:44:39.720607', 'step': 18805, 'epoch': 2}
{'type': 'loss', 'content': 0.021435827016830444, 'timestamp': '2025-10-02 00:44:39.730746', 'step': 18806, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:39.793065', 'step': 18806, 'epoch': 2}
{'type': 'loss', 'content': 0.03846775367856026, 'timestamp': '2025-10-02 00:44:39.798594', 'step': 18807, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:39.854327', 'step': 18807, 'epoch': 2}
{'type': 'loss', 'content': 0.074037104845047, 'timestamp': '2025-10-02 00:44:39.863847', 'step': 18808, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:39.934901', 'step': 18808, 'epoch': 2}
{'type': 'loss', 'content': 0.06030584126710892, 'timestamp': '2025-10-02 00:44:39.944149', 'step': 18809, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:44:40.011044', 'step': 18809, 'epoch': 2}
{'type': 'loss', 'content': 0.07354652136564255, 'timestamp': '2025-10-02 00:44:40.017026', 'step': 18810, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:40.082603', 'step': 18810, 'epoch': 2}
{'type': 'loss', 'content': 0.03615633398294449, 'timestamp': '2025-10-02 00:44:40.088098', 'step': 18811, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:44:40.163688', 'step': 18811, 'epoch': 2}
{'type': 'loss', 'content': 0.08957351744174957, 'timestamp': '2025-10-02 00:44:40.175155', 'step': 18812, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:40.254045', 'step': 18812, 'epoch': 2}
{'type': 'loss', 'content': 0.049363866448402405, 'timestamp': '2025-10-02 00:44:40.261335', 'step': 18813, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:44:40.328508', 'step': 18813, 'epoch': 2}
{'type': 'loss', 'content': 0.08611501008272171, 'timestamp': '2025-10-02 00:44:40.337150', 'step': 18814, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:44:40.404361', 'step': 18814, 'epoch': 2}
{'type': 'loss', 'content': 0.023376625031232834, 'timestamp': '2025-10-02 00:44:40.414866', 'step': 18815, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:40.493615', 'step': 18815, 'epoch': 2}
{'type': 'loss', 'content': 0.09625867754220963, 'timestamp': '2025-10-02 00:44:40.500634', 'step': 18816, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:40.561346', 'step': 18816, 'epoch': 2}
{'type': 'loss', 'content': 0.05775390565395355, 'timestamp': '2025-10-02 00:44:40.570640', 'step': 18817, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:44:40.639807', 'step': 18817, 'epoch': 2}
{'type': 'loss', 'content': 0.01339430920779705, 'timestamp': '2025-10-02 00:44:40.650473', 'step': 18818, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:40.722396', 'step': 18818, 'epoch': 2}
{'type': 'loss', 'content': 0.0909309908747673, 'timestamp': '2025-10-02 00:44:40.728649', 'step': 18819, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:40.796700', 'step': 18819, 'epoch': 2}
{'type': 'loss', 'content': 0.07995329052209854, 'timestamp': '2025-10-02 00:44:40.802760', 'step': 18820, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:44:40.873313', 'step': 18820, 'epoch': 2}
{'type': 'loss', 'content': 0.023386798799037933, 'timestamp': '2025-10-02 00:44:40.884806', 'step': 18821, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:44:40.958655', 'step': 18821, 'epoch': 2}
{'type': 'loss', 'content': 0.037921611219644547, 'timestamp': '2025-10-02 00:44:40.968206', 'step': 18822, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:41.042470', 'step': 18822, 'epoch': 2}
{'type': 'loss', 'content': 0.11946432292461395, 'timestamp': '2025-10-02 00:44:41.048912', 'step': 18823, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:41.110781', 'step': 18823, 'epoch': 2}
{'type': 'loss', 'content': 0.07155423611402512, 'timestamp': '2025-10-02 00:44:41.123810', 'step': 18824, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:44:41.191931', 'step': 18824, 'epoch': 2}
{'type': 'loss', 'content': 0.059119272977113724, 'timestamp': '2025-10-02 00:44:41.195410', 'step': 18825, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:44:41.267293', 'step': 18825, 'epoch': 2}
{'type': 'loss', 'content': 0.06558330357074738, 'timestamp': '2025-10-02 00:44:41.272353', 'step': 18826, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:41.348445', 'step': 18826, 'epoch': 2}
{'type': 'loss', 'content': 0.0958612859249115, 'timestamp': '2025-10-02 00:44:41.353978', 'step': 18827, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:41.416791', 'step': 18827, 'epoch': 2}
{'type': 'loss', 'content': 0.011799151077866554, 'timestamp': '2025-10-02 00:44:41.424353', 'step': 18828, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:44:41.505498', 'step': 18828, 'epoch': 2}
{'type': 'loss', 'content': 0.0271921306848526, 'timestamp': '2025-10-02 00:44:41.519891', 'step': 18829, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:41.589674', 'step': 18829, 'epoch': 2}
{'type': 'loss', 'content': 0.11688300967216492, 'timestamp': '2025-10-02 00:44:41.593590', 'step': 18830, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:41.659691', 'step': 18830, 'epoch': 2}
{'type': 'loss', 'content': 0.11611678451299667, 'timestamp': '2025-10-02 00:44:41.662828', 'step': 18831, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:41.734980', 'step': 18831, 'epoch': 2}
{'type': 'loss', 'content': 0.04216707870364189, 'timestamp': '2025-10-02 00:44:41.747598', 'step': 18832, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:41.819032', 'step': 18832, 'epoch': 2}
{'type': 'loss', 'content': 0.026507161557674408, 'timestamp': '2025-10-02 00:44:41.827907', 'step': 18833, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:41.894544', 'step': 18833, 'epoch': 2}
{'type': 'loss', 'content': 0.12551330029964447, 'timestamp': '2025-10-02 00:44:41.897924', 'step': 18834, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:44:41.968899', 'step': 18834, 'epoch': 2}
{'type': 'loss', 'content': 0.08210115879774094, 'timestamp': '2025-10-02 00:44:41.971943', 'step': 18835, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:42.034984', 'step': 18835, 'epoch': 2}
{'type': 'loss', 'content': 0.029339931905269623, 'timestamp': '2025-10-02 00:44:42.044619', 'step': 18836, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:42.109443', 'step': 18836, 'epoch': 2}
{'type': 'loss', 'content': 0.05538075417280197, 'timestamp': '2025-10-02 00:44:42.118575', 'step': 18837, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:44:42.195417', 'step': 18837, 'epoch': 2}
{'type': 'loss', 'content': 0.011490385048091412, 'timestamp': '2025-10-02 00:44:42.205638', 'step': 18838, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:42.269866', 'step': 18838, 'epoch': 2}
{'type': 'loss', 'content': 0.09331183135509491, 'timestamp': '2025-10-02 00:44:42.273001', 'step': 18839, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:44:42.331120', 'step': 18839, 'epoch': 2}
{'type': 'loss', 'content': 0.023931391537189484, 'timestamp': '2025-10-02 00:44:42.341442', 'step': 18840, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:44:42.417835', 'step': 18840, 'epoch': 2}
{'type': 'loss', 'content': 0.038313690572977066, 'timestamp': '2025-10-02 00:44:42.428093', 'step': 18841, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:42.488025', 'step': 18841, 'epoch': 2}
{'type': 'loss', 'content': 0.11413012444972992, 'timestamp': '2025-10-02 00:44:42.491908', 'step': 18842, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:42.552356', 'step': 18842, 'epoch': 2}
{'type': 'loss', 'content': 0.28910204768180847, 'timestamp': '2025-10-02 00:44:42.560118', 'step': 18843, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:42.626286', 'step': 18843, 'epoch': 2}
{'type': 'loss', 'content': 0.0602135956287384, 'timestamp': '2025-10-02 00:44:42.637560', 'step': 18844, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:44:42.701418', 'step': 18844, 'epoch': 2}
{'type': 'loss', 'content': 0.009803472086787224, 'timestamp': '2025-10-02 00:44:42.712972', 'step': 18845, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:42.779880', 'step': 18845, 'epoch': 2}
{'type': 'loss', 'content': 0.12229081243276596, 'timestamp': '2025-10-02 00:44:42.783232', 'step': 18846, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:42.853057', 'step': 18846, 'epoch': 2}
{'type': 'loss', 'content': 0.027218829840421677, 'timestamp': '2025-10-02 00:44:42.861794', 'step': 18847, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:42.920916', 'step': 18847, 'epoch': 2}
{'type': 'loss', 'content': 0.052842192351818085, 'timestamp': '2025-10-02 00:44:42.927977', 'step': 18848, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:42.999639', 'step': 18848, 'epoch': 2}
{'type': 'loss', 'content': 0.05748371779918671, 'timestamp': '2025-10-02 00:44:43.008813', 'step': 18849, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:43.078055', 'step': 18849, 'epoch': 2}
{'type': 'loss', 'content': 0.12867575883865356, 'timestamp': '2025-10-02 00:44:43.081704', 'step': 18850, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:44:43.190036', 'step': 18850, 'epoch': 2}
{'type': 'loss', 'content': 0.07338132709264755, 'timestamp': '2025-10-02 00:44:43.201857', 'step': 18851, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:44:43.281509', 'step': 18851, 'epoch': 2}
{'type': 'loss', 'content': 0.04277703911066055, 'timestamp': '2025-10-02 00:44:43.293092', 'step': 18852, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:43.360583', 'step': 18852, 'epoch': 2}
{'type': 'loss', 'content': 0.03431099280714989, 'timestamp': '2025-10-02 00:44:43.369740', 'step': 18853, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:43.429845', 'step': 18853, 'epoch': 2}
{'type': 'loss', 'content': 0.08241955190896988, 'timestamp': '2025-10-02 00:44:43.440365', 'step': 18854, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:43.515265', 'step': 18854, 'epoch': 2}
{'type': 'loss', 'content': 0.03891798108816147, 'timestamp': '2025-10-02 00:44:43.518955', 'step': 18855, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:43.586159', 'step': 18855, 'epoch': 2}
{'type': 'loss', 'content': 0.07457155734300613, 'timestamp': '2025-10-02 00:44:43.593853', 'step': 18856, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:43.662298', 'step': 18856, 'epoch': 2}
{'type': 'loss', 'content': 0.01660262979567051, 'timestamp': '2025-10-02 00:44:43.671397', 'step': 18857, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:44:43.737310', 'step': 18857, 'epoch': 2}
{'type': 'loss', 'content': 0.033905789256095886, 'timestamp': '2025-10-02 00:44:43.747786', 'step': 18858, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:43.827525', 'step': 18858, 'epoch': 2}
{'type': 'loss', 'content': 0.03673365339636803, 'timestamp': '2025-10-02 00:44:43.841858', 'step': 18859, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:43.904930', 'step': 18859, 'epoch': 2}
{'type': 'loss', 'content': 0.17431601881980896, 'timestamp': '2025-10-02 00:44:43.912327', 'step': 18860, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:43.970545', 'step': 18860, 'epoch': 2}
{'type': 'loss', 'content': 0.05478794127702713, 'timestamp': '2025-10-02 00:44:43.979619', 'step': 18861, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:44.050889', 'step': 18861, 'epoch': 2}
{'type': 'loss', 'content': 0.013635399751365185, 'timestamp': '2025-10-02 00:44:44.053642', 'step': 18862, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:44:44.113564', 'step': 18862, 'epoch': 2}
{'type': 'loss', 'content': 0.1463645100593567, 'timestamp': '2025-10-02 00:44:44.117314', 'step': 18863, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:44:44.192632', 'step': 18863, 'epoch': 2}
{'type': 'loss', 'content': 0.04087820649147034, 'timestamp': '2025-10-02 00:44:44.203528', 'step': 18864, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:44.273363', 'step': 18864, 'epoch': 2}
{'type': 'loss', 'content': 0.08483636379241943, 'timestamp': '2025-10-02 00:44:44.277092', 'step': 18865, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:44.344904', 'step': 18865, 'epoch': 2}
{'type': 'loss', 'content': 0.09066351503133774, 'timestamp': '2025-10-02 00:44:44.347897', 'step': 18866, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:44.413935', 'step': 18866, 'epoch': 2}
{'type': 'loss', 'content': 0.05420386791229248, 'timestamp': '2025-10-02 00:44:44.417388', 'step': 18867, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:44.478614', 'step': 18867, 'epoch': 2}
{'type': 'loss', 'content': 0.14583636820316315, 'timestamp': '2025-10-02 00:44:44.485710', 'step': 18868, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:44:44.558658', 'step': 18868, 'epoch': 2}
{'type': 'loss', 'content': 0.14109380543231964, 'timestamp': '2025-10-02 00:44:44.568947', 'step': 18869, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:44:44.638574', 'step': 18869, 'epoch': 2}
{'type': 'loss', 'content': 0.055198028683662415, 'timestamp': '2025-10-02 00:44:44.643457', 'step': 18870, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:44.724589', 'step': 18870, 'epoch': 2}
{'type': 'loss', 'content': 0.01670079678297043, 'timestamp': '2025-10-02 00:44:44.727826', 'step': 18871, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:44.787159', 'step': 18871, 'epoch': 2}
{'type': 'loss', 'content': 0.04743320122361183, 'timestamp': '2025-10-02 00:44:44.793839', 'step': 18872, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:44.863745', 'step': 18872, 'epoch': 2}
{'type': 'loss', 'content': 0.05533662065863609, 'timestamp': '2025-10-02 00:44:44.869161', 'step': 18873, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:44.937482', 'step': 18873, 'epoch': 2}
{'type': 'loss', 'content': 0.031620629131793976, 'timestamp': '2025-10-02 00:44:44.948318', 'step': 18874, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:45.024945', 'step': 18874, 'epoch': 2}
{'type': 'loss', 'content': 0.09831708669662476, 'timestamp': '2025-10-02 00:44:45.028685', 'step': 18875, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:45.087695', 'step': 18875, 'epoch': 2}
{'type': 'loss', 'content': 0.025658536702394485, 'timestamp': '2025-10-02 00:44:45.095312', 'step': 18876, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:44:45.156939', 'step': 18876, 'epoch': 2}
{'type': 'loss', 'content': 0.0016077602049335837, 'timestamp': '2025-10-02 00:44:45.168632', 'step': 18877, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:44:45.226623', 'step': 18877, 'epoch': 2}
{'type': 'loss', 'content': 0.04941880330443382, 'timestamp': '2025-10-02 00:44:45.229566', 'step': 18878, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:45.305057', 'step': 18878, 'epoch': 2}
{'type': 'loss', 'content': 0.12709404528141022, 'timestamp': '2025-10-02 00:44:45.307927', 'step': 18879, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:44:45.385565', 'step': 18879, 'epoch': 2}
{'type': 'loss', 'content': 0.005537237972021103, 'timestamp': '2025-10-02 00:44:45.396531', 'step': 18880, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:45.462654', 'step': 18880, 'epoch': 2}
{'type': 'loss', 'content': 0.09475395828485489, 'timestamp': '2025-10-02 00:44:45.465407', 'step': 18881, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:45.522509', 'step': 18881, 'epoch': 2}
{'type': 'loss', 'content': 0.032694704830646515, 'timestamp': '2025-10-02 00:44:45.531648', 'step': 18882, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:44:45.599844', 'step': 18882, 'epoch': 2}
{'type': 'loss', 'content': 0.004313872195780277, 'timestamp': '2025-10-02 00:44:45.610042', 'step': 18883, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:44:45.674675', 'step': 18883, 'epoch': 2}
{'type': 'loss', 'content': 0.034012697637081146, 'timestamp': '2025-10-02 00:44:45.686041', 'step': 18884, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:45.745340', 'step': 18884, 'epoch': 2}
{'type': 'loss', 'content': 0.09404092282056808, 'timestamp': '2025-10-02 00:44:45.748674', 'step': 18885, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:45.805690', 'step': 18885, 'epoch': 2}
{'type': 'loss', 'content': 0.062096256762742996, 'timestamp': '2025-10-02 00:44:45.811407', 'step': 18886, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:45.867965', 'step': 18886, 'epoch': 2}
{'type': 'loss', 'content': 0.008466470055282116, 'timestamp': '2025-10-02 00:44:45.873560', 'step': 18887, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:45.935230', 'step': 18887, 'epoch': 2}
{'type': 'loss', 'content': 0.032058943063020706, 'timestamp': '2025-10-02 00:44:45.945194', 'step': 18888, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:46.008929', 'step': 18888, 'epoch': 2}
{'type': 'loss', 'content': 0.01827928237617016, 'timestamp': '2025-10-02 00:44:46.016018', 'step': 18889, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:46.081332', 'step': 18889, 'epoch': 2}
{'type': 'loss', 'content': 0.11253949254751205, 'timestamp': '2025-10-02 00:44:46.084303', 'step': 18890, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:46.150792', 'step': 18890, 'epoch': 2}
{'type': 'loss', 'content': 0.018145455047488213, 'timestamp': '2025-10-02 00:44:46.157885', 'step': 18891, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:44:46.222779', 'step': 18891, 'epoch': 2}
{'type': 'loss', 'content': 0.014958294108510017, 'timestamp': '2025-10-02 00:44:46.229151', 'step': 18892, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:44:46.284056', 'step': 18892, 'epoch': 2}
{'type': 'loss', 'content': 0.05796036124229431, 'timestamp': '2025-10-02 00:44:46.287817', 'step': 18893, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:44:46.352645', 'step': 18893, 'epoch': 2}
{'type': 'loss', 'content': 0.0660797581076622, 'timestamp': '2025-10-02 00:44:46.362847', 'step': 18894, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:46.426274', 'step': 18894, 'epoch': 2}
{'type': 'loss', 'content': 0.04799709469079971, 'timestamp': '2025-10-02 00:44:46.431828', 'step': 18895, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:46.488831', 'step': 18895, 'epoch': 2}
{'type': 'loss', 'content': 0.11246656626462936, 'timestamp': '2025-10-02 00:44:46.495159', 'step': 18896, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:46.560071', 'step': 18896, 'epoch': 2}
{'type': 'loss', 'content': 0.055365853011608124, 'timestamp': '2025-10-02 00:44:46.566893', 'step': 18897, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:46.629193', 'step': 18897, 'epoch': 2}
{'type': 'loss', 'content': 0.11685841530561447, 'timestamp': '2025-10-02 00:44:46.632955', 'step': 18898, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:46.691231', 'step': 18898, 'epoch': 2}
{'type': 'loss', 'content': 0.09247735142707825, 'timestamp': '2025-10-02 00:44:46.695220', 'step': 18899, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:46.761506', 'step': 18899, 'epoch': 2}
{'type': 'loss', 'content': 0.046090226620435715, 'timestamp': '2025-10-02 00:44:46.768329', 'step': 18900, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:46.843380', 'step': 18900, 'epoch': 2}
{'type': 'loss', 'content': 0.05346310883760452, 'timestamp': '2025-10-02 00:44:46.847027', 'step': 18901, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:46.909999', 'step': 18901, 'epoch': 2}
{'type': 'loss', 'content': 0.029184440150856972, 'timestamp': '2025-10-02 00:44:46.913420', 'step': 18902, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:46.980211', 'step': 18902, 'epoch': 2}
{'type': 'loss', 'content': 0.08859703689813614, 'timestamp': '2025-10-02 00:44:46.983313', 'step': 18903, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:47.045919', 'step': 18903, 'epoch': 2}
{'type': 'loss', 'content': 0.03948698937892914, 'timestamp': '2025-10-02 00:44:47.053204', 'step': 18904, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:47.126045', 'step': 18904, 'epoch': 2}
{'type': 'loss', 'content': 0.029591068625450134, 'timestamp': '2025-10-02 00:44:47.132805', 'step': 18905, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:47.190522', 'step': 18905, 'epoch': 2}
{'type': 'loss', 'content': 0.03290138393640518, 'timestamp': '2025-10-02 00:44:47.193489', 'step': 18906, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:47.254429', 'step': 18906, 'epoch': 2}
{'type': 'loss', 'content': 0.06010054796934128, 'timestamp': '2025-10-02 00:44:47.261256', 'step': 18907, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:47.317322', 'step': 18907, 'epoch': 2}
{'type': 'loss', 'content': 0.053626518696546555, 'timestamp': '2025-10-02 00:44:47.324094', 'step': 18908, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:44:47.389637', 'step': 18908, 'epoch': 2}
{'type': 'loss', 'content': 0.0609547458589077, 'timestamp': '2025-10-02 00:44:47.392245', 'step': 18909, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:47.451373', 'step': 18909, 'epoch': 2}
{'type': 'loss', 'content': 0.06638570129871368, 'timestamp': '2025-10-02 00:44:47.454282', 'step': 18910, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:47.524892', 'step': 18910, 'epoch': 2}
{'type': 'loss', 'content': 0.03749232366681099, 'timestamp': '2025-10-02 00:44:47.529344', 'step': 18911, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:47.587293', 'step': 18911, 'epoch': 2}
{'type': 'loss', 'content': 0.11127844452857971, 'timestamp': '2025-10-02 00:44:47.594581', 'step': 18912, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:44:47.662537', 'step': 18912, 'epoch': 2}
{'type': 'loss', 'content': 0.06700826436281204, 'timestamp': '2025-10-02 00:44:47.665574', 'step': 18913, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:47.731615', 'step': 18913, 'epoch': 2}
{'type': 'loss', 'content': 0.019181054085493088, 'timestamp': '2025-10-02 00:44:47.735154', 'step': 18914, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:47.806212', 'step': 18914, 'epoch': 2}
{'type': 'loss', 'content': 0.04647871106863022, 'timestamp': '2025-10-02 00:44:47.812910', 'step': 18915, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:47.870951', 'step': 18915, 'epoch': 2}
{'type': 'loss', 'content': 0.040521811693906784, 'timestamp': '2025-10-02 00:44:47.877737', 'step': 18916, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:47.933965', 'step': 18916, 'epoch': 2}
{'type': 'loss', 'content': 0.0759177953004837, 'timestamp': '2025-10-02 00:44:47.939676', 'step': 18917, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:44:48.009651', 'step': 18917, 'epoch': 2}
{'type': 'loss', 'content': 0.17036882042884827, 'timestamp': '2025-10-02 00:44:48.015886', 'step': 18918, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:44:48.078096', 'step': 18918, 'epoch': 2}
{'type': 'loss', 'content': 0.027860229834914207, 'timestamp': '2025-10-02 00:44:48.087627', 'step': 18919, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:48.154718', 'step': 18919, 'epoch': 2}
{'type': 'loss', 'content': 0.05446847900748253, 'timestamp': '2025-10-02 00:44:48.161328', 'step': 18920, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:48.223597', 'step': 18920, 'epoch': 2}
{'type': 'loss', 'content': 0.0015351936453953385, 'timestamp': '2025-10-02 00:44:48.230893', 'step': 18921, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:48.293163', 'step': 18921, 'epoch': 2}
{'type': 'loss', 'content': 0.07948639988899231, 'timestamp': '2025-10-02 00:44:48.300248', 'step': 18922, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:48.373630', 'step': 18922, 'epoch': 2}
{'type': 'loss', 'content': 0.09951470792293549, 'timestamp': '2025-10-02 00:44:48.376818', 'step': 18923, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:48.445278', 'step': 18923, 'epoch': 2}
{'type': 'loss', 'content': 0.09405706077814102, 'timestamp': '2025-10-02 00:44:48.456531', 'step': 18924, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:48.512416', 'step': 18924, 'epoch': 2}
{'type': 'loss', 'content': 0.0394713431596756, 'timestamp': '2025-10-02 00:44:48.515411', 'step': 18925, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:48.575159', 'step': 18925, 'epoch': 2}
{'type': 'loss', 'content': 0.025123775005340576, 'timestamp': '2025-10-02 00:44:48.577484', 'step': 18926, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-10-02 00:44:48.659219', 'step': 18926, 'epoch': 2}
{'type': 'loss', 'content': 0.03814081847667694, 'timestamp': '2025-10-02 00:44:48.673946', 'step': 18927, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:48.730430', 'step': 18927, 'epoch': 2}
{'type': 'loss', 'content': 0.009617593139410019, 'timestamp': '2025-10-02 00:44:48.736668', 'step': 18928, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:48.791474', 'step': 18928, 'epoch': 2}
{'type': 'loss', 'content': 0.043374817818403244, 'timestamp': '2025-10-02 00:44:48.798817', 'step': 18929, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:48.853822', 'step': 18929, 'epoch': 2}
{'type': 'loss', 'content': 0.0655459463596344, 'timestamp': '2025-10-02 00:44:48.863102', 'step': 18930, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:48.918345', 'step': 18930, 'epoch': 2}
{'type': 'loss', 'content': 0.058867063373327255, 'timestamp': '2025-10-02 00:44:48.920498', 'step': 18931, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:44:48.982888', 'step': 18931, 'epoch': 2}
{'type': 'loss', 'content': 0.031778570264577866, 'timestamp': '2025-10-02 00:44:48.994301', 'step': 18932, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:49.049920', 'step': 18932, 'epoch': 2}
{'type': 'loss', 'content': 0.02524416334927082, 'timestamp': '2025-10-02 00:44:49.057039', 'step': 18933, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:44:49.112520', 'step': 18933, 'epoch': 2}
{'type': 'loss', 'content': 0.08993373066186905, 'timestamp': '2025-10-02 00:44:49.117981', 'step': 18934, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:49.174628', 'step': 18934, 'epoch': 2}
{'type': 'loss', 'content': 0.06438920646905899, 'timestamp': '2025-10-02 00:44:49.181538', 'step': 18935, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:44:49.252272', 'step': 18935, 'epoch': 2}
{'type': 'loss', 'content': 0.039431024342775345, 'timestamp': '2025-10-02 00:44:49.265487', 'step': 18936, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:49.324823', 'step': 18936, 'epoch': 2}
{'type': 'loss', 'content': 0.02169186994433403, 'timestamp': '2025-10-02 00:44:49.326985', 'step': 18937, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:49.382927', 'step': 18937, 'epoch': 2}
{'type': 'loss', 'content': 0.07836972177028656, 'timestamp': '2025-10-02 00:44:49.388432', 'step': 18938, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:49.444896', 'step': 18938, 'epoch': 2}
{'type': 'loss', 'content': 0.09058863669633865, 'timestamp': '2025-10-02 00:44:49.447248', 'step': 18939, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:44:49.517411', 'step': 18939, 'epoch': 2}
{'type': 'loss', 'content': 0.026255987584590912, 'timestamp': '2025-10-02 00:44:49.530517', 'step': 18940, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:44:49.592488', 'step': 18940, 'epoch': 2}
{'type': 'loss', 'content': 0.0737786740064621, 'timestamp': '2025-10-02 00:44:49.604246', 'step': 18941, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:49.659823', 'step': 18941, 'epoch': 2}
{'type': 'loss', 'content': 0.029324080795049667, 'timestamp': '2025-10-02 00:44:49.662393', 'step': 18942, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:44:49.717088', 'step': 18942, 'epoch': 2}
{'type': 'loss', 'content': 0.08195042610168457, 'timestamp': '2025-10-02 00:44:49.719568', 'step': 18943, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:49.774622', 'step': 18943, 'epoch': 2}
{'type': 'loss', 'content': 0.07693026959896088, 'timestamp': '2025-10-02 00:44:49.781181', 'step': 18944, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:49.835850', 'step': 18944, 'epoch': 2}
{'type': 'loss', 'content': 0.11866666376590729, 'timestamp': '2025-10-02 00:44:49.838136', 'step': 18945, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:44:49.899861', 'step': 18945, 'epoch': 2}
{'type': 'loss', 'content': 0.10384161025285721, 'timestamp': '2025-10-02 00:44:49.902187', 'step': 18946, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:49.959108', 'step': 18946, 'epoch': 2}
{'type': 'loss', 'content': 0.10113852471113205, 'timestamp': '2025-10-02 00:44:49.962144', 'step': 18947, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:50.017200', 'step': 18947, 'epoch': 2}
{'type': 'loss', 'content': 0.05405784770846367, 'timestamp': '2025-10-02 00:44:50.023214', 'step': 18948, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:50.078660', 'step': 18948, 'epoch': 2}
{'type': 'loss', 'content': 0.03565178066492081, 'timestamp': '2025-10-02 00:44:50.084107', 'step': 18949, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:50.139747', 'step': 18949, 'epoch': 2}
{'type': 'loss', 'content': 0.1399177610874176, 'timestamp': '2025-10-02 00:44:50.142352', 'step': 18950, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:50.197742', 'step': 18950, 'epoch': 2}
{'type': 'loss', 'content': 0.09059840440750122, 'timestamp': '2025-10-02 00:44:50.200689', 'step': 18951, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:50.258097', 'step': 18951, 'epoch': 2}
{'type': 'loss', 'content': 0.047764912247657776, 'timestamp': '2025-10-02 00:44:50.264345', 'step': 18952, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:50.318625', 'step': 18952, 'epoch': 2}
{'type': 'loss', 'content': 0.049495939165353775, 'timestamp': '2025-10-02 00:44:50.323972', 'step': 18953, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:50.380012', 'step': 18953, 'epoch': 2}
{'type': 'loss', 'content': 0.03740774467587471, 'timestamp': '2025-10-02 00:44:50.382252', 'step': 18954, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:50.438091', 'step': 18954, 'epoch': 2}
{'type': 'loss', 'content': 0.1622101366519928, 'timestamp': '2025-10-02 00:44:50.440890', 'step': 18955, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:50.496696', 'step': 18955, 'epoch': 2}
{'type': 'loss', 'content': 0.032577045261859894, 'timestamp': '2025-10-02 00:44:50.504533', 'step': 18956, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:50.558754', 'step': 18956, 'epoch': 2}
{'type': 'loss', 'content': 0.08877816051244736, 'timestamp': '2025-10-02 00:44:50.560951', 'step': 18957, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:50.615442', 'step': 18957, 'epoch': 2}
{'type': 'loss', 'content': 0.11262738704681396, 'timestamp': '2025-10-02 00:44:50.617234', 'step': 18958, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:50.672533', 'step': 18958, 'epoch': 2}
{'type': 'loss', 'content': 0.05595950037240982, 'timestamp': '2025-10-02 00:44:50.674769', 'step': 18959, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:50.730017', 'step': 18959, 'epoch': 2}
{'type': 'loss', 'content': 0.028411082923412323, 'timestamp': '2025-10-02 00:44:50.737825', 'step': 18960, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:44:50.793422', 'step': 18960, 'epoch': 2}
{'type': 'loss', 'content': 0.06781560182571411, 'timestamp': '2025-10-02 00:44:50.795755', 'step': 18961, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:50.851429', 'step': 18961, 'epoch': 2}
{'type': 'loss', 'content': 0.018184250220656395, 'timestamp': '2025-10-02 00:44:50.853646', 'step': 18962, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:50.909251', 'step': 18962, 'epoch': 2}
{'type': 'loss', 'content': 0.2639317810535431, 'timestamp': '2025-10-02 00:44:50.911117', 'step': 18963, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:44:50.965597', 'step': 18963, 'epoch': 2}
{'type': 'loss', 'content': 0.042634859681129456, 'timestamp': '2025-10-02 00:44:50.971506', 'step': 18964, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:51.026498', 'step': 18964, 'epoch': 2}
{'type': 'loss', 'content': 0.037303999066352844, 'timestamp': '2025-10-02 00:44:51.031850', 'step': 18965, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:44:51.088548', 'step': 18965, 'epoch': 2}
{'type': 'loss', 'content': 0.0635145753622055, 'timestamp': '2025-10-02 00:44:51.091613', 'step': 18966, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:51.148783', 'step': 18966, 'epoch': 2}
{'type': 'loss', 'content': 0.154672309756279, 'timestamp': '2025-10-02 00:44:51.151535', 'step': 18967, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:51.207360', 'step': 18967, 'epoch': 2}
{'type': 'loss', 'content': 0.10288076102733612, 'timestamp': '2025-10-02 00:44:51.213322', 'step': 18968, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:44:51.267657', 'step': 18968, 'epoch': 2}
{'type': 'loss', 'content': 0.014654711820185184, 'timestamp': '2025-10-02 00:44:51.277907', 'step': 18969, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:51.334537', 'step': 18969, 'epoch': 2}
{'type': 'loss', 'content': 0.006543830037117004, 'timestamp': '2025-10-02 00:44:51.343920', 'step': 18970, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:51.399431', 'step': 18970, 'epoch': 2}
{'type': 'loss', 'content': 0.050264086574316025, 'timestamp': '2025-10-02 00:44:51.402084', 'step': 18971, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:51.457518', 'step': 18971, 'epoch': 2}
{'type': 'loss', 'content': 0.07465068995952606, 'timestamp': '2025-10-02 00:44:51.463411', 'step': 18972, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:51.518095', 'step': 18972, 'epoch': 2}
{'type': 'loss', 'content': 0.031438663601875305, 'timestamp': '2025-10-02 00:44:51.527399', 'step': 18973, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:51.582285', 'step': 18973, 'epoch': 2}
{'type': 'loss', 'content': 0.017224274575710297, 'timestamp': '2025-10-02 00:44:51.584568', 'step': 18974, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:44:51.641461', 'step': 18974, 'epoch': 2}
{'type': 'loss', 'content': 0.04332662746310234, 'timestamp': '2025-10-02 00:44:51.643820', 'step': 18975, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:51.698321', 'step': 18975, 'epoch': 2}
{'type': 'loss', 'content': 0.09697854518890381, 'timestamp': '2025-10-02 00:44:51.704273', 'step': 18976, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:51.760331', 'step': 18976, 'epoch': 2}
{'type': 'loss', 'content': 0.030863992869853973, 'timestamp': '2025-10-02 00:44:51.762872', 'step': 18977, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:44:51.818717', 'step': 18977, 'epoch': 2}
{'type': 'loss', 'content': 0.11601127684116364, 'timestamp': '2025-10-02 00:44:51.820931', 'step': 18978, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:44:51.880942', 'step': 18978, 'epoch': 2}
{'type': 'loss', 'content': 0.004307345021516085, 'timestamp': '2025-10-02 00:44:51.890512', 'step': 18979, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:51.956709', 'step': 18979, 'epoch': 2}
{'type': 'loss', 'content': 0.07674448937177658, 'timestamp': '2025-10-02 00:44:51.965398', 'step': 18980, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:52.022470', 'step': 18980, 'epoch': 2}
{'type': 'loss', 'content': 0.05953969061374664, 'timestamp': '2025-10-02 00:44:52.024722', 'step': 18981, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:52.079510', 'step': 18981, 'epoch': 2}
{'type': 'loss', 'content': 0.04986412823200226, 'timestamp': '2025-10-02 00:44:52.082150', 'step': 18982, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:52.137748', 'step': 18982, 'epoch': 2}
{'type': 'loss', 'content': 0.08118286728858948, 'timestamp': '2025-10-02 00:44:52.142138', 'step': 18983, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:52.198025', 'step': 18983, 'epoch': 2}
{'type': 'loss', 'content': 0.09106173366308212, 'timestamp': '2025-10-02 00:44:52.204548', 'step': 18984, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:44:52.258883', 'step': 18984, 'epoch': 2}
{'type': 'loss', 'content': 0.07536935061216354, 'timestamp': '2025-10-02 00:44:52.261335', 'step': 18985, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:44:52.318770', 'step': 18985, 'epoch': 2}
{'type': 'loss', 'content': 0.03517036512494087, 'timestamp': '2025-10-02 00:44:52.328291', 'step': 18986, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:44:52.384715', 'step': 18986, 'epoch': 2}
{'type': 'loss', 'content': 0.026060519739985466, 'timestamp': '2025-10-02 00:44:52.394266', 'step': 18987, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:52.461250', 'step': 18987, 'epoch': 2}
{'type': 'loss', 'content': 0.06383184343576431, 'timestamp': '2025-10-02 00:44:52.467211', 'step': 18988, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:52.521440', 'step': 18988, 'epoch': 2}
{'type': 'loss', 'content': 0.06868186593055725, 'timestamp': '2025-10-02 00:44:52.524435', 'step': 18989, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:52.589809', 'step': 18989, 'epoch': 2}
{'type': 'loss', 'content': 0.03942388668656349, 'timestamp': '2025-10-02 00:44:52.599051', 'step': 18990, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:52.654961', 'step': 18990, 'epoch': 2}
{'type': 'loss', 'content': 0.07987849414348602, 'timestamp': '2025-10-02 00:44:52.657621', 'step': 18991, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:52.713560', 'step': 18991, 'epoch': 2}
{'type': 'loss', 'content': 0.07363925129175186, 'timestamp': '2025-10-02 00:44:52.718913', 'step': 18992, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:44:52.772857', 'step': 18992, 'epoch': 2}
{'type': 'loss', 'content': 0.18552643060684204, 'timestamp': '2025-10-02 00:44:52.775298', 'step': 18993, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:52.830927', 'step': 18993, 'epoch': 2}
{'type': 'loss', 'content': 0.03258848935365677, 'timestamp': '2025-10-02 00:44:52.833316', 'step': 18994, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:52.888676', 'step': 18994, 'epoch': 2}
{'type': 'loss', 'content': 0.03468402847647667, 'timestamp': '2025-10-02 00:44:52.894111', 'step': 18995, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:52.951135', 'step': 18995, 'epoch': 2}
{'type': 'loss', 'content': 0.20815402269363403, 'timestamp': '2025-10-02 00:44:52.962051', 'step': 18996, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:44:53.022260', 'step': 18996, 'epoch': 2}
{'type': 'loss', 'content': 0.11394471675157547, 'timestamp': '2025-10-02 00:44:53.024358', 'step': 18997, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:53.079091', 'step': 18997, 'epoch': 2}
{'type': 'loss', 'content': 0.13924579322338104, 'timestamp': '2025-10-02 00:44:53.081626', 'step': 18998, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:44:53.137523', 'step': 18998, 'epoch': 2}
{'type': 'loss', 'content': 0.07250626385211945, 'timestamp': '2025-10-02 00:44:53.139972', 'step': 18999, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:44:53.194608', 'step': 18999, 'epoch': 2}
{'type': 'loss', 'content': 0.06451600044965744, 'timestamp': '2025-10-02 00:44:53.200640', 'step': 19000, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 19000', 'timestamp': '2025-10-02 00:44:53.609034', 'step': 19000, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:53.668499', 'step': 19000, 'epoch': 2}
{'type': 'loss', 'content': 0.0025661673862487078, 'timestamp': '2025-10-02 00:44:53.670733', 'step': 19001, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:53.726354', 'step': 19001, 'epoch': 2}
{'type': 'loss', 'content': 0.0431445874273777, 'timestamp': '2025-10-02 00:44:53.728710', 'step': 19002, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:44:53.784402', 'step': 19002, 'epoch': 2}
{'type': 'loss', 'content': 0.1812954694032669, 'timestamp': '2025-10-02 00:44:53.786594', 'step': 19003, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:53.841731', 'step': 19003, 'epoch': 2}
{'type': 'loss', 'content': 0.01984589174389839, 'timestamp': '2025-10-02 00:44:53.851024', 'step': 19004, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:53.904901', 'step': 19004, 'epoch': 2}
{'type': 'loss', 'content': 0.08162358403205872, 'timestamp': '2025-10-02 00:44:53.907680', 'step': 19005, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:53.962861', 'step': 19005, 'epoch': 2}
{'type': 'loss', 'content': 0.018868843093514442, 'timestamp': '2025-10-02 00:44:53.965343', 'step': 19006, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:44:54.027991', 'step': 19006, 'epoch': 2}
{'type': 'loss', 'content': 0.032716117799282074, 'timestamp': '2025-10-02 00:44:54.038778', 'step': 19007, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:44:54.093432', 'step': 19007, 'epoch': 2}
{'type': 'loss', 'content': 0.07059487700462341, 'timestamp': '2025-10-02 00:44:54.099443', 'step': 19008, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:54.154608', 'step': 19008, 'epoch': 2}
{'type': 'loss', 'content': 0.01032311376184225, 'timestamp': '2025-10-02 00:44:54.156672', 'step': 19009, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:54.212218', 'step': 19009, 'epoch': 2}
{'type': 'loss', 'content': 0.020110180601477623, 'timestamp': '2025-10-02 00:44:54.214715', 'step': 19010, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:54.269769', 'step': 19010, 'epoch': 2}
{'type': 'loss', 'content': 0.1127285584807396, 'timestamp': '2025-10-02 00:44:54.271976', 'step': 19011, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:54.328168', 'step': 19011, 'epoch': 2}
{'type': 'loss', 'content': 0.026354430243372917, 'timestamp': '2025-10-02 00:44:54.334371', 'step': 19012, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:54.389658', 'step': 19012, 'epoch': 2}
{'type': 'loss', 'content': 0.03061918169260025, 'timestamp': '2025-10-02 00:44:54.392356', 'step': 19013, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:44:54.447772', 'step': 19013, 'epoch': 2}
{'type': 'loss', 'content': 0.13918839395046234, 'timestamp': '2025-10-02 00:44:54.450223', 'step': 19014, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:54.506309', 'step': 19014, 'epoch': 2}
{'type': 'loss', 'content': 0.05102866515517235, 'timestamp': '2025-10-02 00:44:54.513259', 'step': 19015, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:54.568193', 'step': 19015, 'epoch': 2}
{'type': 'loss', 'content': 0.12125980854034424, 'timestamp': '2025-10-02 00:44:54.574163', 'step': 19016, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:44:54.628830', 'step': 19016, 'epoch': 2}
{'type': 'loss', 'content': 0.039249710738658905, 'timestamp': '2025-10-02 00:44:54.631839', 'step': 19017, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:54.687234', 'step': 19017, 'epoch': 2}
{'type': 'loss', 'content': 0.049723364412784576, 'timestamp': '2025-10-02 00:44:54.689720', 'step': 19018, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:44:54.749665', 'step': 19018, 'epoch': 2}
{'type': 'loss', 'content': 0.07735409587621689, 'timestamp': '2025-10-02 00:44:54.759772', 'step': 19019, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:54.815456', 'step': 19019, 'epoch': 2}
{'type': 'loss', 'content': 0.01469576545059681, 'timestamp': '2025-10-02 00:44:54.822054', 'step': 19020, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:54.877064', 'step': 19020, 'epoch': 2}
{'type': 'loss', 'content': 0.04722875356674194, 'timestamp': '2025-10-02 00:44:54.879389', 'step': 19021, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:44:54.941125', 'step': 19021, 'epoch': 2}
{'type': 'loss', 'content': 0.013604626059532166, 'timestamp': '2025-10-02 00:44:54.951588', 'step': 19022, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:55.007995', 'step': 19022, 'epoch': 2}
{'type': 'loss', 'content': 0.05327686294913292, 'timestamp': '2025-10-02 00:44:55.010307', 'step': 19023, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:44:55.084994', 'step': 19023, 'epoch': 2}
{'type': 'loss', 'content': 0.043468330055475235, 'timestamp': '2025-10-02 00:44:55.099194', 'step': 19024, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:44:55.167603', 'step': 19024, 'epoch': 2}
{'type': 'loss', 'content': 0.02875416912138462, 'timestamp': '2025-10-02 00:44:55.180974', 'step': 19025, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:44:55.238105', 'step': 19025, 'epoch': 2}
{'type': 'loss', 'content': 0.07058116793632507, 'timestamp': '2025-10-02 00:44:55.240688', 'step': 19026, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:55.295846', 'step': 19026, 'epoch': 2}
{'type': 'loss', 'content': 0.13375014066696167, 'timestamp': '2025-10-02 00:44:55.297994', 'step': 19027, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:44:55.353088', 'step': 19027, 'epoch': 2}
{'type': 'loss', 'content': 0.04874338582158089, 'timestamp': '2025-10-02 00:44:55.358933', 'step': 19028, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:44:55.416554', 'step': 19028, 'epoch': 2}
{'type': 'loss', 'content': 0.13663408160209656, 'timestamp': '2025-10-02 00:44:55.418290', 'step': 19029, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:55.472914', 'step': 19029, 'epoch': 2}
{'type': 'loss', 'content': 0.12284176051616669, 'timestamp': '2025-10-02 00:44:55.475218', 'step': 19030, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:55.529501', 'step': 19030, 'epoch': 2}
{'type': 'loss', 'content': 0.18062102794647217, 'timestamp': '2025-10-02 00:44:55.531596', 'step': 19031, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:44:55.604624', 'step': 19031, 'epoch': 2}
{'type': 'loss', 'content': 0.007093237712979317, 'timestamp': '2025-10-02 00:44:55.617701', 'step': 19032, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:55.680075', 'step': 19032, 'epoch': 2}
{'type': 'loss', 'content': 0.05336557328701019, 'timestamp': '2025-10-02 00:44:55.685638', 'step': 19033, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:55.742842', 'step': 19033, 'epoch': 2}
{'type': 'loss', 'content': 0.09382349252700806, 'timestamp': '2025-10-02 00:44:55.746739', 'step': 19034, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:55.805516', 'step': 19034, 'epoch': 2}
{'type': 'loss', 'content': 0.044667914509773254, 'timestamp': '2025-10-02 00:44:55.812657', 'step': 19035, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:44:55.874082', 'step': 19035, 'epoch': 2}
{'type': 'loss', 'content': 0.06718329340219498, 'timestamp': '2025-10-02 00:44:55.885002', 'step': 19036, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:55.946083', 'step': 19036, 'epoch': 2}
{'type': 'loss', 'content': 0.008475085720419884, 'timestamp': '2025-10-02 00:44:55.951513', 'step': 19037, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:56.007380', 'step': 19037, 'epoch': 2}
{'type': 'loss', 'content': 0.03366044536232948, 'timestamp': '2025-10-02 00:44:56.016697', 'step': 19038, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:44:56.085338', 'step': 19038, 'epoch': 2}
{'type': 'loss', 'content': 0.0423072949051857, 'timestamp': '2025-10-02 00:44:56.095735', 'step': 19039, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:56.158781', 'step': 19039, 'epoch': 2}
{'type': 'loss', 'content': 0.07881585508584976, 'timestamp': '2025-10-02 00:44:56.165380', 'step': 19040, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:56.221717', 'step': 19040, 'epoch': 2}
{'type': 'loss', 'content': 0.08188892900943756, 'timestamp': '2025-10-02 00:44:56.224870', 'step': 19041, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:44:56.289952', 'step': 19041, 'epoch': 2}
{'type': 'loss', 'content': 0.025825761258602142, 'timestamp': '2025-10-02 00:44:56.300375', 'step': 19042, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:56.359379', 'step': 19042, 'epoch': 2}
{'type': 'loss', 'content': 0.10746334493160248, 'timestamp': '2025-10-02 00:44:56.364576', 'step': 19043, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:56.421189', 'step': 19043, 'epoch': 2}
{'type': 'loss', 'content': 0.08188963681459427, 'timestamp': '2025-10-02 00:44:56.426587', 'step': 19044, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:44:56.483376', 'step': 19044, 'epoch': 2}
{'type': 'loss', 'content': 0.07220002263784409, 'timestamp': '2025-10-02 00:44:56.486332', 'step': 19045, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:56.541652', 'step': 19045, 'epoch': 2}
{'type': 'loss', 'content': 0.139447420835495, 'timestamp': '2025-10-02 00:44:56.544256', 'step': 19046, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:44:56.606743', 'step': 19046, 'epoch': 2}
{'type': 'loss', 'content': 0.020961621776223183, 'timestamp': '2025-10-02 00:44:56.617169', 'step': 19047, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:44:56.680680', 'step': 19047, 'epoch': 2}
{'type': 'loss', 'content': 0.021966788917779922, 'timestamp': '2025-10-02 00:44:56.691859', 'step': 19048, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:56.747393', 'step': 19048, 'epoch': 2}
{'type': 'loss', 'content': 0.043131694197654724, 'timestamp': '2025-10-02 00:44:56.754309', 'step': 19049, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:44:56.814548', 'step': 19049, 'epoch': 2}
{'type': 'loss', 'content': 0.02722611464560032, 'timestamp': '2025-10-02 00:44:56.824661', 'step': 19050, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:56.880952', 'step': 19050, 'epoch': 2}
{'type': 'loss', 'content': 0.08554616570472717, 'timestamp': '2025-10-02 00:44:56.883362', 'step': 19051, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:44:56.947001', 'step': 19051, 'epoch': 2}
{'type': 'loss', 'content': 0.011659280396997929, 'timestamp': '2025-10-02 00:44:56.958604', 'step': 19052, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:57.014620', 'step': 19052, 'epoch': 2}
{'type': 'loss', 'content': 0.06489616632461548, 'timestamp': '2025-10-02 00:44:57.017628', 'step': 19053, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:57.072424', 'step': 19053, 'epoch': 2}
{'type': 'loss', 'content': 0.052126627415418625, 'timestamp': '2025-10-02 00:44:57.076403', 'step': 19054, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:44:57.133430', 'step': 19054, 'epoch': 2}
{'type': 'loss', 'content': 0.01711813174188137, 'timestamp': '2025-10-02 00:44:57.136211', 'step': 19055, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:57.194677', 'step': 19055, 'epoch': 2}
{'type': 'loss', 'content': 0.0327327586710453, 'timestamp': '2025-10-02 00:44:57.201076', 'step': 19056, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:57.255655', 'step': 19056, 'epoch': 2}
{'type': 'loss', 'content': 0.08332203328609467, 'timestamp': '2025-10-02 00:44:57.261020', 'step': 19057, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:57.315658', 'step': 19057, 'epoch': 2}
{'type': 'loss', 'content': 0.06418780982494354, 'timestamp': '2025-10-02 00:44:57.317878', 'step': 19058, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:44:57.372477', 'step': 19058, 'epoch': 2}
{'type': 'loss', 'content': 0.035322122275829315, 'timestamp': '2025-10-02 00:44:57.374636', 'step': 19059, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:57.429405', 'step': 19059, 'epoch': 2}
{'type': 'loss', 'content': 0.025365449488162994, 'timestamp': '2025-10-02 00:44:57.435606', 'step': 19060, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:57.489653', 'step': 19060, 'epoch': 2}
{'type': 'loss', 'content': 0.04511558264493942, 'timestamp': '2025-10-02 00:44:57.496736', 'step': 19061, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:44:57.556036', 'step': 19061, 'epoch': 2}
{'type': 'loss', 'content': 0.017721077427268028, 'timestamp': '2025-10-02 00:44:57.566201', 'step': 19062, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:57.623492', 'step': 19062, 'epoch': 2}
{'type': 'loss', 'content': 0.04916653409600258, 'timestamp': '2025-10-02 00:44:57.630838', 'step': 19063, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:57.686506', 'step': 19063, 'epoch': 2}
{'type': 'loss', 'content': 0.020581014454364777, 'timestamp': '2025-10-02 00:44:57.692814', 'step': 19064, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:57.747877', 'step': 19064, 'epoch': 2}
{'type': 'loss', 'content': 0.008046329952776432, 'timestamp': '2025-10-02 00:44:57.754791', 'step': 19065, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:44:57.809955', 'step': 19065, 'epoch': 2}
{'type': 'loss', 'content': 0.07916411012411118, 'timestamp': '2025-10-02 00:44:57.812353', 'step': 19066, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:57.868058', 'step': 19066, 'epoch': 2}
{'type': 'loss', 'content': 0.026804480701684952, 'timestamp': '2025-10-02 00:44:57.870604', 'step': 19067, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:57.926664', 'step': 19067, 'epoch': 2}
{'type': 'loss', 'content': 0.03421564772725105, 'timestamp': '2025-10-02 00:44:57.934568', 'step': 19068, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:57.989025', 'step': 19068, 'epoch': 2}
{'type': 'loss', 'content': 0.09353452920913696, 'timestamp': '2025-10-02 00:44:57.991397', 'step': 19069, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:44:58.046140', 'step': 19069, 'epoch': 2}
{'type': 'loss', 'content': 0.07708261162042618, 'timestamp': '2025-10-02 00:44:58.051617', 'step': 19070, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:58.106783', 'step': 19070, 'epoch': 2}
{'type': 'loss', 'content': 0.08286077529191971, 'timestamp': '2025-10-02 00:44:58.108950', 'step': 19071, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:44:58.171527', 'step': 19071, 'epoch': 2}
{'type': 'loss', 'content': 0.01006702147424221, 'timestamp': '2025-10-02 00:44:58.182954', 'step': 19072, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:58.237314', 'step': 19072, 'epoch': 2}
{'type': 'loss', 'content': 0.06995011121034622, 'timestamp': '2025-10-02 00:44:58.239474', 'step': 19073, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:58.293997', 'step': 19073, 'epoch': 2}
{'type': 'loss', 'content': 0.16649428009986877, 'timestamp': '2025-10-02 00:44:58.296670', 'step': 19074, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:58.351959', 'step': 19074, 'epoch': 2}
{'type': 'loss', 'content': 0.09222813695669174, 'timestamp': '2025-10-02 00:44:58.354475', 'step': 19075, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:58.409424', 'step': 19075, 'epoch': 2}
{'type': 'loss', 'content': 0.03331868350505829, 'timestamp': '2025-10-02 00:44:58.415979', 'step': 19076, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:58.472010', 'step': 19076, 'epoch': 2}
{'type': 'loss', 'content': 0.02597430907189846, 'timestamp': '2025-10-02 00:44:58.474591', 'step': 19077, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:44:58.531124', 'step': 19077, 'epoch': 2}
{'type': 'loss', 'content': 0.13705883920192719, 'timestamp': '2025-10-02 00:44:58.533295', 'step': 19078, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:44:58.588339', 'step': 19078, 'epoch': 2}
{'type': 'loss', 'content': 0.16404478251934052, 'timestamp': '2025-10-02 00:44:58.590419', 'step': 19079, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:58.644704', 'step': 19079, 'epoch': 2}
{'type': 'loss', 'content': 0.0292596984654665, 'timestamp': '2025-10-02 00:44:58.652710', 'step': 19080, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:58.706242', 'step': 19080, 'epoch': 2}
{'type': 'loss', 'content': 0.04026312753558159, 'timestamp': '2025-10-02 00:44:58.708523', 'step': 19081, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:44:58.762652', 'step': 19081, 'epoch': 2}
{'type': 'loss', 'content': 0.10766378045082092, 'timestamp': '2025-10-02 00:44:58.764913', 'step': 19082, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:58.820505', 'step': 19082, 'epoch': 2}
{'type': 'loss', 'content': 0.12075506895780563, 'timestamp': '2025-10-02 00:44:58.822917', 'step': 19083, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:44:58.882123', 'step': 19083, 'epoch': 2}
{'type': 'loss', 'content': 0.10494380444288254, 'timestamp': '2025-10-02 00:44:58.888512', 'step': 19084, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:58.943802', 'step': 19084, 'epoch': 2}
{'type': 'loss', 'content': 0.039555907249450684, 'timestamp': '2025-10-02 00:44:58.951116', 'step': 19085, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:44:59.012106', 'step': 19085, 'epoch': 2}
{'type': 'loss', 'content': 0.09805946797132492, 'timestamp': '2025-10-02 00:44:59.022506', 'step': 19086, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:44:59.091127', 'step': 19086, 'epoch': 2}
{'type': 'loss', 'content': 0.05649203807115555, 'timestamp': '2025-10-02 00:44:59.103392', 'step': 19087, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:44:59.158874', 'step': 19087, 'epoch': 2}
{'type': 'loss', 'content': 0.05931990221142769, 'timestamp': '2025-10-02 00:44:59.164881', 'step': 19088, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:59.219074', 'step': 19088, 'epoch': 2}
{'type': 'loss', 'content': 0.06987258791923523, 'timestamp': '2025-10-02 00:44:59.221360', 'step': 19089, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:44:59.281628', 'step': 19089, 'epoch': 2}
{'type': 'loss', 'content': 0.005613223183900118, 'timestamp': '2025-10-02 00:44:59.291756', 'step': 19090, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:59.349579', 'step': 19090, 'epoch': 2}
{'type': 'loss', 'content': 0.018646543845534325, 'timestamp': '2025-10-02 00:44:59.351787', 'step': 19091, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:59.407141', 'step': 19091, 'epoch': 2}
{'type': 'loss', 'content': 0.038987088948488235, 'timestamp': '2025-10-02 00:44:59.413107', 'step': 19092, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:44:59.470605', 'step': 19092, 'epoch': 2}
{'type': 'loss', 'content': 0.06202917918562889, 'timestamp': '2025-10-02 00:44:59.481549', 'step': 19093, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:44:59.536530', 'step': 19093, 'epoch': 2}
{'type': 'loss', 'content': 0.03069603070616722, 'timestamp': '2025-10-02 00:44:59.538919', 'step': 19094, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:44:59.594124', 'step': 19094, 'epoch': 2}
{'type': 'loss', 'content': 0.04582967609167099, 'timestamp': '2025-10-02 00:44:59.596915', 'step': 19095, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:44:59.652073', 'step': 19095, 'epoch': 2}
{'type': 'loss', 'content': 0.08079519867897034, 'timestamp': '2025-10-02 00:44:59.659863', 'step': 19096, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:44:59.715325', 'step': 19096, 'epoch': 2}
{'type': 'loss', 'content': 0.038363173604011536, 'timestamp': '2025-10-02 00:44:59.724417', 'step': 19097, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:44:59.779023', 'step': 19097, 'epoch': 2}
{'type': 'loss', 'content': 0.16627137362957, 'timestamp': '2025-10-02 00:44:59.781329', 'step': 19098, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:44:59.836301', 'step': 19098, 'epoch': 2}
{'type': 'loss', 'content': 0.0681067630648613, 'timestamp': '2025-10-02 00:44:59.838384', 'step': 19099, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:44:59.893020', 'step': 19099, 'epoch': 2}
{'type': 'loss', 'content': 0.02751910872757435, 'timestamp': '2025-10-02 00:44:59.898853', 'step': 19100, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:44:59.953738', 'step': 19100, 'epoch': 2}
{'type': 'loss', 'content': 0.03684954345226288, 'timestamp': '2025-10-02 00:44:59.963799', 'step': 19101, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:45:00.031772', 'step': 19101, 'epoch': 2}
{'type': 'loss', 'content': 0.009593859314918518, 'timestamp': '2025-10-02 00:45:00.043703', 'step': 19102, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:00.098524', 'step': 19102, 'epoch': 2}
{'type': 'loss', 'content': 0.11173349618911743, 'timestamp': '2025-10-02 00:45:00.100572', 'step': 19103, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:00.156258', 'step': 19103, 'epoch': 2}
{'type': 'loss', 'content': 0.12313662469387054, 'timestamp': '2025-10-02 00:45:00.162147', 'step': 19104, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:00.217176', 'step': 19104, 'epoch': 2}
{'type': 'loss', 'content': 0.06050032749772072, 'timestamp': '2025-10-02 00:45:00.224474', 'step': 19105, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:00.279452', 'step': 19105, 'epoch': 2}
{'type': 'loss', 'content': 0.0631154403090477, 'timestamp': '2025-10-02 00:45:00.282025', 'step': 19106, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:45:00.343294', 'step': 19106, 'epoch': 2}
{'type': 'loss', 'content': 0.046404361724853516, 'timestamp': '2025-10-02 00:45:00.353738', 'step': 19107, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:45:00.408939', 'step': 19107, 'epoch': 2}
{'type': 'loss', 'content': 0.06187748163938522, 'timestamp': '2025-10-02 00:45:00.415295', 'step': 19108, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:00.469996', 'step': 19108, 'epoch': 2}
{'type': 'loss', 'content': 0.0371008962392807, 'timestamp': '2025-10-02 00:45:00.480198', 'step': 19109, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:00.534506', 'step': 19109, 'epoch': 2}
{'type': 'loss', 'content': 0.07051452994346619, 'timestamp': '2025-10-02 00:45:00.536923', 'step': 19110, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:00.591711', 'step': 19110, 'epoch': 2}
{'type': 'loss', 'content': 0.04041966050863266, 'timestamp': '2025-10-02 00:45:00.594339', 'step': 19111, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:00.648533', 'step': 19111, 'epoch': 2}
{'type': 'loss', 'content': 0.03918979689478874, 'timestamp': '2025-10-02 00:45:00.654315', 'step': 19112, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:00.708801', 'step': 19112, 'epoch': 2}
{'type': 'loss', 'content': 0.11185307800769806, 'timestamp': '2025-10-02 00:45:00.711357', 'step': 19113, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:00.766685', 'step': 19113, 'epoch': 2}
{'type': 'loss', 'content': 0.007131374441087246, 'timestamp': '2025-10-02 00:45:00.768941', 'step': 19114, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:45:00.823283', 'step': 19114, 'epoch': 2}
{'type': 'loss', 'content': 0.09436368942260742, 'timestamp': '2025-10-02 00:45:00.825881', 'step': 19115, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:45:00.882087', 'step': 19115, 'epoch': 2}
{'type': 'loss', 'content': 0.03681630641222, 'timestamp': '2025-10-02 00:45:00.892043', 'step': 19116, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:00.946561', 'step': 19116, 'epoch': 2}
{'type': 'loss', 'content': 0.0017295280704274774, 'timestamp': '2025-10-02 00:45:00.951882', 'step': 19117, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:01.007270', 'step': 19117, 'epoch': 2}
{'type': 'loss', 'content': 0.046445831656455994, 'timestamp': '2025-10-02 00:45:01.014618', 'step': 19118, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:45:01.069956', 'step': 19118, 'epoch': 2}
{'type': 'loss', 'content': 0.18281994760036469, 'timestamp': '2025-10-02 00:45:01.072355', 'step': 19119, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:01.126646', 'step': 19119, 'epoch': 2}
{'type': 'loss', 'content': 0.08006442338228226, 'timestamp': '2025-10-02 00:45:01.134767', 'step': 19120, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:01.188830', 'step': 19120, 'epoch': 2}
{'type': 'loss', 'content': 0.09443366527557373, 'timestamp': '2025-10-02 00:45:01.190993', 'step': 19121, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:01.245899', 'step': 19121, 'epoch': 2}
{'type': 'loss', 'content': 0.024988051503896713, 'timestamp': '2025-10-02 00:45:01.248218', 'step': 19122, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:45:01.303318', 'step': 19122, 'epoch': 2}
{'type': 'loss', 'content': 0.172474205493927, 'timestamp': '2025-10-02 00:45:01.305980', 'step': 19123, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 544], 'flops': 10880066115712.0}, 'timestamp': '2025-10-02 00:45:01.388102', 'step': 19123, 'epoch': 2}
{'type': 'loss', 'content': 0.03497519716620445, 'timestamp': '2025-10-02 00:45:01.403706', 'step': 19124, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:01.458401', 'step': 19124, 'epoch': 2}
{'type': 'loss', 'content': 0.0662061795592308, 'timestamp': '2025-10-02 00:45:01.460690', 'step': 19125, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:45:01.515428', 'step': 19125, 'epoch': 2}
{'type': 'loss', 'content': 0.16979588568210602, 'timestamp': '2025-10-02 00:45:01.517900', 'step': 19126, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:45:01.580350', 'step': 19126, 'epoch': 2}
{'type': 'loss', 'content': 0.011459197849035263, 'timestamp': '2025-10-02 00:45:01.590832', 'step': 19127, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:01.647235', 'step': 19127, 'epoch': 2}
{'type': 'loss', 'content': 0.05176569148898125, 'timestamp': '2025-10-02 00:45:01.657522', 'step': 19128, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:01.712190', 'step': 19128, 'epoch': 2}
{'type': 'loss', 'content': 0.050965167582035065, 'timestamp': '2025-10-02 00:45:01.714472', 'step': 19129, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:45:01.768962', 'step': 19129, 'epoch': 2}
{'type': 'loss', 'content': 0.11819971352815628, 'timestamp': '2025-10-02 00:45:01.772089', 'step': 19130, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:01.827491', 'step': 19130, 'epoch': 2}
{'type': 'loss', 'content': 0.058532096445560455, 'timestamp': '2025-10-02 00:45:01.829625', 'step': 19131, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:45:01.884923', 'step': 19131, 'epoch': 2}
{'type': 'loss', 'content': 0.014550375752151012, 'timestamp': '2025-10-02 00:45:01.890952', 'step': 19132, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:01.945488', 'step': 19132, 'epoch': 2}
{'type': 'loss', 'content': 0.08218073844909668, 'timestamp': '2025-10-02 00:45:01.951105', 'step': 19133, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:02.005906', 'step': 19133, 'epoch': 2}
{'type': 'loss', 'content': 0.10650615394115448, 'timestamp': '2025-10-02 00:45:02.008264', 'step': 19134, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:45:02.068567', 'step': 19134, 'epoch': 2}
{'type': 'loss', 'content': 0.03203689306974411, 'timestamp': '2025-10-02 00:45:02.078709', 'step': 19135, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:45:02.133855', 'step': 19135, 'epoch': 2}
{'type': 'loss', 'content': 0.14322644472122192, 'timestamp': '2025-10-02 00:45:02.139757', 'step': 19136, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:02.193491', 'step': 19136, 'epoch': 2}
{'type': 'loss', 'content': 0.07236911356449127, 'timestamp': '2025-10-02 00:45:02.195845', 'step': 19137, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:02.250819', 'step': 19137, 'epoch': 2}
{'type': 'loss', 'content': 0.03789268061518669, 'timestamp': '2025-10-02 00:45:02.256555', 'step': 19138, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:02.311732', 'step': 19138, 'epoch': 2}
{'type': 'loss', 'content': 0.027565620839595795, 'timestamp': '2025-10-02 00:45:02.318713', 'step': 19139, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:45:02.373694', 'step': 19139, 'epoch': 2}
{'type': 'loss', 'content': 0.07067865878343582, 'timestamp': '2025-10-02 00:45:02.383800', 'step': 19140, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:02.439300', 'step': 19140, 'epoch': 2}
{'type': 'loss', 'content': 0.12837842106819153, 'timestamp': '2025-10-02 00:45:02.441867', 'step': 19141, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:02.498364', 'step': 19141, 'epoch': 2}
{'type': 'loss', 'content': 0.04552095755934715, 'timestamp': '2025-10-02 00:45:02.500949', 'step': 19142, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:02.557140', 'step': 19142, 'epoch': 2}
{'type': 'loss', 'content': 0.01773679442703724, 'timestamp': '2025-10-02 00:45:02.562431', 'step': 19143, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:02.617604', 'step': 19143, 'epoch': 2}
{'type': 'loss', 'content': 0.05787286162376404, 'timestamp': '2025-10-02 00:45:02.623512', 'step': 19144, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:02.677611', 'step': 19144, 'epoch': 2}
{'type': 'loss', 'content': 0.19060657918453217, 'timestamp': '2025-10-02 00:45:02.679902', 'step': 19145, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:45:02.737659', 'step': 19145, 'epoch': 2}
{'type': 'loss', 'content': 0.11694584786891937, 'timestamp': '2025-10-02 00:45:02.739899', 'step': 19146, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:02.798064', 'step': 19146, 'epoch': 2}
{'type': 'loss', 'content': 0.005766368005424738, 'timestamp': '2025-10-02 00:45:02.805821', 'step': 19147, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:02.861423', 'step': 19147, 'epoch': 2}
{'type': 'loss', 'content': 0.04678806662559509, 'timestamp': '2025-10-02 00:45:02.867490', 'step': 19148, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:45:02.922501', 'step': 19148, 'epoch': 2}
{'type': 'loss', 'content': 0.05726620554924011, 'timestamp': '2025-10-02 00:45:02.924665', 'step': 19149, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:45:02.979707', 'step': 19149, 'epoch': 2}
{'type': 'loss', 'content': 0.0995137169957161, 'timestamp': '2025-10-02 00:45:02.981904', 'step': 19150, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:03.037017', 'step': 19150, 'epoch': 2}
{'type': 'loss', 'content': 0.03693118318915367, 'timestamp': '2025-10-02 00:45:03.039099', 'step': 19151, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:03.094871', 'step': 19151, 'epoch': 2}
{'type': 'loss', 'content': 0.021850433200597763, 'timestamp': '2025-10-02 00:45:03.100860', 'step': 19152, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:45:03.168121', 'step': 19152, 'epoch': 2}
{'type': 'loss', 'content': 0.029838837683200836, 'timestamp': '2025-10-02 00:45:03.181052', 'step': 19153, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:03.236691', 'step': 19153, 'epoch': 2}
{'type': 'loss', 'content': 0.03220687806606293, 'timestamp': '2025-10-02 00:45:03.243902', 'step': 19154, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:03.299338', 'step': 19154, 'epoch': 2}
{'type': 'loss', 'content': 0.060770049691200256, 'timestamp': '2025-10-02 00:45:03.305104', 'step': 19155, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:45:03.363875', 'step': 19155, 'epoch': 2}
{'type': 'loss', 'content': 0.03566375747323036, 'timestamp': '2025-10-02 00:45:03.373439', 'step': 19156, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:45:03.427543', 'step': 19156, 'epoch': 2}
{'type': 'loss', 'content': 0.06124520301818848, 'timestamp': '2025-10-02 00:45:03.430235', 'step': 19157, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:45:03.484736', 'step': 19157, 'epoch': 2}
{'type': 'loss', 'content': 0.04729190096259117, 'timestamp': '2025-10-02 00:45:03.488717', 'step': 19158, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:03.548172', 'step': 19158, 'epoch': 2}
{'type': 'loss', 'content': 0.02617187425494194, 'timestamp': '2025-10-02 00:45:03.552725', 'step': 19159, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:03.608879', 'step': 19159, 'epoch': 2}
{'type': 'loss', 'content': 0.03366417437791824, 'timestamp': '2025-10-02 00:45:03.614887', 'step': 19160, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:45:03.668890', 'step': 19160, 'epoch': 2}
{'type': 'loss', 'content': 0.09918386489152908, 'timestamp': '2025-10-02 00:45:03.670980', 'step': 19161, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:45:03.725628', 'step': 19161, 'epoch': 2}
{'type': 'loss', 'content': 0.058476418256759644, 'timestamp': '2025-10-02 00:45:03.734950', 'step': 19162, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:03.790207', 'step': 19162, 'epoch': 2}
{'type': 'loss', 'content': 0.019964057952165604, 'timestamp': '2025-10-02 00:45:03.795838', 'step': 19163, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:03.850329', 'step': 19163, 'epoch': 2}
{'type': 'loss', 'content': 0.00631477078422904, 'timestamp': '2025-10-02 00:45:03.856277', 'step': 19164, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:03.912498', 'step': 19164, 'epoch': 2}
{'type': 'loss', 'content': 0.03444252163171768, 'timestamp': '2025-10-02 00:45:03.919933', 'step': 19165, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:03.975193', 'step': 19165, 'epoch': 2}
{'type': 'loss', 'content': 0.04874075949192047, 'timestamp': '2025-10-02 00:45:03.977380', 'step': 19166, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:45:04.032181', 'step': 19166, 'epoch': 2}
{'type': 'loss', 'content': 0.05095314234495163, 'timestamp': '2025-10-02 00:45:04.034041', 'step': 19167, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:04.089038', 'step': 19167, 'epoch': 2}
{'type': 'loss', 'content': 0.03607597574591637, 'timestamp': '2025-10-02 00:45:04.095167', 'step': 19168, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:45:04.149316', 'step': 19168, 'epoch': 2}
{'type': 'loss', 'content': 0.03424384072422981, 'timestamp': '2025-10-02 00:45:04.158559', 'step': 19169, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:04.213608', 'step': 19169, 'epoch': 2}
{'type': 'loss', 'content': 0.033777620643377304, 'timestamp': '2025-10-02 00:45:04.219060', 'step': 19170, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:45:04.293487', 'step': 19170, 'epoch': 2}
{'type': 'loss', 'content': 0.016942104324698448, 'timestamp': '2025-10-02 00:45:04.306545', 'step': 19171, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:04.363427', 'step': 19171, 'epoch': 2}
{'type': 'loss', 'content': 0.09082730859518051, 'timestamp': '2025-10-02 00:45:04.369597', 'step': 19172, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:04.424380', 'step': 19172, 'epoch': 2}
{'type': 'loss', 'content': 0.014472238719463348, 'timestamp': '2025-10-02 00:45:04.427318', 'step': 19173, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:45:04.489587', 'step': 19173, 'epoch': 2}
{'type': 'loss', 'content': 0.02965983748435974, 'timestamp': '2025-10-02 00:45:04.500396', 'step': 19174, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:04.555668', 'step': 19174, 'epoch': 2}
{'type': 'loss', 'content': 0.05809479206800461, 'timestamp': '2025-10-02 00:45:04.562692', 'step': 19175, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:04.617671', 'step': 19175, 'epoch': 2}
{'type': 'loss', 'content': 0.04777579382061958, 'timestamp': '2025-10-02 00:45:04.624067', 'step': 19176, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:04.679431', 'step': 19176, 'epoch': 2}
{'type': 'loss', 'content': 0.05486131086945534, 'timestamp': '2025-10-02 00:45:04.681427', 'step': 19177, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:04.735695', 'step': 19177, 'epoch': 2}
{'type': 'loss', 'content': 0.13912849128246307, 'timestamp': '2025-10-02 00:45:04.737922', 'step': 19178, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:04.793094', 'step': 19178, 'epoch': 2}
{'type': 'loss', 'content': 0.09305959939956665, 'timestamp': '2025-10-02 00:45:04.795553', 'step': 19179, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:04.850854', 'step': 19179, 'epoch': 2}
{'type': 'loss', 'content': 0.059934101998806, 'timestamp': '2025-10-02 00:45:04.856836', 'step': 19180, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:04.911496', 'step': 19180, 'epoch': 2}
{'type': 'loss', 'content': 0.13049916923046112, 'timestamp': '2025-10-02 00:45:04.914860', 'step': 19181, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:04.972187', 'step': 19181, 'epoch': 2}
{'type': 'loss', 'content': 0.03912404179573059, 'timestamp': '2025-10-02 00:45:04.975026', 'step': 19182, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:45:05.035590', 'step': 19182, 'epoch': 2}
{'type': 'loss', 'content': 0.05814385414123535, 'timestamp': '2025-10-02 00:45:05.045933', 'step': 19183, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:45:05.116837', 'step': 19183, 'epoch': 2}
{'type': 'loss', 'content': 0.031354110687971115, 'timestamp': '2025-10-02 00:45:05.129554', 'step': 19184, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:45:05.191635', 'step': 19184, 'epoch': 2}
{'type': 'loss', 'content': 0.04639122635126114, 'timestamp': '2025-10-02 00:45:05.202570', 'step': 19185, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:45:05.267027', 'step': 19185, 'epoch': 2}
{'type': 'loss', 'content': 0.019874123856425285, 'timestamp': '2025-10-02 00:45:05.277486', 'step': 19186, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:45:05.334515', 'step': 19186, 'epoch': 2}
{'type': 'loss', 'content': 0.02340046875178814, 'timestamp': '2025-10-02 00:45:05.336742', 'step': 19187, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:05.395187', 'step': 19187, 'epoch': 2}
{'type': 'loss', 'content': 0.04532192274928093, 'timestamp': '2025-10-02 00:45:05.401995', 'step': 19188, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:45:05.457772', 'step': 19188, 'epoch': 2}
{'type': 'loss', 'content': 0.11715080589056015, 'timestamp': '2025-10-02 00:45:05.460904', 'step': 19189, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:05.520497', 'step': 19189, 'epoch': 2}
{'type': 'loss', 'content': 0.057152390480041504, 'timestamp': '2025-10-02 00:45:05.523882', 'step': 19190, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:05.580237', 'step': 19190, 'epoch': 2}
{'type': 'loss', 'content': 0.024525346234440804, 'timestamp': '2025-10-02 00:45:05.583915', 'step': 19191, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:05.640807', 'step': 19191, 'epoch': 2}
{'type': 'loss', 'content': 0.06926726549863815, 'timestamp': '2025-10-02 00:45:05.647331', 'step': 19192, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:05.703591', 'step': 19192, 'epoch': 2}
{'type': 'loss', 'content': 0.01329170260578394, 'timestamp': '2025-10-02 00:45:05.709343', 'step': 19193, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:05.766374', 'step': 19193, 'epoch': 2}
{'type': 'loss', 'content': 0.10040553659200668, 'timestamp': '2025-10-02 00:45:05.768645', 'step': 19194, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:05.824922', 'step': 19194, 'epoch': 2}
{'type': 'loss', 'content': 0.11577583104372025, 'timestamp': '2025-10-02 00:45:05.828447', 'step': 19195, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:05.888182', 'step': 19195, 'epoch': 2}
{'type': 'loss', 'content': 0.04827260971069336, 'timestamp': '2025-10-02 00:45:05.898447', 'step': 19196, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:05.953873', 'step': 19196, 'epoch': 2}
{'type': 'loss', 'content': 0.051874659955501556, 'timestamp': '2025-10-02 00:45:05.964075', 'step': 19197, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 592], 'flops': 11840071943488.0}, 'timestamp': '2025-10-02 00:45:06.053894', 'step': 19197, 'epoch': 2}
{'type': 'loss', 'content': 0.014598152600228786, 'timestamp': '2025-10-02 00:45:06.070363', 'step': 19198, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:45:06.148331', 'step': 19198, 'epoch': 2}
{'type': 'loss', 'content': 0.01399808656424284, 'timestamp': '2025-10-02 00:45:06.161530', 'step': 19199, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:45:06.224266', 'step': 19199, 'epoch': 2}
{'type': 'loss', 'content': 0.04452235996723175, 'timestamp': '2025-10-02 00:45:06.235532', 'step': 19200, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:06.292331', 'step': 19200, 'epoch': 2}
{'type': 'loss', 'content': 0.028322730213403702, 'timestamp': '2025-10-02 00:45:06.295328', 'step': 19201, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:06.351448', 'step': 19201, 'epoch': 2}
{'type': 'loss', 'content': 0.04258989542722702, 'timestamp': '2025-10-02 00:45:06.354459', 'step': 19202, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:06.412383', 'step': 19202, 'epoch': 2}
{'type': 'loss', 'content': 0.04113153740763664, 'timestamp': '2025-10-02 00:45:06.414604', 'step': 19203, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:06.470782', 'step': 19203, 'epoch': 2}
{'type': 'loss', 'content': 0.013412319123744965, 'timestamp': '2025-10-02 00:45:06.477449', 'step': 19204, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:06.535256', 'step': 19204, 'epoch': 2}
{'type': 'loss', 'content': 0.008328210562467575, 'timestamp': '2025-10-02 00:45:06.541201', 'step': 19205, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:06.603041', 'step': 19205, 'epoch': 2}
{'type': 'loss', 'content': 0.04405548423528671, 'timestamp': '2025-10-02 00:45:06.605272', 'step': 19206, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:06.662402', 'step': 19206, 'epoch': 2}
{'type': 'loss', 'content': 0.005962323397397995, 'timestamp': '2025-10-02 00:45:06.665662', 'step': 19207, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:06.722780', 'step': 19207, 'epoch': 2}
{'type': 'loss', 'content': 0.09012177586555481, 'timestamp': '2025-10-02 00:45:06.729636', 'step': 19208, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:45:06.783949', 'step': 19208, 'epoch': 2}
{'type': 'loss', 'content': 0.07286211848258972, 'timestamp': '2025-10-02 00:45:06.786768', 'step': 19209, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:45:06.850572', 'step': 19209, 'epoch': 2}
{'type': 'loss', 'content': 0.02065940573811531, 'timestamp': '2025-10-02 00:45:06.861348', 'step': 19210, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:45:06.932328', 'step': 19210, 'epoch': 2}
{'type': 'loss', 'content': 0.02874031290411949, 'timestamp': '2025-10-02 00:45:06.944663', 'step': 19211, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:45:07.000057', 'step': 19211, 'epoch': 2}
{'type': 'loss', 'content': 0.08065291494131088, 'timestamp': '2025-10-02 00:45:07.007096', 'step': 19212, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:45:07.068877', 'step': 19212, 'epoch': 2}
{'type': 'loss', 'content': 0.032747525721788406, 'timestamp': '2025-10-02 00:45:07.080204', 'step': 19213, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:07.137099', 'step': 19213, 'epoch': 2}
{'type': 'loss', 'content': 0.08519192785024643, 'timestamp': '2025-10-02 00:45:07.139790', 'step': 19214, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:45:07.201211', 'step': 19214, 'epoch': 2}
{'type': 'loss', 'content': 0.023602819070219994, 'timestamp': '2025-10-02 00:45:07.211389', 'step': 19215, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:07.267052', 'step': 19215, 'epoch': 2}
{'type': 'loss', 'content': 0.03076389618217945, 'timestamp': '2025-10-02 00:45:07.273090', 'step': 19216, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:07.327646', 'step': 19216, 'epoch': 2}
{'type': 'loss', 'content': 0.088605597615242, 'timestamp': '2025-10-02 00:45:07.329997', 'step': 19217, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:45:07.388285', 'step': 19217, 'epoch': 2}
{'type': 'loss', 'content': 0.0437672957777977, 'timestamp': '2025-10-02 00:45:07.390881', 'step': 19218, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:45:07.446313', 'step': 19218, 'epoch': 2}
{'type': 'loss', 'content': 0.05184751749038696, 'timestamp': '2025-10-02 00:45:07.448537', 'step': 19219, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:45:07.503403', 'step': 19219, 'epoch': 2}
{'type': 'loss', 'content': 0.03880862519145012, 'timestamp': '2025-10-02 00:45:07.509202', 'step': 19220, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:07.564909', 'step': 19220, 'epoch': 2}
{'type': 'loss', 'content': 0.04809968173503876, 'timestamp': '2025-10-02 00:45:07.567333', 'step': 19221, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:45:07.622632', 'step': 19221, 'epoch': 2}
{'type': 'loss', 'content': 0.063709557056427, 'timestamp': '2025-10-02 00:45:07.625673', 'step': 19222, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:07.682869', 'step': 19222, 'epoch': 2}
{'type': 'loss', 'content': 0.01519672479480505, 'timestamp': '2025-10-02 00:45:07.685245', 'step': 19223, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:07.740836', 'step': 19223, 'epoch': 2}
{'type': 'loss', 'content': 0.04727540537714958, 'timestamp': '2025-10-02 00:45:07.746555', 'step': 19224, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:45:07.802100', 'step': 19224, 'epoch': 2}
{'type': 'loss', 'content': 0.08298845589160919, 'timestamp': '2025-10-02 00:45:07.804547', 'step': 19225, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:45:07.863304', 'step': 19225, 'epoch': 2}
{'type': 'loss', 'content': 0.042399290949106216, 'timestamp': '2025-10-02 00:45:07.873473', 'step': 19226, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:07.929751', 'step': 19226, 'epoch': 2}
{'type': 'loss', 'content': 0.017213784158229828, 'timestamp': '2025-10-02 00:45:07.939307', 'step': 19227, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:07.996303', 'step': 19227, 'epoch': 2}
{'type': 'loss', 'content': 0.03664457052946091, 'timestamp': '2025-10-02 00:45:08.002553', 'step': 19228, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:08.058017', 'step': 19228, 'epoch': 2}
{'type': 'loss', 'content': 0.16076217591762543, 'timestamp': '2025-10-02 00:45:08.060374', 'step': 19229, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:08.117665', 'step': 19229, 'epoch': 2}
{'type': 'loss', 'content': 0.021211784332990646, 'timestamp': '2025-10-02 00:45:08.123317', 'step': 19230, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:08.179136', 'step': 19230, 'epoch': 2}
{'type': 'loss', 'content': 0.024540429934859276, 'timestamp': '2025-10-02 00:45:08.184049', 'step': 19231, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:08.239726', 'step': 19231, 'epoch': 2}
{'type': 'loss', 'content': 0.03207225725054741, 'timestamp': '2025-10-02 00:45:08.245244', 'step': 19232, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:08.299494', 'step': 19232, 'epoch': 2}
{'type': 'loss', 'content': 0.03259489685297012, 'timestamp': '2025-10-02 00:45:08.301826', 'step': 19233, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:45:08.356277', 'step': 19233, 'epoch': 2}
{'type': 'loss', 'content': 0.03251133859157562, 'timestamp': '2025-10-02 00:45:08.358466', 'step': 19234, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:08.413009', 'step': 19234, 'epoch': 2}
{'type': 'loss', 'content': 0.09937682002782822, 'timestamp': '2025-10-02 00:45:08.415345', 'step': 19235, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:45:08.471277', 'step': 19235, 'epoch': 2}
{'type': 'loss', 'content': 0.1360589861869812, 'timestamp': '2025-10-02 00:45:08.476947', 'step': 19236, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:45:08.530821', 'step': 19236, 'epoch': 2}
{'type': 'loss', 'content': 0.04508717730641365, 'timestamp': '2025-10-02 00:45:08.533202', 'step': 19237, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:45:08.588366', 'step': 19237, 'epoch': 2}
{'type': 'loss', 'content': 0.11021637916564941, 'timestamp': '2025-10-02 00:45:08.590599', 'step': 19238, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:08.645008', 'step': 19238, 'epoch': 2}
{'type': 'loss', 'content': 0.03639866039156914, 'timestamp': '2025-10-02 00:45:08.647290', 'step': 19239, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:08.702130', 'step': 19239, 'epoch': 2}
{'type': 'loss', 'content': 0.026764903217554092, 'timestamp': '2025-10-02 00:45:08.708095', 'step': 19240, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:08.763923', 'step': 19240, 'epoch': 2}
{'type': 'loss', 'content': 0.027875391766428947, 'timestamp': '2025-10-02 00:45:08.766324', 'step': 19241, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:08.820777', 'step': 19241, 'epoch': 2}
{'type': 'loss', 'content': 0.09258650243282318, 'timestamp': '2025-10-02 00:45:08.823160', 'step': 19242, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:08.878373', 'step': 19242, 'epoch': 2}
{'type': 'loss', 'content': 0.06297053396701813, 'timestamp': '2025-10-02 00:45:08.885247', 'step': 19243, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:08.939627', 'step': 19243, 'epoch': 2}
{'type': 'loss', 'content': 0.0795307457447052, 'timestamp': '2025-10-02 00:45:08.944946', 'step': 19244, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:45:09.001603', 'step': 19244, 'epoch': 2}
{'type': 'loss', 'content': 0.07821009308099747, 'timestamp': '2025-10-02 00:45:09.010859', 'step': 19245, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:09.067724', 'step': 19245, 'epoch': 2}
{'type': 'loss', 'content': 0.030055023729801178, 'timestamp': '2025-10-02 00:45:09.073236', 'step': 19246, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:45:09.128301', 'step': 19246, 'epoch': 2}
{'type': 'loss', 'content': 0.11227282136678696, 'timestamp': '2025-10-02 00:45:09.130724', 'step': 19247, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:45:09.185893', 'step': 19247, 'epoch': 2}
{'type': 'loss', 'content': 0.11883540451526642, 'timestamp': '2025-10-02 00:45:09.191932', 'step': 19248, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:09.245150', 'step': 19248, 'epoch': 2}
{'type': 'loss', 'content': 0.04339984804391861, 'timestamp': '2025-10-02 00:45:09.247319', 'step': 19249, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:09.301996', 'step': 19249, 'epoch': 2}
{'type': 'loss', 'content': 0.010448148474097252, 'timestamp': '2025-10-02 00:45:09.304437', 'step': 19250, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:09.359249', 'step': 19250, 'epoch': 2}
{'type': 'loss', 'content': 0.042180512100458145, 'timestamp': '2025-10-02 00:45:09.361545', 'step': 19251, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:09.416450', 'step': 19251, 'epoch': 2}
{'type': 'loss', 'content': 0.06190478429198265, 'timestamp': '2025-10-02 00:45:09.425015', 'step': 19252, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:09.486173', 'step': 19252, 'epoch': 2}
{'type': 'loss', 'content': 0.01531689241528511, 'timestamp': '2025-10-02 00:45:09.496351', 'step': 19253, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:09.573389', 'step': 19253, 'epoch': 2}
{'type': 'loss', 'content': 0.01689622551202774, 'timestamp': '2025-10-02 00:45:09.578463', 'step': 19254, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:09.635686', 'step': 19254, 'epoch': 2}
{'type': 'loss', 'content': 0.027715522795915604, 'timestamp': '2025-10-02 00:45:09.645237', 'step': 19255, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:45:09.708034', 'step': 19255, 'epoch': 2}
{'type': 'loss', 'content': 0.0378057137131691, 'timestamp': '2025-10-02 00:45:09.719214', 'step': 19256, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:09.774195', 'step': 19256, 'epoch': 2}
{'type': 'loss', 'content': 0.01679391786456108, 'timestamp': '2025-10-02 00:45:09.781168', 'step': 19257, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:09.836376', 'step': 19257, 'epoch': 2}
{'type': 'loss', 'content': 0.02669413760304451, 'timestamp': '2025-10-02 00:45:09.842079', 'step': 19258, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:45:09.905579', 'step': 19258, 'epoch': 2}
{'type': 'loss', 'content': 0.006812224164605141, 'timestamp': '2025-10-02 00:45:09.916199', 'step': 19259, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:09.971486', 'step': 19259, 'epoch': 2}
{'type': 'loss', 'content': 0.0185400303453207, 'timestamp': '2025-10-02 00:45:09.977421', 'step': 19260, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:10.032778', 'step': 19260, 'epoch': 2}
{'type': 'loss', 'content': 0.01498103141784668, 'timestamp': '2025-10-02 00:45:10.035290', 'step': 19261, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:10.090133', 'step': 19261, 'epoch': 2}
{'type': 'loss', 'content': 0.00984626542776823, 'timestamp': '2025-10-02 00:45:10.097296', 'step': 19262, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:10.152728', 'step': 19262, 'epoch': 2}
{'type': 'loss', 'content': 0.021252231672406197, 'timestamp': '2025-10-02 00:45:10.159859', 'step': 19263, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:10.215073', 'step': 19263, 'epoch': 2}
{'type': 'loss', 'content': 0.03946205973625183, 'timestamp': '2025-10-02 00:45:10.221204', 'step': 19264, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:10.276399', 'step': 19264, 'epoch': 2}
{'type': 'loss', 'content': 0.030092690140008926, 'timestamp': '2025-10-02 00:45:10.279076', 'step': 19265, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:10.333932', 'step': 19265, 'epoch': 2}
{'type': 'loss', 'content': 0.12470352649688721, 'timestamp': '2025-10-02 00:45:10.336722', 'step': 19266, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:10.392500', 'step': 19266, 'epoch': 2}
{'type': 'loss', 'content': 0.03752753138542175, 'timestamp': '2025-10-02 00:45:10.394850', 'step': 19267, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:45:10.448678', 'step': 19267, 'epoch': 2}
{'type': 'loss', 'content': 0.10600695013999939, 'timestamp': '2025-10-02 00:45:10.455115', 'step': 19268, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:10.510882', 'step': 19268, 'epoch': 2}
{'type': 'loss', 'content': 0.07691957801580429, 'timestamp': '2025-10-02 00:45:10.513852', 'step': 19269, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:10.568862', 'step': 19269, 'epoch': 2}
{'type': 'loss', 'content': 0.01792140118777752, 'timestamp': '2025-10-02 00:45:10.574506', 'step': 19270, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:45:10.637790', 'step': 19270, 'epoch': 2}
{'type': 'loss', 'content': 0.07833437621593475, 'timestamp': '2025-10-02 00:45:10.648383', 'step': 19271, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:10.703896', 'step': 19271, 'epoch': 2}
{'type': 'loss', 'content': 0.039059676229953766, 'timestamp': '2025-10-02 00:45:10.711833', 'step': 19272, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:10.766061', 'step': 19272, 'epoch': 2}
{'type': 'loss', 'content': 0.10935243964195251, 'timestamp': '2025-10-02 00:45:10.768262', 'step': 19273, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:10.823418', 'step': 19273, 'epoch': 2}
{'type': 'loss', 'content': 0.035485997796058655, 'timestamp': '2025-10-02 00:45:10.829100', 'step': 19274, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:45:10.888724', 'step': 19274, 'epoch': 2}
{'type': 'loss', 'content': 0.08147453516721725, 'timestamp': '2025-10-02 00:45:10.898861', 'step': 19275, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:10.954444', 'step': 19275, 'epoch': 2}
{'type': 'loss', 'content': 0.05451393499970436, 'timestamp': '2025-10-02 00:45:10.960135', 'step': 19276, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:45:11.014937', 'step': 19276, 'epoch': 2}
{'type': 'loss', 'content': 0.005702837835997343, 'timestamp': '2025-10-02 00:45:11.024099', 'step': 19277, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:45:11.078384', 'step': 19277, 'epoch': 2}
{'type': 'loss', 'content': 0.12962499260902405, 'timestamp': '2025-10-02 00:45:11.080669', 'step': 19278, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:11.138211', 'step': 19278, 'epoch': 2}
{'type': 'loss', 'content': 0.0231269970536232, 'timestamp': '2025-10-02 00:45:11.145173', 'step': 19279, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:11.200770', 'step': 19279, 'epoch': 2}
{'type': 'loss', 'content': 0.04603752866387367, 'timestamp': '2025-10-02 00:45:11.206610', 'step': 19280, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:11.260894', 'step': 19280, 'epoch': 2}
{'type': 'loss', 'content': 0.04053759574890137, 'timestamp': '2025-10-02 00:45:11.263050', 'step': 19281, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:45:11.317519', 'step': 19281, 'epoch': 2}
{'type': 'loss', 'content': 0.05788061395287514, 'timestamp': '2025-10-02 00:45:11.319946', 'step': 19282, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:45:11.379213', 'step': 19282, 'epoch': 2}
{'type': 'loss', 'content': 0.0343463309109211, 'timestamp': '2025-10-02 00:45:11.389394', 'step': 19283, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:45:11.445725', 'step': 19283, 'epoch': 2}
{'type': 'loss', 'content': 0.10003609955310822, 'timestamp': '2025-10-02 00:45:11.451101', 'step': 19284, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:11.505710', 'step': 19284, 'epoch': 2}
{'type': 'loss', 'content': 0.03727751225233078, 'timestamp': '2025-10-02 00:45:11.507756', 'step': 19285, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:45:11.562791', 'step': 19285, 'epoch': 2}
{'type': 'loss', 'content': 0.025095276534557343, 'timestamp': '2025-10-02 00:45:11.572122', 'step': 19286, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:11.628161', 'step': 19286, 'epoch': 2}
{'type': 'loss', 'content': 0.0570448562502861, 'timestamp': '2025-10-02 00:45:11.637655', 'step': 19287, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:11.694352', 'step': 19287, 'epoch': 2}
{'type': 'loss', 'content': 0.019868651404976845, 'timestamp': '2025-10-02 00:45:11.704701', 'step': 19288, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:11.759281', 'step': 19288, 'epoch': 2}
{'type': 'loss', 'content': 0.03837739676237106, 'timestamp': '2025-10-02 00:45:11.761356', 'step': 19289, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:11.815671', 'step': 19289, 'epoch': 2}
{'type': 'loss', 'content': 0.08168594539165497, 'timestamp': '2025-10-02 00:45:11.823013', 'step': 19290, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:45:11.885112', 'step': 19290, 'epoch': 2}
{'type': 'loss', 'content': 0.003174061654135585, 'timestamp': '2025-10-02 00:45:11.895773', 'step': 19291, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:11.951297', 'step': 19291, 'epoch': 2}
{'type': 'loss', 'content': 0.06297866255044937, 'timestamp': '2025-10-02 00:45:11.956744', 'step': 19292, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:12.011067', 'step': 19292, 'epoch': 2}
{'type': 'loss', 'content': 0.06784545630216599, 'timestamp': '2025-10-02 00:45:12.013286', 'step': 19293, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:45:12.083064', 'step': 19293, 'epoch': 2}
{'type': 'loss', 'content': 0.027531344443559647, 'timestamp': '2025-10-02 00:45:12.095602', 'step': 19294, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:45:12.165412', 'step': 19294, 'epoch': 2}
{'type': 'loss', 'content': 0.037499234080314636, 'timestamp': '2025-10-02 00:45:12.177722', 'step': 19295, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:45:12.231812', 'step': 19295, 'epoch': 2}
{'type': 'loss', 'content': 0.2025509625673294, 'timestamp': '2025-10-02 00:45:12.237863', 'step': 19296, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:12.292023', 'step': 19296, 'epoch': 2}
{'type': 'loss', 'content': 0.0580999031662941, 'timestamp': '2025-10-02 00:45:12.294334', 'step': 19297, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:12.350378', 'step': 19297, 'epoch': 2}
{'type': 'loss', 'content': 0.09004102647304535, 'timestamp': '2025-10-02 00:45:12.353740', 'step': 19298, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:12.423623', 'step': 19298, 'epoch': 2}
{'type': 'loss', 'content': 0.1307324469089508, 'timestamp': '2025-10-02 00:45:12.425893', 'step': 19299, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:12.480901', 'step': 19299, 'epoch': 2}
{'type': 'loss', 'content': 0.026965664699673653, 'timestamp': '2025-10-02 00:45:12.488689', 'step': 19300, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:12.543063', 'step': 19300, 'epoch': 2}
{'type': 'loss', 'content': 0.1415344476699829, 'timestamp': '2025-10-02 00:45:12.545141', 'step': 19301, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:12.610698', 'step': 19301, 'epoch': 2}
{'type': 'loss', 'content': 0.04482003301382065, 'timestamp': '2025-10-02 00:45:12.612613', 'step': 19302, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:45:12.667404', 'step': 19302, 'epoch': 2}
{'type': 'loss', 'content': 0.11381512880325317, 'timestamp': '2025-10-02 00:45:12.669962', 'step': 19303, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:45:12.724808', 'step': 19303, 'epoch': 2}
{'type': 'loss', 'content': 0.07663403451442719, 'timestamp': '2025-10-02 00:45:12.731096', 'step': 19304, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:12.785728', 'step': 19304, 'epoch': 2}
{'type': 'loss', 'content': 0.08587297797203064, 'timestamp': '2025-10-02 00:45:12.788172', 'step': 19305, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:12.843803', 'step': 19305, 'epoch': 2}
{'type': 'loss', 'content': 0.020115438848733902, 'timestamp': '2025-10-02 00:45:12.849502', 'step': 19306, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:12.905885', 'step': 19306, 'epoch': 2}
{'type': 'loss', 'content': 0.0130058154463768, 'timestamp': '2025-10-02 00:45:12.911388', 'step': 19307, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:12.967426', 'step': 19307, 'epoch': 2}
{'type': 'loss', 'content': 0.16081570088863373, 'timestamp': '2025-10-02 00:45:12.974473', 'step': 19308, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:13.028859', 'step': 19308, 'epoch': 2}
{'type': 'loss', 'content': 0.05917061120271683, 'timestamp': '2025-10-02 00:45:13.031142', 'step': 19309, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:13.086314', 'step': 19309, 'epoch': 2}
{'type': 'loss', 'content': 0.034896913915872574, 'timestamp': '2025-10-02 00:45:13.091427', 'step': 19310, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:13.147049', 'step': 19310, 'epoch': 2}
{'type': 'loss', 'content': 0.028382861986756325, 'timestamp': '2025-10-02 00:45:13.149569', 'step': 19311, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:13.214415', 'step': 19311, 'epoch': 2}
{'type': 'loss', 'content': 0.053307436406612396, 'timestamp': '2025-10-02 00:45:13.220773', 'step': 19312, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:45:13.275689', 'step': 19312, 'epoch': 2}
{'type': 'loss', 'content': 0.10993427783250809, 'timestamp': '2025-10-02 00:45:13.277936', 'step': 19313, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:13.332366', 'step': 19313, 'epoch': 2}
{'type': 'loss', 'content': 0.013629947789013386, 'timestamp': '2025-10-02 00:45:13.338049', 'step': 19314, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:45:13.402697', 'step': 19314, 'epoch': 2}
{'type': 'loss', 'content': 0.01907181553542614, 'timestamp': '2025-10-02 00:45:13.413263', 'step': 19315, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:13.468311', 'step': 19315, 'epoch': 2}
{'type': 'loss', 'content': 0.017214808613061905, 'timestamp': '2025-10-02 00:45:13.473945', 'step': 19316, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:13.528908', 'step': 19316, 'epoch': 2}
{'type': 'loss', 'content': 0.041951734572649, 'timestamp': '2025-10-02 00:45:13.531545', 'step': 19317, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:13.587271', 'step': 19317, 'epoch': 2}
{'type': 'loss', 'content': 0.025038592517375946, 'timestamp': '2025-10-02 00:45:13.589391', 'step': 19318, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:13.644067', 'step': 19318, 'epoch': 2}
{'type': 'loss', 'content': 0.04847284406423569, 'timestamp': '2025-10-02 00:45:13.649541', 'step': 19319, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:45:13.704513', 'step': 19319, 'epoch': 2}
{'type': 'loss', 'content': 0.06138753890991211, 'timestamp': '2025-10-02 00:45:13.710327', 'step': 19320, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:13.763987', 'step': 19320, 'epoch': 2}
{'type': 'loss', 'content': 0.034391891211271286, 'timestamp': '2025-10-02 00:45:13.766385', 'step': 19321, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:13.821769', 'step': 19321, 'epoch': 2}
{'type': 'loss', 'content': 0.10882039368152618, 'timestamp': '2025-10-02 00:45:13.823925', 'step': 19322, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:13.879023', 'step': 19322, 'epoch': 2}
{'type': 'loss', 'content': 0.037454742938280106, 'timestamp': '2025-10-02 00:45:13.885902', 'step': 19323, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:45:13.942621', 'step': 19323, 'epoch': 2}
{'type': 'loss', 'content': 0.0717068687081337, 'timestamp': '2025-10-02 00:45:13.947934', 'step': 19324, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:14.003423', 'step': 19324, 'epoch': 2}
{'type': 'loss', 'content': 0.019255148246884346, 'timestamp': '2025-10-02 00:45:14.006498', 'step': 19325, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:14.064261', 'step': 19325, 'epoch': 2}
{'type': 'loss', 'content': 0.11344689130783081, 'timestamp': '2025-10-02 00:45:14.066958', 'step': 19326, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:14.122484', 'step': 19326, 'epoch': 2}
{'type': 'loss', 'content': 0.05429983139038086, 'timestamp': '2025-10-02 00:45:14.124631', 'step': 19327, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:45:14.182041', 'step': 19327, 'epoch': 2}
{'type': 'loss', 'content': 0.034955237060785294, 'timestamp': '2025-10-02 00:45:14.191810', 'step': 19328, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:14.247595', 'step': 19328, 'epoch': 2}
{'type': 'loss', 'content': 0.07945077866315842, 'timestamp': '2025-10-02 00:45:14.249528', 'step': 19329, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:14.305865', 'step': 19329, 'epoch': 2}
{'type': 'loss', 'content': 0.03485054150223732, 'timestamp': '2025-10-02 00:45:14.308044', 'step': 19330, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:45:14.370133', 'step': 19330, 'epoch': 2}
{'type': 'loss', 'content': 0.07702122628688812, 'timestamp': '2025-10-02 00:45:14.380205', 'step': 19331, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:14.438512', 'step': 19331, 'epoch': 2}
{'type': 'loss', 'content': 0.05661335587501526, 'timestamp': '2025-10-02 00:45:14.444949', 'step': 19332, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:14.500041', 'step': 19332, 'epoch': 2}
{'type': 'loss', 'content': 0.07832729071378708, 'timestamp': '2025-10-02 00:45:14.507286', 'step': 19333, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:45:14.563320', 'step': 19333, 'epoch': 2}
{'type': 'loss', 'content': 0.019362974911928177, 'timestamp': '2025-10-02 00:45:14.565521', 'step': 19334, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:45:14.628283', 'step': 19334, 'epoch': 2}
{'type': 'loss', 'content': 0.02619798667728901, 'timestamp': '2025-10-02 00:45:14.638729', 'step': 19335, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:14.694203', 'step': 19335, 'epoch': 2}
{'type': 'loss', 'content': 0.029253516346216202, 'timestamp': '2025-10-02 00:45:14.700088', 'step': 19336, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:14.754817', 'step': 19336, 'epoch': 2}
{'type': 'loss', 'content': 0.04986678063869476, 'timestamp': '2025-10-02 00:45:14.757438', 'step': 19337, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:14.813356', 'step': 19337, 'epoch': 2}
{'type': 'loss', 'content': 0.03776291757822037, 'timestamp': '2025-10-02 00:45:14.820675', 'step': 19338, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:14.876763', 'step': 19338, 'epoch': 2}
{'type': 'loss', 'content': 0.050746627151966095, 'timestamp': '2025-10-02 00:45:14.879588', 'step': 19339, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:14.935830', 'step': 19339, 'epoch': 2}
{'type': 'loss', 'content': 0.08897414058446884, 'timestamp': '2025-10-02 00:45:14.942218', 'step': 19340, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:14.997218', 'step': 19340, 'epoch': 2}
{'type': 'loss', 'content': 0.035046130418777466, 'timestamp': '2025-10-02 00:45:15.000211', 'step': 19341, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:15.057917', 'step': 19341, 'epoch': 2}
{'type': 'loss', 'content': 0.003474463941529393, 'timestamp': '2025-10-02 00:45:15.060125', 'step': 19342, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:45:15.118303', 'step': 19342, 'epoch': 2}
{'type': 'loss', 'content': 0.07599348574876785, 'timestamp': '2025-10-02 00:45:15.120626', 'step': 19343, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:15.177041', 'step': 19343, 'epoch': 2}
{'type': 'loss', 'content': 0.010969829745590687, 'timestamp': '2025-10-02 00:45:15.184756', 'step': 19344, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:15.240030', 'step': 19344, 'epoch': 2}
{'type': 'loss', 'content': 0.033739782869815826, 'timestamp': '2025-10-02 00:45:15.242459', 'step': 19345, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:45:15.298236', 'step': 19345, 'epoch': 2}
{'type': 'loss', 'content': 0.1234203651547432, 'timestamp': '2025-10-02 00:45:15.300471', 'step': 19346, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:45:15.376794', 'step': 19346, 'epoch': 2}
{'type': 'loss', 'content': 0.01523655466735363, 'timestamp': '2025-10-02 00:45:15.389997', 'step': 19347, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:15.449032', 'step': 19347, 'epoch': 2}
{'type': 'loss', 'content': 0.11850138008594513, 'timestamp': '2025-10-02 00:45:15.454607', 'step': 19348, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:15.508809', 'step': 19348, 'epoch': 2}
{'type': 'loss', 'content': 0.08435176312923431, 'timestamp': '2025-10-02 00:45:15.511808', 'step': 19349, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:15.566639', 'step': 19349, 'epoch': 2}
{'type': 'loss', 'content': 0.036479707807302475, 'timestamp': '2025-10-02 00:45:15.574018', 'step': 19350, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:15.630438', 'step': 19350, 'epoch': 2}
{'type': 'loss', 'content': 0.06796116381883621, 'timestamp': '2025-10-02 00:45:15.632500', 'step': 19351, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:15.687681', 'step': 19351, 'epoch': 2}
{'type': 'loss', 'content': 0.11899600178003311, 'timestamp': '2025-10-02 00:45:15.698000', 'step': 19352, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:45:15.765344', 'step': 19352, 'epoch': 2}
{'type': 'loss', 'content': 0.06582189351320267, 'timestamp': '2025-10-02 00:45:15.778670', 'step': 19353, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:15.833713', 'step': 19353, 'epoch': 2}
{'type': 'loss', 'content': 0.04488522931933403, 'timestamp': '2025-10-02 00:45:15.835553', 'step': 19354, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:45:15.890453', 'step': 19354, 'epoch': 2}
{'type': 'loss', 'content': 0.059451181441545486, 'timestamp': '2025-10-02 00:45:15.892230', 'step': 19355, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:45:15.961284', 'step': 19355, 'epoch': 2}
{'type': 'loss', 'content': 0.027380798012018204, 'timestamp': '2025-10-02 00:45:15.974517', 'step': 19356, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:45:16.028723', 'step': 19356, 'epoch': 2}
{'type': 'loss', 'content': 0.06620513647794724, 'timestamp': '2025-10-02 00:45:16.031256', 'step': 19357, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:45:16.091677', 'step': 19357, 'epoch': 2}
{'type': 'loss', 'content': 0.00040500363684259355, 'timestamp': '2025-10-02 00:45:16.101833', 'step': 19358, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:16.156948', 'step': 19358, 'epoch': 2}
{'type': 'loss', 'content': 0.022304730489850044, 'timestamp': '2025-10-02 00:45:16.160071', 'step': 19359, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:16.214993', 'step': 19359, 'epoch': 2}
{'type': 'loss', 'content': 0.09094221144914627, 'timestamp': '2025-10-02 00:45:16.220819', 'step': 19360, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:45:16.276164', 'step': 19360, 'epoch': 2}
{'type': 'loss', 'content': 0.1602209210395813, 'timestamp': '2025-10-02 00:45:16.278647', 'step': 19361, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:16.333407', 'step': 19361, 'epoch': 2}
{'type': 'loss', 'content': 0.08060197532176971, 'timestamp': '2025-10-02 00:45:16.335495', 'step': 19362, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:16.390134', 'step': 19362, 'epoch': 2}
{'type': 'loss', 'content': 0.10444653034210205, 'timestamp': '2025-10-02 00:45:16.392230', 'step': 19363, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:16.446616', 'step': 19363, 'epoch': 2}
{'type': 'loss', 'content': 0.05911517143249512, 'timestamp': '2025-10-02 00:45:16.454551', 'step': 19364, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:45:16.508966', 'step': 19364, 'epoch': 2}
{'type': 'loss', 'content': 0.028429511934518814, 'timestamp': '2025-10-02 00:45:16.511333', 'step': 19365, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:16.567025', 'step': 19365, 'epoch': 2}
{'type': 'loss', 'content': 0.0702381357550621, 'timestamp': '2025-10-02 00:45:16.569671', 'step': 19366, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:16.625423', 'step': 19366, 'epoch': 2}
{'type': 'loss', 'content': 0.06620747596025467, 'timestamp': '2025-10-02 00:45:16.627844', 'step': 19367, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:45:16.690910', 'step': 19367, 'epoch': 2}
{'type': 'loss', 'content': 0.008717929013073444, 'timestamp': '2025-10-02 00:45:16.702172', 'step': 19368, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:45:16.756387', 'step': 19368, 'epoch': 2}
{'type': 'loss', 'content': 0.006496161222457886, 'timestamp': '2025-10-02 00:45:16.766064', 'step': 19369, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:16.821240', 'step': 19369, 'epoch': 2}
{'type': 'loss', 'content': 0.06577989459037781, 'timestamp': '2025-10-02 00:45:16.823992', 'step': 19370, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:16.878618', 'step': 19370, 'epoch': 2}
{'type': 'loss', 'content': 0.06698240339756012, 'timestamp': '2025-10-02 00:45:16.881062', 'step': 19371, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:16.935650', 'step': 19371, 'epoch': 2}
{'type': 'loss', 'content': 0.05295833945274353, 'timestamp': '2025-10-02 00:45:16.944280', 'step': 19372, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:17.005739', 'step': 19372, 'epoch': 2}
{'type': 'loss', 'content': 0.049435585737228394, 'timestamp': '2025-10-02 00:45:17.028896', 'step': 19373, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:17.119126', 'step': 19373, 'epoch': 2}
{'type': 'loss', 'content': 0.07575798034667969, 'timestamp': '2025-10-02 00:45:17.137021', 'step': 19374, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:45:17.228710', 'step': 19374, 'epoch': 2}
{'type': 'loss', 'content': 0.13073749840259552, 'timestamp': '2025-10-02 00:45:17.245657', 'step': 19375, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:45:17.312492', 'step': 19375, 'epoch': 2}
{'type': 'loss', 'content': 0.029466811567544937, 'timestamp': '2025-10-02 00:45:17.334293', 'step': 19376, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:17.395060', 'step': 19376, 'epoch': 2}
{'type': 'loss', 'content': 0.0981004387140274, 'timestamp': '2025-10-02 00:45:17.400570', 'step': 19377, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:17.491214', 'step': 19377, 'epoch': 2}
{'type': 'loss', 'content': 0.15778490900993347, 'timestamp': '2025-10-02 00:45:17.495387', 'step': 19378, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:17.556638', 'step': 19378, 'epoch': 2}
{'type': 'loss', 'content': 0.03009866736829281, 'timestamp': '2025-10-02 00:45:17.566295', 'step': 19379, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:17.627410', 'step': 19379, 'epoch': 2}
{'type': 'loss', 'content': 0.11490510404109955, 'timestamp': '2025-10-02 00:45:17.635485', 'step': 19380, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:17.719839', 'step': 19380, 'epoch': 2}
{'type': 'loss', 'content': 0.0402066595852375, 'timestamp': '2025-10-02 00:45:17.730922', 'step': 19381, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:17.801464', 'step': 19381, 'epoch': 2}
{'type': 'loss', 'content': 0.048311732709407806, 'timestamp': '2025-10-02 00:45:17.811510', 'step': 19382, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:45:17.885226', 'step': 19382, 'epoch': 2}
{'type': 'loss', 'content': 0.019969161599874496, 'timestamp': '2025-10-02 00:45:17.896623', 'step': 19383, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:17.972717', 'step': 19383, 'epoch': 2}
{'type': 'loss', 'content': 0.060557227581739426, 'timestamp': '2025-10-02 00:45:17.989553', 'step': 19384, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:45:18.074965', 'step': 19384, 'epoch': 2}
{'type': 'loss', 'content': 0.01793743669986725, 'timestamp': '2025-10-02 00:45:18.086731', 'step': 19385, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:18.155380', 'step': 19385, 'epoch': 2}
{'type': 'loss', 'content': 0.07804261893033981, 'timestamp': '2025-10-02 00:45:18.162462', 'step': 19386, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:45:18.227711', 'step': 19386, 'epoch': 2}
{'type': 'loss', 'content': 0.041773609817028046, 'timestamp': '2025-10-02 00:45:18.238009', 'step': 19387, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:45:18.303901', 'step': 19387, 'epoch': 2}
{'type': 'loss', 'content': 0.013111112639307976, 'timestamp': '2025-10-02 00:45:18.315148', 'step': 19388, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:18.373335', 'step': 19388, 'epoch': 2}
{'type': 'loss', 'content': 0.08259250223636627, 'timestamp': '2025-10-02 00:45:18.383865', 'step': 19389, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:18.470053', 'step': 19389, 'epoch': 2}
{'type': 'loss', 'content': 0.09240707755088806, 'timestamp': '2025-10-02 00:45:18.473122', 'step': 19390, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:18.541685', 'step': 19390, 'epoch': 2}
{'type': 'loss', 'content': 0.09403551369905472, 'timestamp': '2025-10-02 00:45:18.544547', 'step': 19391, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:18.604380', 'step': 19391, 'epoch': 2}
{'type': 'loss', 'content': 0.04026060551404953, 'timestamp': '2025-10-02 00:45:18.612879', 'step': 19392, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:45:18.688108', 'step': 19392, 'epoch': 2}
{'type': 'loss', 'content': 0.10093815624713898, 'timestamp': '2025-10-02 00:45:18.692398', 'step': 19393, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:18.758793', 'step': 19393, 'epoch': 2}
{'type': 'loss', 'content': 0.047506824135780334, 'timestamp': '2025-10-02 00:45:18.761709', 'step': 19394, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:45:18.826545', 'step': 19394, 'epoch': 2}
{'type': 'loss', 'content': 0.010036454536020756, 'timestamp': '2025-10-02 00:45:18.836454', 'step': 19395, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:18.896409', 'step': 19395, 'epoch': 2}
{'type': 'loss', 'content': 0.11511765420436859, 'timestamp': '2025-10-02 00:45:18.904059', 'step': 19396, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:18.970525', 'step': 19396, 'epoch': 2}
{'type': 'loss', 'content': 0.009313728660345078, 'timestamp': '2025-10-02 00:45:18.981601', 'step': 19397, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:45:19.062864', 'step': 19397, 'epoch': 2}
{'type': 'loss', 'content': 0.12241561710834503, 'timestamp': '2025-10-02 00:45:19.072994', 'step': 19398, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:45:19.137726', 'step': 19398, 'epoch': 2}
{'type': 'loss', 'content': 0.05358060821890831, 'timestamp': '2025-10-02 00:45:19.147917', 'step': 19399, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:19.222657', 'step': 19399, 'epoch': 2}
{'type': 'loss', 'content': 0.07382836192846298, 'timestamp': '2025-10-02 00:45:19.231323', 'step': 19400, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:45:19.307145', 'step': 19400, 'epoch': 2}
{'type': 'loss', 'content': 0.0365232490003109, 'timestamp': '2025-10-02 00:45:19.316333', 'step': 19401, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:19.375660', 'step': 19401, 'epoch': 2}
{'type': 'loss', 'content': 0.12122692912817001, 'timestamp': '2025-10-02 00:45:19.386044', 'step': 19402, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:19.477320', 'step': 19402, 'epoch': 2}
{'type': 'loss', 'content': 0.04665350914001465, 'timestamp': '2025-10-02 00:45:19.484410', 'step': 19403, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:19.560504', 'step': 19403, 'epoch': 2}
{'type': 'loss', 'content': 0.05669057369232178, 'timestamp': '2025-10-02 00:45:19.576077', 'step': 19404, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:19.664909', 'step': 19404, 'epoch': 2}
{'type': 'loss', 'content': 0.03786914423108101, 'timestamp': '2025-10-02 00:45:19.675417', 'step': 19405, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:19.735814', 'step': 19405, 'epoch': 2}
{'type': 'loss', 'content': 0.1558382362127304, 'timestamp': '2025-10-02 00:45:19.747037', 'step': 19406, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:19.832901', 'step': 19406, 'epoch': 2}
{'type': 'loss', 'content': 0.15635038912296295, 'timestamp': '2025-10-02 00:45:19.836053', 'step': 19407, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:45:19.926199', 'step': 19407, 'epoch': 2}
{'type': 'loss', 'content': 0.019753091037273407, 'timestamp': '2025-10-02 00:45:19.943696', 'step': 19408, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:20.014251', 'step': 19408, 'epoch': 2}
{'type': 'loss', 'content': 0.12688587605953217, 'timestamp': '2025-10-02 00:45:20.026020', 'step': 19409, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:20.104937', 'step': 19409, 'epoch': 2}
{'type': 'loss', 'content': 0.09944450855255127, 'timestamp': '2025-10-02 00:45:20.115639', 'step': 19410, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:45:20.187954', 'step': 19410, 'epoch': 2}
{'type': 'loss', 'content': 0.0444842092692852, 'timestamp': '2025-10-02 00:45:20.200035', 'step': 19411, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:45:20.266782', 'step': 19411, 'epoch': 2}
{'type': 'loss', 'content': 0.1370532065629959, 'timestamp': '2025-10-02 00:45:20.274665', 'step': 19412, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:20.332410', 'step': 19412, 'epoch': 2}
{'type': 'loss', 'content': 0.06836391985416412, 'timestamp': '2025-10-02 00:45:20.342681', 'step': 19413, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:20.435668', 'step': 19413, 'epoch': 2}
{'type': 'loss', 'content': 0.027249470353126526, 'timestamp': '2025-10-02 00:45:20.445224', 'step': 19414, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:45:20.521165', 'step': 19414, 'epoch': 2}
{'type': 'loss', 'content': 0.09230507165193558, 'timestamp': '2025-10-02 00:45:20.524161', 'step': 19415, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:20.598573', 'step': 19415, 'epoch': 2}
{'type': 'loss', 'content': 0.15005996823310852, 'timestamp': '2025-10-02 00:45:20.614660', 'step': 19416, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:20.690599', 'step': 19416, 'epoch': 2}
{'type': 'loss', 'content': 0.07195702195167542, 'timestamp': '2025-10-02 00:45:20.699336', 'step': 19417, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:45:20.790653', 'step': 19417, 'epoch': 2}
{'type': 'loss', 'content': 0.030795827507972717, 'timestamp': '2025-10-02 00:45:20.802664', 'step': 19418, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:45:20.878523', 'step': 19418, 'epoch': 2}
{'type': 'loss', 'content': 0.02056516893208027, 'timestamp': '2025-10-02 00:45:20.892390', 'step': 19419, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:45:20.966561', 'step': 19419, 'epoch': 2}
{'type': 'loss', 'content': 0.00934970285743475, 'timestamp': '2025-10-02 00:45:20.978190', 'step': 19420, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:21.044771', 'step': 19420, 'epoch': 2}
{'type': 'loss', 'content': 0.051935385912656784, 'timestamp': '2025-10-02 00:45:21.056866', 'step': 19421, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:45:21.116554', 'step': 19421, 'epoch': 2}
{'type': 'loss', 'content': 0.05013824626803398, 'timestamp': '2025-10-02 00:45:21.132964', 'step': 19422, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 544], 'flops': 10880066115712.0}, 'timestamp': '2025-10-02 00:45:21.224706', 'step': 19422, 'epoch': 2}
{'type': 'loss', 'content': 0.03464227914810181, 'timestamp': '2025-10-02 00:45:21.239544', 'step': 19423, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:21.313183', 'step': 19423, 'epoch': 2}
{'type': 'loss', 'content': 0.012326776050031185, 'timestamp': '2025-10-02 00:45:21.323448', 'step': 19424, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:21.399611', 'step': 19424, 'epoch': 2}
{'type': 'loss', 'content': 0.014849838800728321, 'timestamp': '2025-10-02 00:45:21.411393', 'step': 19425, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:45:21.482373', 'step': 19425, 'epoch': 2}
{'type': 'loss', 'content': 0.08404369652271271, 'timestamp': '2025-10-02 00:45:21.486112', 'step': 19426, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:21.544321', 'step': 19426, 'epoch': 2}
{'type': 'loss', 'content': 0.03681041672825813, 'timestamp': '2025-10-02 00:45:21.549597', 'step': 19427, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:45:21.639341', 'step': 19427, 'epoch': 2}
{'type': 'loss', 'content': 0.11330369114875793, 'timestamp': '2025-10-02 00:45:21.648127', 'step': 19428, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:45:21.727340', 'step': 19428, 'epoch': 2}
{'type': 'loss', 'content': 0.05216304585337639, 'timestamp': '2025-10-02 00:45:21.739598', 'step': 19429, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:45:21.804631', 'step': 19429, 'epoch': 2}
{'type': 'loss', 'content': 0.034201912581920624, 'timestamp': '2025-10-02 00:45:21.812389', 'step': 19430, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:21.871347', 'step': 19430, 'epoch': 2}
{'type': 'loss', 'content': 0.13641652464866638, 'timestamp': '2025-10-02 00:45:21.874981', 'step': 19431, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:45:21.941285', 'step': 19431, 'epoch': 2}
{'type': 'loss', 'content': 0.07660379260778427, 'timestamp': '2025-10-02 00:45:21.951905', 'step': 19432, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:22.037795', 'step': 19432, 'epoch': 2}
{'type': 'loss', 'content': 0.09251552075147629, 'timestamp': '2025-10-02 00:45:22.044763', 'step': 19433, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:45:22.112629', 'step': 19433, 'epoch': 2}
{'type': 'loss', 'content': 0.027485013008117676, 'timestamp': '2025-10-02 00:45:22.121304', 'step': 19434, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:22.201939', 'step': 19434, 'epoch': 2}
{'type': 'loss', 'content': 0.0772424042224884, 'timestamp': '2025-10-02 00:45:22.205368', 'step': 19435, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:45:22.291312', 'step': 19435, 'epoch': 2}
{'type': 'loss', 'content': 0.026038609445095062, 'timestamp': '2025-10-02 00:45:22.302736', 'step': 19436, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:22.371705', 'step': 19436, 'epoch': 2}
{'type': 'loss', 'content': 0.083335280418396, 'timestamp': '2025-10-02 00:45:22.374685', 'step': 19437, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:45:22.451812', 'step': 19437, 'epoch': 2}
{'type': 'loss', 'content': 0.06310112029314041, 'timestamp': '2025-10-02 00:45:22.455698', 'step': 19438, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:22.529632', 'step': 19438, 'epoch': 2}
{'type': 'loss', 'content': 0.046383086591959, 'timestamp': '2025-10-02 00:45:22.534839', 'step': 19439, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:45:22.623421', 'step': 19439, 'epoch': 2}
{'type': 'loss', 'content': 0.037218157202005386, 'timestamp': '2025-10-02 00:45:22.636633', 'step': 19440, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:22.712746', 'step': 19440, 'epoch': 2}
{'type': 'loss', 'content': 0.15819698572158813, 'timestamp': '2025-10-02 00:45:22.716527', 'step': 19441, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:22.775381', 'step': 19441, 'epoch': 2}
{'type': 'loss', 'content': 0.03497502580285072, 'timestamp': '2025-10-02 00:45:22.780736', 'step': 19442, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:22.847667', 'step': 19442, 'epoch': 2}
{'type': 'loss', 'content': 0.051540475338697433, 'timestamp': '2025-10-02 00:45:22.861215', 'step': 19443, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:45:22.929906', 'step': 19443, 'epoch': 2}
{'type': 'loss', 'content': 0.1141849234700203, 'timestamp': '2025-10-02 00:45:22.937593', 'step': 19444, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:45:23.003991', 'step': 19444, 'epoch': 2}
{'type': 'loss', 'content': 0.020879721269011497, 'timestamp': '2025-10-02 00:45:23.008058', 'step': 19445, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:45:23.066289', 'step': 19445, 'epoch': 2}
{'type': 'loss', 'content': 0.09483101218938828, 'timestamp': '2025-10-02 00:45:23.068954', 'step': 19446, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:23.146691', 'step': 19446, 'epoch': 2}
{'type': 'loss', 'content': 0.18052040040493011, 'timestamp': '2025-10-02 00:45:23.157538', 'step': 19447, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:23.234023', 'step': 19447, 'epoch': 2}
{'type': 'loss', 'content': 0.08537627011537552, 'timestamp': '2025-10-02 00:45:23.241978', 'step': 19448, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:23.300839', 'step': 19448, 'epoch': 2}
{'type': 'loss', 'content': 0.048415470868349075, 'timestamp': '2025-10-02 00:45:23.308393', 'step': 19449, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:23.370109', 'step': 19449, 'epoch': 2}
{'type': 'loss', 'content': 0.04584608972072601, 'timestamp': '2025-10-02 00:45:23.383597', 'step': 19450, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:23.459275', 'step': 19450, 'epoch': 2}
{'type': 'loss', 'content': 0.08366121351718903, 'timestamp': '2025-10-02 00:45:23.462673', 'step': 19451, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:45:23.525204', 'step': 19451, 'epoch': 2}
{'type': 'loss', 'content': 0.04405062645673752, 'timestamp': '2025-10-02 00:45:23.535323', 'step': 19452, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:23.609268', 'step': 19452, 'epoch': 2}
{'type': 'loss', 'content': 0.0363677553832531, 'timestamp': '2025-10-02 00:45:23.620355', 'step': 19453, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:23.697447', 'step': 19453, 'epoch': 2}
{'type': 'loss', 'content': 0.01581725664436817, 'timestamp': '2025-10-02 00:45:23.704950', 'step': 19454, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:23.783374', 'step': 19454, 'epoch': 2}
{'type': 'loss', 'content': 0.11080079525709152, 'timestamp': '2025-10-02 00:45:23.787677', 'step': 19455, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:45:23.857021', 'step': 19455, 'epoch': 2}
{'type': 'loss', 'content': 0.024112269282341003, 'timestamp': '2025-10-02 00:45:23.867164', 'step': 19456, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:23.952215', 'step': 19456, 'epoch': 2}
{'type': 'loss', 'content': 0.019828081130981445, 'timestamp': '2025-10-02 00:45:23.956611', 'step': 19457, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:45:24.024298', 'step': 19457, 'epoch': 2}
{'type': 'loss', 'content': 0.01733877882361412, 'timestamp': '2025-10-02 00:45:24.028768', 'step': 19458, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:24.095946', 'step': 19458, 'epoch': 2}
{'type': 'loss', 'content': 0.11552495509386063, 'timestamp': '2025-10-02 00:45:24.099363', 'step': 19459, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:45:24.190291', 'step': 19459, 'epoch': 2}
{'type': 'loss', 'content': 0.07546906173229218, 'timestamp': '2025-10-02 00:45:24.197985', 'step': 19460, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:24.277592', 'step': 19460, 'epoch': 2}
{'type': 'loss', 'content': 0.007997252978384495, 'timestamp': '2025-10-02 00:45:24.287847', 'step': 19461, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:24.355133', 'step': 19461, 'epoch': 2}
{'type': 'loss', 'content': 0.15938319265842438, 'timestamp': '2025-10-02 00:45:24.368997', 'step': 19462, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:45:24.454747', 'step': 19462, 'epoch': 2}
{'type': 'loss', 'content': 0.009761855937540531, 'timestamp': '2025-10-02 00:45:24.465336', 'step': 19463, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:24.528243', 'step': 19463, 'epoch': 2}
{'type': 'loss', 'content': 0.10262417793273926, 'timestamp': '2025-10-02 00:45:24.545053', 'step': 19464, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:24.627549', 'step': 19464, 'epoch': 2}
{'type': 'loss', 'content': 0.08147810399532318, 'timestamp': '2025-10-02 00:45:24.633256', 'step': 19465, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:24.705308', 'step': 19465, 'epoch': 2}
{'type': 'loss', 'content': 0.021095028147101402, 'timestamp': '2025-10-02 00:45:24.714818', 'step': 19466, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:24.785527', 'step': 19466, 'epoch': 2}
{'type': 'loss', 'content': 0.09500288218259811, 'timestamp': '2025-10-02 00:45:24.789286', 'step': 19467, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:45:24.877397', 'step': 19467, 'epoch': 2}
{'type': 'loss', 'content': 0.029979297891259193, 'timestamp': '2025-10-02 00:45:24.888653', 'step': 19468, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:24.960205', 'step': 19468, 'epoch': 2}
{'type': 'loss', 'content': 0.009484772570431232, 'timestamp': '2025-10-02 00:45:24.964258', 'step': 19469, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:25.056877', 'step': 19469, 'epoch': 2}
{'type': 'loss', 'content': 0.02136617712676525, 'timestamp': '2025-10-02 00:45:25.061569', 'step': 19470, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:45:25.137420', 'step': 19470, 'epoch': 2}
{'type': 'loss', 'content': 0.023307522758841515, 'timestamp': '2025-10-02 00:45:25.148098', 'step': 19471, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:25.224400', 'step': 19471, 'epoch': 2}
{'type': 'loss', 'content': 0.012821630574762821, 'timestamp': '2025-10-02 00:45:25.231935', 'step': 19472, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:25.299348', 'step': 19472, 'epoch': 2}
{'type': 'loss', 'content': 0.08859182894229889, 'timestamp': '2025-10-02 00:45:25.310997', 'step': 19473, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:45:25.387522', 'step': 19473, 'epoch': 2}
{'type': 'loss', 'content': 0.05700761079788208, 'timestamp': '2025-10-02 00:45:25.391889', 'step': 19474, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:45:25.465665', 'step': 19474, 'epoch': 2}
{'type': 'loss', 'content': 0.07043673098087311, 'timestamp': '2025-10-02 00:45:25.469842', 'step': 19475, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:25.549188', 'step': 19475, 'epoch': 2}
{'type': 'loss', 'content': 0.07355038821697235, 'timestamp': '2025-10-02 00:45:25.565064', 'step': 19476, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:45:25.647392', 'step': 19476, 'epoch': 2}
{'type': 'loss', 'content': 0.1329820454120636, 'timestamp': '2025-10-02 00:45:25.651908', 'step': 19477, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:45:25.717066', 'step': 19477, 'epoch': 2}
{'type': 'loss', 'content': 0.007601771503686905, 'timestamp': '2025-10-02 00:45:25.727191', 'step': 19478, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:25.790378', 'step': 19478, 'epoch': 2}
{'type': 'loss', 'content': 0.08065404742956161, 'timestamp': '2025-10-02 00:45:25.809484', 'step': 19479, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:25.877308', 'step': 19479, 'epoch': 2}
{'type': 'loss', 'content': 0.09078483283519745, 'timestamp': '2025-10-02 00:45:25.886665', 'step': 19480, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:25.982991', 'step': 19480, 'epoch': 2}
{'type': 'loss', 'content': 0.028585461899638176, 'timestamp': '2025-10-02 00:45:25.988060', 'step': 19481, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:26.091380', 'step': 19481, 'epoch': 2}
{'type': 'loss', 'content': 0.03376545011997223, 'timestamp': '2025-10-02 00:45:26.110116', 'step': 19482, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:26.191259', 'step': 19482, 'epoch': 2}
{'type': 'loss', 'content': 0.019889237359166145, 'timestamp': '2025-10-02 00:45:26.196116', 'step': 19483, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:26.271846', 'step': 19483, 'epoch': 2}
{'type': 'loss', 'content': 0.054201338440179825, 'timestamp': '2025-10-02 00:45:26.281062', 'step': 19484, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:26.343521', 'step': 19484, 'epoch': 2}
{'type': 'loss', 'content': 0.03902747109532356, 'timestamp': '2025-10-02 00:45:26.347831', 'step': 19485, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:45:26.406863', 'step': 19485, 'epoch': 2}
{'type': 'loss', 'content': 0.026543540880084038, 'timestamp': '2025-10-02 00:45:26.416251', 'step': 19486, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:26.486426', 'step': 19486, 'epoch': 2}
{'type': 'loss', 'content': 0.14144480228424072, 'timestamp': '2025-10-02 00:45:26.494981', 'step': 19487, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:26.565651', 'step': 19487, 'epoch': 2}
{'type': 'loss', 'content': 0.19705642759799957, 'timestamp': '2025-10-02 00:45:26.573381', 'step': 19488, 'epoch': 2}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:45:54.347010', 'step': 19488, 'epoch': 2}
{'type': 'pplx', 'content': 99.99627491977633, 'timestamp': '2025-10-02 00:45:54.351186', 'step': 19488, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:54.406638', 'step': 19488, 'epoch': 2}
{'type': 'loss', 'content': 0.043854087591171265, 'timestamp': '2025-10-02 00:45:54.411693', 'step': 19489, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:45:54.474476', 'step': 19489, 'epoch': 2}
{'type': 'loss', 'content': 0.03274540603160858, 'timestamp': '2025-10-02 00:45:54.485310', 'step': 19490, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:54.544768', 'step': 19490, 'epoch': 2}
{'type': 'loss', 'content': 0.06678733229637146, 'timestamp': '2025-10-02 00:45:54.550207', 'step': 19491, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:45:54.619913', 'step': 19491, 'epoch': 2}
{'type': 'loss', 'content': 0.027867227792739868, 'timestamp': '2025-10-02 00:45:54.633009', 'step': 19492, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:54.687816', 'step': 19492, 'epoch': 2}
{'type': 'loss', 'content': 0.028491441160440445, 'timestamp': '2025-10-02 00:45:54.695166', 'step': 19493, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:45:54.749803', 'step': 19493, 'epoch': 2}
{'type': 'loss', 'content': 0.05654530972242355, 'timestamp': '2025-10-02 00:45:54.752628', 'step': 19494, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:54.807265', 'step': 19494, 'epoch': 2}
{'type': 'loss', 'content': 0.03361786901950836, 'timestamp': '2025-10-02 00:45:54.809856', 'step': 19495, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:54.867060', 'step': 19495, 'epoch': 2}
{'type': 'loss', 'content': 0.01804402284324169, 'timestamp': '2025-10-02 00:45:54.877377', 'step': 19496, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:54.931790', 'step': 19496, 'epoch': 2}
{'type': 'loss', 'content': 0.04995793104171753, 'timestamp': '2025-10-02 00:45:54.934667', 'step': 19497, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:45:55.003038', 'step': 19497, 'epoch': 2}
{'type': 'loss', 'content': 0.044181764125823975, 'timestamp': '2025-10-02 00:45:55.006027', 'step': 19498, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:45:55.062620', 'step': 19498, 'epoch': 2}
{'type': 'loss', 'content': 0.05655018612742424, 'timestamp': '2025-10-02 00:45:55.071790', 'step': 19499, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:55.126368', 'step': 19499, 'epoch': 2}
{'type': 'loss', 'content': 0.15790873765945435, 'timestamp': '2025-10-02 00:45:55.132242', 'step': 19500, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 19500', 'timestamp': '2025-10-02 00:45:55.878033', 'step': 19500, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:55.935748', 'step': 19500, 'epoch': 2}
{'type': 'loss', 'content': 0.0582384429872036, 'timestamp': '2025-10-02 00:45:55.938381', 'step': 19501, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:55.993634', 'step': 19501, 'epoch': 2}
{'type': 'loss', 'content': 0.03867063671350479, 'timestamp': '2025-10-02 00:45:55.996211', 'step': 19502, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:56.060093', 'step': 19502, 'epoch': 2}
{'type': 'loss', 'content': 0.011597706936299801, 'timestamp': '2025-10-02 00:45:56.063329', 'step': 19503, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:56.125885', 'step': 19503, 'epoch': 2}
{'type': 'loss', 'content': 0.06458140909671783, 'timestamp': '2025-10-02 00:45:56.133905', 'step': 19504, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:56.190056', 'step': 19504, 'epoch': 2}
{'type': 'loss', 'content': 0.04576658830046654, 'timestamp': '2025-10-02 00:45:56.197400', 'step': 19505, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:56.258241', 'step': 19505, 'epoch': 2}
{'type': 'loss', 'content': 0.0739109143614769, 'timestamp': '2025-10-02 00:45:56.263978', 'step': 19506, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:45:56.322281', 'step': 19506, 'epoch': 2}
{'type': 'loss', 'content': 0.14194361865520477, 'timestamp': '2025-10-02 00:45:56.325902', 'step': 19507, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:56.382549', 'step': 19507, 'epoch': 2}
{'type': 'loss', 'content': 0.09183576703071594, 'timestamp': '2025-10-02 00:45:56.389094', 'step': 19508, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:56.445887', 'step': 19508, 'epoch': 2}
{'type': 'loss', 'content': 0.048882294446229935, 'timestamp': '2025-10-02 00:45:56.449707', 'step': 19509, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:45:56.505416', 'step': 19509, 'epoch': 2}
{'type': 'loss', 'content': 0.06564487516880035, 'timestamp': '2025-10-02 00:45:56.518357', 'step': 19510, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:56.577156', 'step': 19510, 'epoch': 2}
{'type': 'loss', 'content': 0.04618415609002113, 'timestamp': '2025-10-02 00:45:56.580232', 'step': 19511, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:56.645822', 'step': 19511, 'epoch': 2}
{'type': 'loss', 'content': 0.018046075478196144, 'timestamp': '2025-10-02 00:45:56.656133', 'step': 19512, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:56.714906', 'step': 19512, 'epoch': 2}
{'type': 'loss', 'content': 0.024098731577396393, 'timestamp': '2025-10-02 00:45:56.717413', 'step': 19513, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:56.773848', 'step': 19513, 'epoch': 2}
{'type': 'loss', 'content': 0.04653984680771828, 'timestamp': '2025-10-02 00:45:56.781123', 'step': 19514, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:56.836186', 'step': 19514, 'epoch': 2}
{'type': 'loss', 'content': 0.1430623084306717, 'timestamp': '2025-10-02 00:45:56.839960', 'step': 19515, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:56.899638', 'step': 19515, 'epoch': 2}
{'type': 'loss', 'content': 0.026287440210580826, 'timestamp': '2025-10-02 00:45:56.906458', 'step': 19516, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:56.965245', 'step': 19516, 'epoch': 2}
{'type': 'loss', 'content': 0.08112365752458572, 'timestamp': '2025-10-02 00:45:56.967897', 'step': 19517, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:57.023045', 'step': 19517, 'epoch': 2}
{'type': 'loss', 'content': 0.07726141810417175, 'timestamp': '2025-10-02 00:45:57.032556', 'step': 19518, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:57.099641', 'step': 19518, 'epoch': 2}
{'type': 'loss', 'content': 0.045505642890930176, 'timestamp': '2025-10-02 00:45:57.106706', 'step': 19519, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:57.165060', 'step': 19519, 'epoch': 2}
{'type': 'loss', 'content': 0.0286727137863636, 'timestamp': '2025-10-02 00:45:57.172887', 'step': 19520, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:45:57.243807', 'step': 19520, 'epoch': 2}
{'type': 'loss', 'content': 0.0529092513024807, 'timestamp': '2025-10-02 00:45:57.257618', 'step': 19521, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:45:57.326427', 'step': 19521, 'epoch': 2}
{'type': 'loss', 'content': 0.02153950370848179, 'timestamp': '2025-10-02 00:45:57.337029', 'step': 19522, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:45:57.393630', 'step': 19522, 'epoch': 2}
{'type': 'loss', 'content': 0.13819676637649536, 'timestamp': '2025-10-02 00:45:57.402928', 'step': 19523, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:57.459019', 'step': 19523, 'epoch': 2}
{'type': 'loss', 'content': 0.14128011465072632, 'timestamp': '2025-10-02 00:45:57.465268', 'step': 19524, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:57.521175', 'step': 19524, 'epoch': 2}
{'type': 'loss', 'content': 0.014933528378605843, 'timestamp': '2025-10-02 00:45:57.531415', 'step': 19525, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:45:57.590853', 'step': 19525, 'epoch': 2}
{'type': 'loss', 'content': 0.08009573072195053, 'timestamp': '2025-10-02 00:45:57.601030', 'step': 19526, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:57.655830', 'step': 19526, 'epoch': 2}
{'type': 'loss', 'content': 0.11288634687662125, 'timestamp': '2025-10-02 00:45:57.658904', 'step': 19527, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:57.714082', 'step': 19527, 'epoch': 2}
{'type': 'loss', 'content': 0.10490337759256363, 'timestamp': '2025-10-02 00:45:57.720561', 'step': 19528, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:45:57.775357', 'step': 19528, 'epoch': 2}
{'type': 'loss', 'content': 0.05466991290450096, 'timestamp': '2025-10-02 00:45:57.778020', 'step': 19529, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:57.832657', 'step': 19529, 'epoch': 2}
{'type': 'loss', 'content': 0.05056474730372429, 'timestamp': '2025-10-02 00:45:57.839889', 'step': 19530, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:45:57.897001', 'step': 19530, 'epoch': 2}
{'type': 'loss', 'content': 0.09007684141397476, 'timestamp': '2025-10-02 00:45:57.899604', 'step': 19531, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:57.954329', 'step': 19531, 'epoch': 2}
{'type': 'loss', 'content': 0.03655559942126274, 'timestamp': '2025-10-02 00:45:57.960367', 'step': 19532, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:58.014893', 'step': 19532, 'epoch': 2}
{'type': 'loss', 'content': 0.03821393474936485, 'timestamp': '2025-10-02 00:45:58.017849', 'step': 19533, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:45:58.090639', 'step': 19533, 'epoch': 2}
{'type': 'loss', 'content': 0.01571119949221611, 'timestamp': '2025-10-02 00:45:58.103055', 'step': 19534, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:58.158234', 'step': 19534, 'epoch': 2}
{'type': 'loss', 'content': 0.03599834814667702, 'timestamp': '2025-10-02 00:45:58.165393', 'step': 19535, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:58.222241', 'step': 19535, 'epoch': 2}
{'type': 'loss', 'content': 0.0329749695956707, 'timestamp': '2025-10-02 00:45:58.228891', 'step': 19536, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:58.283111', 'step': 19536, 'epoch': 2}
{'type': 'loss', 'content': 0.054742150008678436, 'timestamp': '2025-10-02 00:45:58.285911', 'step': 19537, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:45:58.341243', 'step': 19537, 'epoch': 2}
{'type': 'loss', 'content': 0.03378146141767502, 'timestamp': '2025-10-02 00:45:58.350535', 'step': 19538, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:45:58.406530', 'step': 19538, 'epoch': 2}
{'type': 'loss', 'content': 0.052730266004800797, 'timestamp': '2025-10-02 00:45:58.409120', 'step': 19539, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:58.464030', 'step': 19539, 'epoch': 2}
{'type': 'loss', 'content': 0.11995649337768555, 'timestamp': '2025-10-02 00:45:58.470431', 'step': 19540, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:58.526619', 'step': 19540, 'epoch': 2}
{'type': 'loss', 'content': 0.07142134755849838, 'timestamp': '2025-10-02 00:45:58.529152', 'step': 19541, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:45:58.583750', 'step': 19541, 'epoch': 2}
{'type': 'loss', 'content': 0.12079451233148575, 'timestamp': '2025-10-02 00:45:58.586113', 'step': 19542, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:58.641518', 'step': 19542, 'epoch': 2}
{'type': 'loss', 'content': 0.040961526334285736, 'timestamp': '2025-10-02 00:45:58.644100', 'step': 19543, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:58.699727', 'step': 19543, 'epoch': 2}
{'type': 'loss', 'content': 0.05911657586693764, 'timestamp': '2025-10-02 00:45:58.706296', 'step': 19544, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:45:58.761266', 'step': 19544, 'epoch': 2}
{'type': 'loss', 'content': 0.08355806022882462, 'timestamp': '2025-10-02 00:45:58.764384', 'step': 19545, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:45:58.819820', 'step': 19545, 'epoch': 2}
{'type': 'loss', 'content': 0.02494599111378193, 'timestamp': '2025-10-02 00:45:58.822545', 'step': 19546, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:58.878209', 'step': 19546, 'epoch': 2}
{'type': 'loss', 'content': 0.05872809886932373, 'timestamp': '2025-10-02 00:45:58.881004', 'step': 19547, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:45:58.938211', 'step': 19547, 'epoch': 2}
{'type': 'loss', 'content': 0.031094254925847054, 'timestamp': '2025-10-02 00:45:58.944483', 'step': 19548, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:58.998614', 'step': 19548, 'epoch': 2}
{'type': 'loss', 'content': 0.05556928738951683, 'timestamp': '2025-10-02 00:45:59.001496', 'step': 19549, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:59.055696', 'step': 19549, 'epoch': 2}
{'type': 'loss', 'content': 0.09709233790636063, 'timestamp': '2025-10-02 00:45:59.058323', 'step': 19550, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:45:59.115045', 'step': 19550, 'epoch': 2}
{'type': 'loss', 'content': 0.07357840985059738, 'timestamp': '2025-10-02 00:45:59.117988', 'step': 19551, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:45:59.186008', 'step': 19551, 'epoch': 2}
{'type': 'loss', 'content': 0.01904263161122799, 'timestamp': '2025-10-02 00:45:59.198707', 'step': 19552, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:45:59.254062', 'step': 19552, 'epoch': 2}
{'type': 'loss', 'content': 0.11115115880966187, 'timestamp': '2025-10-02 00:45:59.256990', 'step': 19553, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:45:59.327102', 'step': 19553, 'epoch': 2}
{'type': 'loss', 'content': 0.005756870377808809, 'timestamp': '2025-10-02 00:45:59.339406', 'step': 19554, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:45:59.404180', 'step': 19554, 'epoch': 2}
{'type': 'loss', 'content': 0.03247058764100075, 'timestamp': '2025-10-02 00:45:59.415008', 'step': 19555, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:45:59.471358', 'step': 19555, 'epoch': 2}
{'type': 'loss', 'content': 0.0990663692355156, 'timestamp': '2025-10-02 00:45:59.481677', 'step': 19556, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:45:59.539020', 'step': 19556, 'epoch': 2}
{'type': 'loss', 'content': 0.08973861485719681, 'timestamp': '2025-10-02 00:45:59.546279', 'step': 19557, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:45:59.604681', 'step': 19557, 'epoch': 2}
{'type': 'loss', 'content': 0.08560203015804291, 'timestamp': '2025-10-02 00:45:59.609585', 'step': 19558, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:45:59.667375', 'step': 19558, 'epoch': 2}
{'type': 'loss', 'content': 0.03167767822742462, 'timestamp': '2025-10-02 00:45:59.671606', 'step': 19559, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:45:59.730425', 'step': 19559, 'epoch': 2}
{'type': 'loss', 'content': 0.044630762189626694, 'timestamp': '2025-10-02 00:45:59.738024', 'step': 19560, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:45:59.795435', 'step': 19560, 'epoch': 2}
{'type': 'loss', 'content': 0.04428548365831375, 'timestamp': '2025-10-02 00:45:59.797794', 'step': 19561, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:45:59.854934', 'step': 19561, 'epoch': 2}
{'type': 'loss', 'content': 0.14818640053272247, 'timestamp': '2025-10-02 00:45:59.857465', 'step': 19562, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:45:59.914771', 'step': 19562, 'epoch': 2}
{'type': 'loss', 'content': 0.09138883650302887, 'timestamp': '2025-10-02 00:45:59.917795', 'step': 19563, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:45:59.975692', 'step': 19563, 'epoch': 2}
{'type': 'loss', 'content': 0.1282556802034378, 'timestamp': '2025-10-02 00:45:59.982733', 'step': 19564, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:00.040598', 'step': 19564, 'epoch': 2}
{'type': 'loss', 'content': 0.09560812264680862, 'timestamp': '2025-10-02 00:46:00.043006', 'step': 19565, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:00.098844', 'step': 19565, 'epoch': 2}
{'type': 'loss', 'content': 0.03869328275322914, 'timestamp': '2025-10-02 00:46:00.102348', 'step': 19566, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:00.160700', 'step': 19566, 'epoch': 2}
{'type': 'loss', 'content': 0.02812911756336689, 'timestamp': '2025-10-02 00:46:00.163250', 'step': 19567, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:00.220229', 'step': 19567, 'epoch': 2}
{'type': 'loss', 'content': 0.04659217968583107, 'timestamp': '2025-10-02 00:46:00.227907', 'step': 19568, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:00.287947', 'step': 19568, 'epoch': 2}
{'type': 'loss', 'content': 0.04661855846643448, 'timestamp': '2025-10-02 00:46:00.293494', 'step': 19569, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:46:00.353893', 'step': 19569, 'epoch': 2}
{'type': 'loss', 'content': 0.015240253880620003, 'timestamp': '2025-10-02 00:46:00.364038', 'step': 19570, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:00.422520', 'step': 19570, 'epoch': 2}
{'type': 'loss', 'content': 0.021511396393179893, 'timestamp': '2025-10-02 00:46:00.426023', 'step': 19571, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 640], 'flops': 12800077771264.0}, 'timestamp': '2025-10-02 00:46:00.522057', 'step': 19571, 'epoch': 2}
{'type': 'loss', 'content': 0.02002161741256714, 'timestamp': '2025-10-02 00:46:00.539961', 'step': 19572, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:00.599007', 'step': 19572, 'epoch': 2}
{'type': 'loss', 'content': 0.03360234946012497, 'timestamp': '2025-10-02 00:46:00.602758', 'step': 19573, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:00.660110', 'step': 19573, 'epoch': 2}
{'type': 'loss', 'content': 0.08609092980623245, 'timestamp': '2025-10-02 00:46:00.663703', 'step': 19574, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:00.721648', 'step': 19574, 'epoch': 2}
{'type': 'loss', 'content': 0.04744052141904831, 'timestamp': '2025-10-02 00:46:00.725581', 'step': 19575, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:46:00.789689', 'step': 19575, 'epoch': 2}
{'type': 'loss', 'content': 0.002691093599423766, 'timestamp': '2025-10-02 00:46:00.801250', 'step': 19576, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:46:00.861706', 'step': 19576, 'epoch': 2}
{'type': 'loss', 'content': 0.020311899483203888, 'timestamp': '2025-10-02 00:46:00.873032', 'step': 19577, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:00.930210', 'step': 19577, 'epoch': 2}
{'type': 'loss', 'content': 0.10476227104663849, 'timestamp': '2025-10-02 00:46:00.934268', 'step': 19578, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-10-02 00:46:01.013226', 'step': 19578, 'epoch': 2}
{'type': 'loss', 'content': 0.004931480623781681, 'timestamp': '2025-10-02 00:46:01.026889', 'step': 19579, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:01.087011', 'step': 19579, 'epoch': 2}
{'type': 'loss', 'content': 0.0802917629480362, 'timestamp': '2025-10-02 00:46:01.093608', 'step': 19580, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:01.148510', 'step': 19580, 'epoch': 2}
{'type': 'loss', 'content': 0.08006926625967026, 'timestamp': '2025-10-02 00:46:01.150740', 'step': 19581, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:01.205678', 'step': 19581, 'epoch': 2}
{'type': 'loss', 'content': 0.028553146868944168, 'timestamp': '2025-10-02 00:46:01.208112', 'step': 19582, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:01.262702', 'step': 19582, 'epoch': 2}
{'type': 'loss', 'content': 0.07566791027784348, 'timestamp': '2025-10-02 00:46:01.265177', 'step': 19583, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:01.319790', 'step': 19583, 'epoch': 2}
{'type': 'loss', 'content': 0.023139629513025284, 'timestamp': '2025-10-02 00:46:01.325880', 'step': 19584, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:01.379603', 'step': 19584, 'epoch': 2}
{'type': 'loss', 'content': 0.08268225938081741, 'timestamp': '2025-10-02 00:46:01.382682', 'step': 19585, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:01.437132', 'step': 19585, 'epoch': 2}
{'type': 'loss', 'content': 0.05871405825018883, 'timestamp': '2025-10-02 00:46:01.442847', 'step': 19586, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:01.498901', 'step': 19586, 'epoch': 2}
{'type': 'loss', 'content': 0.01657702960073948, 'timestamp': '2025-10-02 00:46:01.501250', 'step': 19587, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:01.555423', 'step': 19587, 'epoch': 2}
{'type': 'loss', 'content': 0.009090058505535126, 'timestamp': '2025-10-02 00:46:01.563451', 'step': 19588, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:01.618638', 'step': 19588, 'epoch': 2}
{'type': 'loss', 'content': 0.03517809510231018, 'timestamp': '2025-10-02 00:46:01.621538', 'step': 19589, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:01.684623', 'step': 19589, 'epoch': 2}
{'type': 'loss', 'content': 0.07072800397872925, 'timestamp': '2025-10-02 00:46:01.687184', 'step': 19590, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:01.742078', 'step': 19590, 'epoch': 2}
{'type': 'loss', 'content': 0.016047827899456024, 'timestamp': '2025-10-02 00:46:01.744492', 'step': 19591, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:46:01.805518', 'step': 19591, 'epoch': 2}
{'type': 'loss', 'content': 0.018580731004476547, 'timestamp': '2025-10-02 00:46:01.816738', 'step': 19592, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:46:01.870623', 'step': 19592, 'epoch': 2}
{'type': 'loss', 'content': 0.0556289479136467, 'timestamp': '2025-10-02 00:46:01.880819', 'step': 19593, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:01.935774', 'step': 19593, 'epoch': 2}
{'type': 'loss', 'content': 0.07172812521457672, 'timestamp': '2025-10-02 00:46:01.938237', 'step': 19594, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:01.993228', 'step': 19594, 'epoch': 2}
{'type': 'loss', 'content': 0.08374805748462677, 'timestamp': '2025-10-02 00:46:01.995771', 'step': 19595, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:02.051704', 'step': 19595, 'epoch': 2}
{'type': 'loss', 'content': 0.023042399436235428, 'timestamp': '2025-10-02 00:46:02.058047', 'step': 19596, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:02.112256', 'step': 19596, 'epoch': 2}
{'type': 'loss', 'content': 0.0598006509244442, 'timestamp': '2025-10-02 00:46:02.114893', 'step': 19597, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:02.170167', 'step': 19597, 'epoch': 2}
{'type': 'loss', 'content': 0.035682689398527145, 'timestamp': '2025-10-02 00:46:02.172790', 'step': 19598, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:02.228237', 'step': 19598, 'epoch': 2}
{'type': 'loss', 'content': 0.033957380801439285, 'timestamp': '2025-10-02 00:46:02.235436', 'step': 19599, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:02.289825', 'step': 19599, 'epoch': 2}
{'type': 'loss', 'content': 0.04227667674422264, 'timestamp': '2025-10-02 00:46:02.298804', 'step': 19600, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:02.357777', 'step': 19600, 'epoch': 2}
{'type': 'loss', 'content': 0.05779039114713669, 'timestamp': '2025-10-02 00:46:02.366838', 'step': 19601, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:02.424626', 'step': 19601, 'epoch': 2}
{'type': 'loss', 'content': 0.06691855192184448, 'timestamp': '2025-10-02 00:46:02.431910', 'step': 19602, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:02.486657', 'step': 19602, 'epoch': 2}
{'type': 'loss', 'content': 0.05704164505004883, 'timestamp': '2025-10-02 00:46:02.489934', 'step': 19603, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:02.549978', 'step': 19603, 'epoch': 2}
{'type': 'loss', 'content': 0.10574091970920563, 'timestamp': '2025-10-02 00:46:02.556459', 'step': 19604, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:02.611634', 'step': 19604, 'epoch': 2}
{'type': 'loss', 'content': 0.047996703535318375, 'timestamp': '2025-10-02 00:46:02.618802', 'step': 19605, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:02.674025', 'step': 19605, 'epoch': 2}
{'type': 'loss', 'content': 0.04408802464604378, 'timestamp': '2025-10-02 00:46:02.676990', 'step': 19606, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:02.733187', 'step': 19606, 'epoch': 2}
{'type': 'loss', 'content': 0.15314821898937225, 'timestamp': '2025-10-02 00:46:02.736324', 'step': 19607, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:02.791868', 'step': 19607, 'epoch': 2}
{'type': 'loss', 'content': 0.11629194021224976, 'timestamp': '2025-10-02 00:46:02.800011', 'step': 19608, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:02.855299', 'step': 19608, 'epoch': 2}
{'type': 'loss', 'content': 0.11930128186941147, 'timestamp': '2025-10-02 00:46:02.858128', 'step': 19609, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:02.912421', 'step': 19609, 'epoch': 2}
{'type': 'loss', 'content': 0.10160769522190094, 'timestamp': '2025-10-02 00:46:02.914943', 'step': 19610, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:02.970814', 'step': 19610, 'epoch': 2}
{'type': 'loss', 'content': 0.17119647562503815, 'timestamp': '2025-10-02 00:46:02.974245', 'step': 19611, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:46:03.030243', 'step': 19611, 'epoch': 2}
{'type': 'loss', 'content': 0.01944679208099842, 'timestamp': '2025-10-02 00:46:03.040519', 'step': 19612, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:03.094613', 'step': 19612, 'epoch': 2}
{'type': 'loss', 'content': 0.08110763132572174, 'timestamp': '2025-10-02 00:46:03.097499', 'step': 19613, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:46:03.165218', 'step': 19613, 'epoch': 2}
{'type': 'loss', 'content': 0.006035428959876299, 'timestamp': '2025-10-02 00:46:03.177178', 'step': 19614, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:46:03.245420', 'step': 19614, 'epoch': 2}
{'type': 'loss', 'content': 0.003946265205740929, 'timestamp': '2025-10-02 00:46:03.257349', 'step': 19615, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:03.316168', 'step': 19615, 'epoch': 2}
{'type': 'loss', 'content': 0.04248366877436638, 'timestamp': '2025-10-02 00:46:03.322411', 'step': 19616, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:46:03.376068', 'step': 19616, 'epoch': 2}
{'type': 'loss', 'content': 0.010999458841979504, 'timestamp': '2025-10-02 00:46:03.386282', 'step': 19617, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:03.441049', 'step': 19617, 'epoch': 2}
{'type': 'loss', 'content': 0.020209530368447304, 'timestamp': '2025-10-02 00:46:03.443560', 'step': 19618, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:46:03.507650', 'step': 19618, 'epoch': 2}
{'type': 'loss', 'content': 0.010689357295632362, 'timestamp': '2025-10-02 00:46:03.518481', 'step': 19619, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:03.573770', 'step': 19619, 'epoch': 2}
{'type': 'loss', 'content': 0.036678630858659744, 'timestamp': '2025-10-02 00:46:03.580548', 'step': 19620, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:03.635667', 'step': 19620, 'epoch': 2}
{'type': 'loss', 'content': 0.003239044453948736, 'timestamp': '2025-10-02 00:46:03.638129', 'step': 19621, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:03.691868', 'step': 19621, 'epoch': 2}
{'type': 'loss', 'content': 0.1594143956899643, 'timestamp': '2025-10-02 00:46:03.694523', 'step': 19622, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:03.748963', 'step': 19622, 'epoch': 2}
{'type': 'loss', 'content': 0.0948239341378212, 'timestamp': '2025-10-02 00:46:03.751476', 'step': 19623, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:03.805630', 'step': 19623, 'epoch': 2}
{'type': 'loss', 'content': 0.17776669561862946, 'timestamp': '2025-10-02 00:46:03.811631', 'step': 19624, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:03.865943', 'step': 19624, 'epoch': 2}
{'type': 'loss', 'content': 0.03728970140218735, 'timestamp': '2025-10-02 00:46:03.868790', 'step': 19625, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:03.928384', 'step': 19625, 'epoch': 2}
{'type': 'loss', 'content': 0.07377959042787552, 'timestamp': '2025-10-02 00:46:03.937566', 'step': 19626, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:03.992952', 'step': 19626, 'epoch': 2}
{'type': 'loss', 'content': 0.10970348864793777, 'timestamp': '2025-10-02 00:46:03.995737', 'step': 19627, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:04.050328', 'step': 19627, 'epoch': 2}
{'type': 'loss', 'content': 0.1506577730178833, 'timestamp': '2025-10-02 00:46:04.058368', 'step': 19628, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:04.112849', 'step': 19628, 'epoch': 2}
{'type': 'loss', 'content': 0.03241470083594322, 'timestamp': '2025-10-02 00:46:04.115536', 'step': 19629, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:04.169955', 'step': 19629, 'epoch': 2}
{'type': 'loss', 'content': 0.04376479983329773, 'timestamp': '2025-10-02 00:46:04.172385', 'step': 19630, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:04.227505', 'step': 19630, 'epoch': 2}
{'type': 'loss', 'content': 0.038076017051935196, 'timestamp': '2025-10-02 00:46:04.230246', 'step': 19631, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:04.284834', 'step': 19631, 'epoch': 2}
{'type': 'loss', 'content': 0.04599376767873764, 'timestamp': '2025-10-02 00:46:04.290889', 'step': 19632, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:04.344508', 'step': 19632, 'epoch': 2}
{'type': 'loss', 'content': 0.11727886646986008, 'timestamp': '2025-10-02 00:46:04.346881', 'step': 19633, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:04.401518', 'step': 19633, 'epoch': 2}
{'type': 'loss', 'content': 0.030581919476389885, 'timestamp': '2025-10-02 00:46:04.403806', 'step': 19634, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:04.458340', 'step': 19634, 'epoch': 2}
{'type': 'loss', 'content': 0.05612102523446083, 'timestamp': '2025-10-02 00:46:04.464024', 'step': 19635, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:04.518857', 'step': 19635, 'epoch': 2}
{'type': 'loss', 'content': 0.028819767758250237, 'timestamp': '2025-10-02 00:46:04.524981', 'step': 19636, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:04.580963', 'step': 19636, 'epoch': 2}
{'type': 'loss', 'content': 0.02164032869040966, 'timestamp': '2025-10-02 00:46:04.587674', 'step': 19637, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:04.642236', 'step': 19637, 'epoch': 2}
{'type': 'loss', 'content': 0.06746570765972137, 'timestamp': '2025-10-02 00:46:04.644568', 'step': 19638, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:04.699525', 'step': 19638, 'epoch': 2}
{'type': 'loss', 'content': 0.06835339963436127, 'timestamp': '2025-10-02 00:46:04.701818', 'step': 19639, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:04.756696', 'step': 19639, 'epoch': 2}
{'type': 'loss', 'content': 0.06067077815532684, 'timestamp': '2025-10-02 00:46:04.762925', 'step': 19640, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:04.817204', 'step': 19640, 'epoch': 2}
{'type': 'loss', 'content': 0.060525424778461456, 'timestamp': '2025-10-02 00:46:04.820289', 'step': 19641, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:46:04.879180', 'step': 19641, 'epoch': 2}
{'type': 'loss', 'content': 0.05479152500629425, 'timestamp': '2025-10-02 00:46:04.889326', 'step': 19642, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:04.943753', 'step': 19642, 'epoch': 2}
{'type': 'loss', 'content': 0.10137654840946198, 'timestamp': '2025-10-02 00:46:04.946384', 'step': 19643, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:05.001091', 'step': 19643, 'epoch': 2}
{'type': 'loss', 'content': 0.007810783106833696, 'timestamp': '2025-10-02 00:46:05.007065', 'step': 19644, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:05.060989', 'step': 19644, 'epoch': 2}
{'type': 'loss', 'content': 0.04528138414025307, 'timestamp': '2025-10-02 00:46:05.063413', 'step': 19645, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:05.118063', 'step': 19645, 'epoch': 2}
{'type': 'loss', 'content': 0.042294204235076904, 'timestamp': '2025-10-02 00:46:05.120519', 'step': 19646, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:46:05.174931', 'step': 19646, 'epoch': 2}
{'type': 'loss', 'content': 0.11702819913625717, 'timestamp': '2025-10-02 00:46:05.177582', 'step': 19647, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:05.232501', 'step': 19647, 'epoch': 2}
{'type': 'loss', 'content': 0.04007280245423317, 'timestamp': '2025-10-02 00:46:05.238828', 'step': 19648, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:05.294810', 'step': 19648, 'epoch': 2}
{'type': 'loss', 'content': 0.0364861935377121, 'timestamp': '2025-10-02 00:46:05.300453', 'step': 19649, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:05.354828', 'step': 19649, 'epoch': 2}
{'type': 'loss', 'content': 0.053941186517477036, 'timestamp': '2025-10-02 00:46:05.357886', 'step': 19650, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:05.412925', 'step': 19650, 'epoch': 2}
{'type': 'loss', 'content': 0.05233900249004364, 'timestamp': '2025-10-02 00:46:05.419247', 'step': 19651, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:05.473865', 'step': 19651, 'epoch': 2}
{'type': 'loss', 'content': 0.019588755443692207, 'timestamp': '2025-10-02 00:46:05.481986', 'step': 19652, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:05.535908', 'step': 19652, 'epoch': 2}
{'type': 'loss', 'content': 0.05627371370792389, 'timestamp': '2025-10-02 00:46:05.538798', 'step': 19653, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:05.594359', 'step': 19653, 'epoch': 2}
{'type': 'loss', 'content': 0.05386486276984215, 'timestamp': '2025-10-02 00:46:05.596848', 'step': 19654, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:46:05.651111', 'step': 19654, 'epoch': 2}
{'type': 'loss', 'content': 0.15076516568660736, 'timestamp': '2025-10-02 00:46:05.653647', 'step': 19655, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:05.708280', 'step': 19655, 'epoch': 2}
{'type': 'loss', 'content': 0.04390210658311844, 'timestamp': '2025-10-02 00:46:05.716299', 'step': 19656, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:05.771165', 'step': 19656, 'epoch': 2}
{'type': 'loss', 'content': 0.025130530819296837, 'timestamp': '2025-10-02 00:46:05.773667', 'step': 19657, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:46:05.832503', 'step': 19657, 'epoch': 2}
{'type': 'loss', 'content': 0.02623986452817917, 'timestamp': '2025-10-02 00:46:05.842610', 'step': 19658, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:46:05.914367', 'step': 19658, 'epoch': 2}
{'type': 'loss', 'content': 0.031912900507450104, 'timestamp': '2025-10-02 00:46:05.926979', 'step': 19659, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:05.982767', 'step': 19659, 'epoch': 2}
{'type': 'loss', 'content': 0.047917213290929794, 'timestamp': '2025-10-02 00:46:05.988569', 'step': 19660, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:06.042622', 'step': 19660, 'epoch': 2}
{'type': 'loss', 'content': 0.005225266329944134, 'timestamp': '2025-10-02 00:46:06.048256', 'step': 19661, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:06.104366', 'step': 19661, 'epoch': 2}
{'type': 'loss', 'content': 0.04460163041949272, 'timestamp': '2025-10-02 00:46:06.109830', 'step': 19662, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:06.165676', 'step': 19662, 'epoch': 2}
{'type': 'loss', 'content': 0.10378434509038925, 'timestamp': '2025-10-02 00:46:06.168119', 'step': 19663, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:46:06.223466', 'step': 19663, 'epoch': 2}
{'type': 'loss', 'content': 0.17505846917629242, 'timestamp': '2025-10-02 00:46:06.229645', 'step': 19664, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:06.283781', 'step': 19664, 'epoch': 2}
{'type': 'loss', 'content': 0.061860788613557816, 'timestamp': '2025-10-02 00:46:06.286116', 'step': 19665, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:46:06.345454', 'step': 19665, 'epoch': 2}
{'type': 'loss', 'content': 0.06894031167030334, 'timestamp': '2025-10-02 00:46:06.355659', 'step': 19666, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:06.413327', 'step': 19666, 'epoch': 2}
{'type': 'loss', 'content': 0.052359309047460556, 'timestamp': '2025-10-02 00:46:06.416625', 'step': 19667, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:06.471054', 'step': 19667, 'epoch': 2}
{'type': 'loss', 'content': 0.17925947904586792, 'timestamp': '2025-10-02 00:46:06.476978', 'step': 19668, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:06.531368', 'step': 19668, 'epoch': 2}
{'type': 'loss', 'content': 0.054848916828632355, 'timestamp': '2025-10-02 00:46:06.533999', 'step': 19669, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:06.590118', 'step': 19669, 'epoch': 2}
{'type': 'loss', 'content': 0.0029335038270801306, 'timestamp': '2025-10-02 00:46:06.592751', 'step': 19670, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:06.648299', 'step': 19670, 'epoch': 2}
{'type': 'loss', 'content': 0.11373194307088852, 'timestamp': '2025-10-02 00:46:06.650846', 'step': 19671, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:06.705253', 'step': 19671, 'epoch': 2}
{'type': 'loss', 'content': 0.01246628351509571, 'timestamp': '2025-10-02 00:46:06.711053', 'step': 19672, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:06.764709', 'step': 19672, 'epoch': 2}
{'type': 'loss', 'content': 0.045376524329185486, 'timestamp': '2025-10-02 00:46:06.772157', 'step': 19673, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:06.829127', 'step': 19673, 'epoch': 2}
{'type': 'loss', 'content': 0.12116061896085739, 'timestamp': '2025-10-02 00:46:06.831891', 'step': 19674, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:46:06.886528', 'step': 19674, 'epoch': 2}
{'type': 'loss', 'content': 0.08471633493900299, 'timestamp': '2025-10-02 00:46:06.889386', 'step': 19675, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:06.943522', 'step': 19675, 'epoch': 2}
{'type': 'loss', 'content': 0.1576014906167984, 'timestamp': '2025-10-02 00:46:06.949858', 'step': 19676, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:07.004571', 'step': 19676, 'epoch': 2}
{'type': 'loss', 'content': 0.01590849831700325, 'timestamp': '2025-10-02 00:46:07.011836', 'step': 19677, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:07.067254', 'step': 19677, 'epoch': 2}
{'type': 'loss', 'content': 0.24662427604198456, 'timestamp': '2025-10-02 00:46:07.069569', 'step': 19678, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:07.124412', 'step': 19678, 'epoch': 2}
{'type': 'loss', 'content': 0.061548613011837006, 'timestamp': '2025-10-02 00:46:07.133746', 'step': 19679, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:07.190633', 'step': 19679, 'epoch': 2}
{'type': 'loss', 'content': 0.0120150251314044, 'timestamp': '2025-10-02 00:46:07.197086', 'step': 19680, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:07.251022', 'step': 19680, 'epoch': 2}
{'type': 'loss', 'content': 0.012415420264005661, 'timestamp': '2025-10-02 00:46:07.253512', 'step': 19681, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:07.308526', 'step': 19681, 'epoch': 2}
{'type': 'loss', 'content': 0.0728151723742485, 'timestamp': '2025-10-02 00:46:07.310848', 'step': 19682, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:07.365512', 'step': 19682, 'epoch': 2}
{'type': 'loss', 'content': 0.08219987899065018, 'timestamp': '2025-10-02 00:46:07.369398', 'step': 19683, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:07.424161', 'step': 19683, 'epoch': 2}
{'type': 'loss', 'content': 0.07763507217168808, 'timestamp': '2025-10-02 00:46:07.430549', 'step': 19684, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:07.485611', 'step': 19684, 'epoch': 2}
{'type': 'loss', 'content': 0.04875417426228523, 'timestamp': '2025-10-02 00:46:07.488255', 'step': 19685, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:07.542630', 'step': 19685, 'epoch': 2}
{'type': 'loss', 'content': 0.040871940553188324, 'timestamp': '2025-10-02 00:46:07.549980', 'step': 19686, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:46:07.610280', 'step': 19686, 'epoch': 2}
{'type': 'loss', 'content': 0.0009694894542917609, 'timestamp': '2025-10-02 00:46:07.620413', 'step': 19687, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:07.676428', 'step': 19687, 'epoch': 2}
{'type': 'loss', 'content': 0.05384979024529457, 'timestamp': '2025-10-02 00:46:07.682523', 'step': 19688, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:07.737402', 'step': 19688, 'epoch': 2}
{'type': 'loss', 'content': 0.08080895990133286, 'timestamp': '2025-10-02 00:46:07.740482', 'step': 19689, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:07.794334', 'step': 19689, 'epoch': 2}
{'type': 'loss', 'content': 0.04636571556329727, 'timestamp': '2025-10-02 00:46:07.803542', 'step': 19690, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:07.865223', 'step': 19690, 'epoch': 2}
{'type': 'loss', 'content': 0.10199977457523346, 'timestamp': '2025-10-02 00:46:07.868041', 'step': 19691, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:07.925162', 'step': 19691, 'epoch': 2}
{'type': 'loss', 'content': 0.06217210367321968, 'timestamp': '2025-10-02 00:46:07.931494', 'step': 19692, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:46:07.985739', 'step': 19692, 'epoch': 2}
{'type': 'loss', 'content': 0.09373155236244202, 'timestamp': '2025-10-02 00:46:07.988606', 'step': 19693, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:46:08.044783', 'step': 19693, 'epoch': 2}
{'type': 'loss', 'content': 0.051808912307024, 'timestamp': '2025-10-02 00:46:08.054308', 'step': 19694, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:08.108843', 'step': 19694, 'epoch': 2}
{'type': 'loss', 'content': 0.03541167825460434, 'timestamp': '2025-10-02 00:46:08.117996', 'step': 19695, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:08.172663', 'step': 19695, 'epoch': 2}
{'type': 'loss', 'content': 0.03478005900979042, 'timestamp': '2025-10-02 00:46:08.179327', 'step': 19696, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:08.233337', 'step': 19696, 'epoch': 2}
{'type': 'loss', 'content': 0.060460954904556274, 'timestamp': '2025-10-02 00:46:08.236474', 'step': 19697, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:08.291410', 'step': 19697, 'epoch': 2}
{'type': 'loss', 'content': 0.15153245627880096, 'timestamp': '2025-10-02 00:46:08.294440', 'step': 19698, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:46:08.352772', 'step': 19698, 'epoch': 2}
{'type': 'loss', 'content': 0.059470903128385544, 'timestamp': '2025-10-02 00:46:08.362875', 'step': 19699, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:08.417579', 'step': 19699, 'epoch': 2}
{'type': 'loss', 'content': 0.02004631981253624, 'timestamp': '2025-10-02 00:46:08.423542', 'step': 19700, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:08.478883', 'step': 19700, 'epoch': 2}
{'type': 'loss', 'content': 0.003258510259911418, 'timestamp': '2025-10-02 00:46:08.481408', 'step': 19701, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:08.543247', 'step': 19701, 'epoch': 2}
{'type': 'loss', 'content': 0.006710132118314505, 'timestamp': '2025-10-02 00:46:08.550534', 'step': 19702, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:08.606035', 'step': 19702, 'epoch': 2}
{'type': 'loss', 'content': 0.06317440420389175, 'timestamp': '2025-10-02 00:46:08.611672', 'step': 19703, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:08.665524', 'step': 19703, 'epoch': 2}
{'type': 'loss', 'content': 0.10409419983625412, 'timestamp': '2025-10-02 00:46:08.671908', 'step': 19704, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:08.725356', 'step': 19704, 'epoch': 2}
{'type': 'loss', 'content': 0.12683811783790588, 'timestamp': '2025-10-02 00:46:08.728221', 'step': 19705, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:08.782554', 'step': 19705, 'epoch': 2}
{'type': 'loss', 'content': 0.03461063653230667, 'timestamp': '2025-10-02 00:46:08.785183', 'step': 19706, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:08.841198', 'step': 19706, 'epoch': 2}
{'type': 'loss', 'content': 0.0546426847577095, 'timestamp': '2025-10-02 00:46:08.843521', 'step': 19707, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:08.899183', 'step': 19707, 'epoch': 2}
{'type': 'loss', 'content': 0.03513668477535248, 'timestamp': '2025-10-02 00:46:08.905676', 'step': 19708, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:46:08.958889', 'step': 19708, 'epoch': 2}
{'type': 'loss', 'content': 0.06184077635407448, 'timestamp': '2025-10-02 00:46:08.961383', 'step': 19709, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:09.016196', 'step': 19709, 'epoch': 2}
{'type': 'loss', 'content': 0.02936708927154541, 'timestamp': '2025-10-02 00:46:09.018666', 'step': 19710, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:09.073653', 'step': 19710, 'epoch': 2}
{'type': 'loss', 'content': 0.08888417482376099, 'timestamp': '2025-10-02 00:46:09.076909', 'step': 19711, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:46:09.138521', 'step': 19711, 'epoch': 2}
{'type': 'loss', 'content': 0.022705670446157455, 'timestamp': '2025-10-02 00:46:09.149745', 'step': 19712, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:09.208194', 'step': 19712, 'epoch': 2}
{'type': 'loss', 'content': 0.02915601246058941, 'timestamp': '2025-10-02 00:46:09.217251', 'step': 19713, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:46:09.272835', 'step': 19713, 'epoch': 2}
{'type': 'loss', 'content': 0.05064636096358299, 'timestamp': '2025-10-02 00:46:09.282402', 'step': 19714, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:09.337215', 'step': 19714, 'epoch': 2}
{'type': 'loss', 'content': 0.06328031420707703, 'timestamp': '2025-10-02 00:46:09.342853', 'step': 19715, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:46:09.398526', 'step': 19715, 'epoch': 2}
{'type': 'loss', 'content': 0.02909209579229355, 'timestamp': '2025-10-02 00:46:09.408803', 'step': 19716, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:46:09.477453', 'step': 19716, 'epoch': 2}
{'type': 'loss', 'content': 0.009406360797584057, 'timestamp': '2025-10-02 00:46:09.490826', 'step': 19717, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:09.547239', 'step': 19717, 'epoch': 2}
{'type': 'loss', 'content': 0.1136382594704628, 'timestamp': '2025-10-02 00:46:09.551394', 'step': 19718, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:09.606540', 'step': 19718, 'epoch': 2}
{'type': 'loss', 'content': 0.08378569036722183, 'timestamp': '2025-10-02 00:46:09.609548', 'step': 19719, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:09.665037', 'step': 19719, 'epoch': 2}
{'type': 'loss', 'content': 0.10916218906641006, 'timestamp': '2025-10-02 00:46:09.671184', 'step': 19720, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:46:09.731508', 'step': 19720, 'epoch': 2}
{'type': 'loss', 'content': 0.049235619604587555, 'timestamp': '2025-10-02 00:46:09.742466', 'step': 19721, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:46:09.816092', 'step': 19721, 'epoch': 2}
{'type': 'loss', 'content': 0.009174909442663193, 'timestamp': '2025-10-02 00:46:09.828703', 'step': 19722, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:46:09.894237', 'step': 19722, 'epoch': 2}
{'type': 'loss', 'content': 0.02938719652593136, 'timestamp': '2025-10-02 00:46:09.904665', 'step': 19723, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:09.961912', 'step': 19723, 'epoch': 2}
{'type': 'loss', 'content': 0.12257768958806992, 'timestamp': '2025-10-02 00:46:09.968827', 'step': 19724, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:10.023911', 'step': 19724, 'epoch': 2}
{'type': 'loss', 'content': 0.05432577431201935, 'timestamp': '2025-10-02 00:46:10.026661', 'step': 19725, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:10.082254', 'step': 19725, 'epoch': 2}
{'type': 'loss', 'content': 0.17251822352409363, 'timestamp': '2025-10-02 00:46:10.084860', 'step': 19726, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:10.141239', 'step': 19726, 'epoch': 2}
{'type': 'loss', 'content': 0.10515645891427994, 'timestamp': '2025-10-02 00:46:10.145078', 'step': 19727, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:10.200372', 'step': 19727, 'epoch': 2}
{'type': 'loss', 'content': 0.13307593762874603, 'timestamp': '2025-10-02 00:46:10.206423', 'step': 19728, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:10.261332', 'step': 19728, 'epoch': 2}
{'type': 'loss', 'content': 0.1336100697517395, 'timestamp': '2025-10-02 00:46:10.263857', 'step': 19729, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:10.320621', 'step': 19729, 'epoch': 2}
{'type': 'loss', 'content': 0.0557955838739872, 'timestamp': '2025-10-02 00:46:10.324417', 'step': 19730, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:10.382074', 'step': 19730, 'epoch': 2}
{'type': 'loss', 'content': 0.11668703705072403, 'timestamp': '2025-10-02 00:46:10.389151', 'step': 19731, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:10.445439', 'step': 19731, 'epoch': 2}
{'type': 'loss', 'content': 0.03186905011534691, 'timestamp': '2025-10-02 00:46:10.451865', 'step': 19732, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:46:10.508694', 'step': 19732, 'epoch': 2}
{'type': 'loss', 'content': 0.054558753967285156, 'timestamp': '2025-10-02 00:46:10.518905', 'step': 19733, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:10.576320', 'step': 19733, 'epoch': 2}
{'type': 'loss', 'content': 0.0031997188925743103, 'timestamp': '2025-10-02 00:46:10.580957', 'step': 19734, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:10.638324', 'step': 19734, 'epoch': 2}
{'type': 'loss', 'content': 0.16234594583511353, 'timestamp': '2025-10-02 00:46:10.640864', 'step': 19735, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:10.695939', 'step': 19735, 'epoch': 2}
{'type': 'loss', 'content': 0.046798039227724075, 'timestamp': '2025-10-02 00:46:10.702529', 'step': 19736, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:10.759961', 'step': 19736, 'epoch': 2}
{'type': 'loss', 'content': 0.03307503089308739, 'timestamp': '2025-10-02 00:46:10.763582', 'step': 19737, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:46:10.821011', 'step': 19737, 'epoch': 2}
{'type': 'loss', 'content': 0.02838640846312046, 'timestamp': '2025-10-02 00:46:10.830527', 'step': 19738, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:10.886872', 'step': 19738, 'epoch': 2}
{'type': 'loss', 'content': 0.07132967561483383, 'timestamp': '2025-10-02 00:46:10.892177', 'step': 19739, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:10.947612', 'step': 19739, 'epoch': 2}
{'type': 'loss', 'content': 0.04279760643839836, 'timestamp': '2025-10-02 00:46:10.955504', 'step': 19740, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:11.010399', 'step': 19740, 'epoch': 2}
{'type': 'loss', 'content': 0.09199292957782745, 'timestamp': '2025-10-02 00:46:11.013519', 'step': 19741, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:11.070694', 'step': 19741, 'epoch': 2}
{'type': 'loss', 'content': 0.011872761882841587, 'timestamp': '2025-10-02 00:46:11.076485', 'step': 19742, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:11.132422', 'step': 19742, 'epoch': 2}
{'type': 'loss', 'content': 0.0487448051571846, 'timestamp': '2025-10-02 00:46:11.135374', 'step': 19743, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:46:11.209119', 'step': 19743, 'epoch': 2}
{'type': 'loss', 'content': 0.009247390553355217, 'timestamp': '2025-10-02 00:46:11.222464', 'step': 19744, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:46:11.278992', 'step': 19744, 'epoch': 2}
{'type': 'loss', 'content': 0.07813076674938202, 'timestamp': '2025-10-02 00:46:11.282120', 'step': 19745, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:11.339140', 'step': 19745, 'epoch': 2}
{'type': 'loss', 'content': 0.03740831837058067, 'timestamp': '2025-10-02 00:46:11.344728', 'step': 19746, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:46:11.408537', 'step': 19746, 'epoch': 2}
{'type': 'loss', 'content': 0.039919447153806686, 'timestamp': '2025-10-02 00:46:11.419167', 'step': 19747, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:11.475470', 'step': 19747, 'epoch': 2}
{'type': 'loss', 'content': 0.012719259597361088, 'timestamp': '2025-10-02 00:46:11.482528', 'step': 19748, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:11.536887', 'step': 19748, 'epoch': 2}
{'type': 'loss', 'content': 0.10146783292293549, 'timestamp': '2025-10-02 00:46:11.540663', 'step': 19749, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:11.600128', 'step': 19749, 'epoch': 2}
{'type': 'loss', 'content': 0.15460340678691864, 'timestamp': '2025-10-02 00:46:11.603270', 'step': 19750, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:11.659347', 'step': 19750, 'epoch': 2}
{'type': 'loss', 'content': 0.07174398005008698, 'timestamp': '2025-10-02 00:46:11.662233', 'step': 19751, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:11.722285', 'step': 19751, 'epoch': 2}
{'type': 'loss', 'content': 0.02307308092713356, 'timestamp': '2025-10-02 00:46:11.729599', 'step': 19752, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:11.785092', 'step': 19752, 'epoch': 2}
{'type': 'loss', 'content': 0.029435236006975174, 'timestamp': '2025-10-02 00:46:11.787871', 'step': 19753, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:11.842382', 'step': 19753, 'epoch': 2}
{'type': 'loss', 'content': 0.09405624866485596, 'timestamp': '2025-10-02 00:46:11.849695', 'step': 19754, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:11.905400', 'step': 19754, 'epoch': 2}
{'type': 'loss', 'content': 0.03493376821279526, 'timestamp': '2025-10-02 00:46:11.907952', 'step': 19755, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:46:11.963401', 'step': 19755, 'epoch': 2}
{'type': 'loss', 'content': 0.03322230279445648, 'timestamp': '2025-10-02 00:46:11.973707', 'step': 19756, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:12.028613', 'step': 19756, 'epoch': 2}
{'type': 'loss', 'content': 0.01975274831056595, 'timestamp': '2025-10-02 00:46:12.031409', 'step': 19757, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:12.085926', 'step': 19757, 'epoch': 2}
{'type': 'loss', 'content': 0.05915095657110214, 'timestamp': '2025-10-02 00:46:12.088381', 'step': 19758, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:12.143534', 'step': 19758, 'epoch': 2}
{'type': 'loss', 'content': 0.028077054768800735, 'timestamp': '2025-10-02 00:46:12.149011', 'step': 19759, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:12.203822', 'step': 19759, 'epoch': 2}
{'type': 'loss', 'content': 0.20378725230693817, 'timestamp': '2025-10-02 00:46:12.210131', 'step': 19760, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:12.264312', 'step': 19760, 'epoch': 2}
{'type': 'loss', 'content': 0.05116493999958038, 'timestamp': '2025-10-02 00:46:12.266667', 'step': 19761, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:12.321616', 'step': 19761, 'epoch': 2}
{'type': 'loss', 'content': 0.13834765553474426, 'timestamp': '2025-10-02 00:46:12.323934', 'step': 19762, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:46:12.378455', 'step': 19762, 'epoch': 2}
{'type': 'loss', 'content': 0.054686978459358215, 'timestamp': '2025-10-02 00:46:12.381259', 'step': 19763, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:12.435695', 'step': 19763, 'epoch': 2}
{'type': 'loss', 'content': 0.10615862160921097, 'timestamp': '2025-10-02 00:46:12.443615', 'step': 19764, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:12.497604', 'step': 19764, 'epoch': 2}
{'type': 'loss', 'content': 0.060537658631801605, 'timestamp': '2025-10-02 00:46:12.499989', 'step': 19765, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:12.555088', 'step': 19765, 'epoch': 2}
{'type': 'loss', 'content': 0.051583290100097656, 'timestamp': '2025-10-02 00:46:12.557681', 'step': 19766, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:12.613637', 'step': 19766, 'epoch': 2}
{'type': 'loss', 'content': 0.07545585185289383, 'timestamp': '2025-10-02 00:46:12.619186', 'step': 19767, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:12.673926', 'step': 19767, 'epoch': 2}
{'type': 'loss', 'content': 0.053330887109041214, 'timestamp': '2025-10-02 00:46:12.680193', 'step': 19768, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:12.733738', 'step': 19768, 'epoch': 2}
{'type': 'loss', 'content': 0.08149711787700653, 'timestamp': '2025-10-02 00:46:12.736201', 'step': 19769, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:12.791846', 'step': 19769, 'epoch': 2}
{'type': 'loss', 'content': 0.1155741736292839, 'timestamp': '2025-10-02 00:46:12.794760', 'step': 19770, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:12.849340', 'step': 19770, 'epoch': 2}
{'type': 'loss', 'content': 0.11172863841056824, 'timestamp': '2025-10-02 00:46:12.851719', 'step': 19771, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:12.906462', 'step': 19771, 'epoch': 2}
{'type': 'loss', 'content': 0.10639845579862595, 'timestamp': '2025-10-02 00:46:12.913175', 'step': 19772, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:12.968665', 'step': 19772, 'epoch': 2}
{'type': 'loss', 'content': 0.09651973098516464, 'timestamp': '2025-10-02 00:46:12.974328', 'step': 19773, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:13.029914', 'step': 19773, 'epoch': 2}
{'type': 'loss', 'content': 0.05911540612578392, 'timestamp': '2025-10-02 00:46:13.032675', 'step': 19774, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:13.087921', 'step': 19774, 'epoch': 2}
{'type': 'loss', 'content': 0.0891711637377739, 'timestamp': '2025-10-02 00:46:13.090516', 'step': 19775, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:46:13.153531', 'step': 19775, 'epoch': 2}
{'type': 'loss', 'content': 0.0338367260992527, 'timestamp': '2025-10-02 00:46:13.165058', 'step': 19776, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:13.219490', 'step': 19776, 'epoch': 2}
{'type': 'loss', 'content': 0.029333585873246193, 'timestamp': '2025-10-02 00:46:13.226749', 'step': 19777, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:13.285801', 'step': 19777, 'epoch': 2}
{'type': 'loss', 'content': 0.009125902317464352, 'timestamp': '2025-10-02 00:46:13.288293', 'step': 19778, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:13.342489', 'step': 19778, 'epoch': 2}
{'type': 'loss', 'content': 0.15978597104549408, 'timestamp': '2025-10-02 00:46:13.344927', 'step': 19779, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:13.399907', 'step': 19779, 'epoch': 2}
{'type': 'loss', 'content': 0.012903894297778606, 'timestamp': '2025-10-02 00:46:13.405973', 'step': 19780, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:13.459817', 'step': 19780, 'epoch': 2}
{'type': 'loss', 'content': 0.08449018001556396, 'timestamp': '2025-10-02 00:46:13.462597', 'step': 19781, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:13.517276', 'step': 19781, 'epoch': 2}
{'type': 'loss', 'content': 0.04518209025263786, 'timestamp': '2025-10-02 00:46:13.526492', 'step': 19782, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:13.581110', 'step': 19782, 'epoch': 2}
{'type': 'loss', 'content': 0.07121961563825607, 'timestamp': '2025-10-02 00:46:13.584040', 'step': 19783, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:13.639693', 'step': 19783, 'epoch': 2}
{'type': 'loss', 'content': 0.050985902547836304, 'timestamp': '2025-10-02 00:46:13.646418', 'step': 19784, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:13.701090', 'step': 19784, 'epoch': 2}
{'type': 'loss', 'content': 0.06272881478071213, 'timestamp': '2025-10-02 00:46:13.703700', 'step': 19785, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:13.758311', 'step': 19785, 'epoch': 2}
{'type': 'loss', 'content': 0.03873040899634361, 'timestamp': '2025-10-02 00:46:13.760564', 'step': 19786, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:13.814338', 'step': 19786, 'epoch': 2}
{'type': 'loss', 'content': 0.05764611065387726, 'timestamp': '2025-10-02 00:46:13.823485', 'step': 19787, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:13.877821', 'step': 19787, 'epoch': 2}
{'type': 'loss', 'content': 0.1072775349020958, 'timestamp': '2025-10-02 00:46:13.884079', 'step': 19788, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:13.938424', 'step': 19788, 'epoch': 2}
{'type': 'loss', 'content': 0.023269616067409515, 'timestamp': '2025-10-02 00:46:13.940827', 'step': 19789, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:46:13.995422', 'step': 19789, 'epoch': 2}
{'type': 'loss', 'content': 0.0847495049238205, 'timestamp': '2025-10-02 00:46:13.997989', 'step': 19790, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:14.052390', 'step': 19790, 'epoch': 2}
{'type': 'loss', 'content': 0.12974251806735992, 'timestamp': '2025-10-02 00:46:14.055630', 'step': 19791, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:14.109988', 'step': 19791, 'epoch': 2}
{'type': 'loss', 'content': 0.07070869207382202, 'timestamp': '2025-10-02 00:46:14.115928', 'step': 19792, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:14.169440', 'step': 19792, 'epoch': 2}
{'type': 'loss', 'content': 0.09135588258504868, 'timestamp': '2025-10-02 00:46:14.171990', 'step': 19793, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:14.226253', 'step': 19793, 'epoch': 2}
{'type': 'loss', 'content': 0.08867381513118744, 'timestamp': '2025-10-02 00:46:14.229100', 'step': 19794, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:14.293624', 'step': 19794, 'epoch': 2}
{'type': 'loss', 'content': 0.08737678080797195, 'timestamp': '2025-10-02 00:46:14.296163', 'step': 19795, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:14.350625', 'step': 19795, 'epoch': 2}
{'type': 'loss', 'content': 0.05054130032658577, 'timestamp': '2025-10-02 00:46:14.360600', 'step': 19796, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:14.414506', 'step': 19796, 'epoch': 2}
{'type': 'loss', 'content': 0.03385743126273155, 'timestamp': '2025-10-02 00:46:14.417349', 'step': 19797, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:14.474172', 'step': 19797, 'epoch': 2}
{'type': 'loss', 'content': 0.0058351196348667145, 'timestamp': '2025-10-02 00:46:14.481528', 'step': 19798, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:14.535771', 'step': 19798, 'epoch': 2}
{'type': 'loss', 'content': 0.0479707308113575, 'timestamp': '2025-10-02 00:46:14.538252', 'step': 19799, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:14.592788', 'step': 19799, 'epoch': 2}
{'type': 'loss', 'content': 0.0445941723883152, 'timestamp': '2025-10-02 00:46:14.598949', 'step': 19800, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:46:14.657388', 'step': 19800, 'epoch': 2}
{'type': 'loss', 'content': 0.023378511890769005, 'timestamp': '2025-10-02 00:46:14.668329', 'step': 19801, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:46:14.747411', 'step': 19801, 'epoch': 2}
{'type': 'loss', 'content': 0.0728665143251419, 'timestamp': '2025-10-02 00:46:14.756918', 'step': 19802, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:14.825029', 'step': 19802, 'epoch': 2}
{'type': 'loss', 'content': 0.04008905589580536, 'timestamp': '2025-10-02 00:46:14.846715', 'step': 19803, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:14.933441', 'step': 19803, 'epoch': 2}
{'type': 'loss', 'content': 0.050406791269779205, 'timestamp': '2025-10-02 00:46:14.941247', 'step': 19804, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:15.030543', 'step': 19804, 'epoch': 2}
{'type': 'loss', 'content': 0.03717266023159027, 'timestamp': '2025-10-02 00:46:15.036342', 'step': 19805, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:15.098193', 'step': 19805, 'epoch': 2}
{'type': 'loss', 'content': 0.10467901080846786, 'timestamp': '2025-10-02 00:46:15.115666', 'step': 19806, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:15.203925', 'step': 19806, 'epoch': 2}
{'type': 'loss', 'content': 0.04992038384079933, 'timestamp': '2025-10-02 00:46:15.208291', 'step': 19807, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:15.269699', 'step': 19807, 'epoch': 2}
{'type': 'loss', 'content': 0.11937728524208069, 'timestamp': '2025-10-02 00:46:15.290613', 'step': 19808, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:15.378397', 'step': 19808, 'epoch': 2}
{'type': 'loss', 'content': 0.05187322571873665, 'timestamp': '2025-10-02 00:46:15.382808', 'step': 19809, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:15.458086', 'step': 19809, 'epoch': 2}
{'type': 'loss', 'content': 0.040116604417562485, 'timestamp': '2025-10-02 00:46:15.462899', 'step': 19810, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:46:15.550191', 'step': 19810, 'epoch': 2}
{'type': 'loss', 'content': 0.02067967690527439, 'timestamp': '2025-10-02 00:46:15.562596', 'step': 19811, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:15.643089', 'step': 19811, 'epoch': 2}
{'type': 'loss', 'content': 0.004911435768008232, 'timestamp': '2025-10-02 00:46:15.651384', 'step': 19812, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:15.711640', 'step': 19812, 'epoch': 2}
{'type': 'loss', 'content': 0.1456357091665268, 'timestamp': '2025-10-02 00:46:15.715683', 'step': 19813, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:15.789325', 'step': 19813, 'epoch': 2}
{'type': 'loss', 'content': 0.014577191323041916, 'timestamp': '2025-10-02 00:46:15.800695', 'step': 19814, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:46:15.873650', 'step': 19814, 'epoch': 2}
{'type': 'loss', 'content': 0.0682220607995987, 'timestamp': '2025-10-02 00:46:15.883761', 'step': 19815, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:46:15.946709', 'step': 19815, 'epoch': 2}
{'type': 'loss', 'content': 0.06685285270214081, 'timestamp': '2025-10-02 00:46:15.957654', 'step': 19816, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:16.016093', 'step': 19816, 'epoch': 2}
{'type': 'loss', 'content': 0.14829349517822266, 'timestamp': '2025-10-02 00:46:16.028909', 'step': 19817, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:16.111910', 'step': 19817, 'epoch': 2}
{'type': 'loss', 'content': 0.11965497583150864, 'timestamp': '2025-10-02 00:46:16.116793', 'step': 19818, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:16.186864', 'step': 19818, 'epoch': 2}
{'type': 'loss', 'content': 0.01785273477435112, 'timestamp': '2025-10-02 00:46:16.191027', 'step': 19819, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:16.253181', 'step': 19819, 'epoch': 2}
{'type': 'loss', 'content': 0.07257036864757538, 'timestamp': '2025-10-02 00:46:16.260781', 'step': 19820, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:16.317775', 'step': 19820, 'epoch': 2}
{'type': 'loss', 'content': 0.06606751680374146, 'timestamp': '2025-10-02 00:46:16.323293', 'step': 19821, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:16.384071', 'step': 19821, 'epoch': 2}
{'type': 'loss', 'content': 0.03949172422289848, 'timestamp': '2025-10-02 00:46:16.388001', 'step': 19822, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:16.445077', 'step': 19822, 'epoch': 2}
{'type': 'loss', 'content': 0.09128115326166153, 'timestamp': '2025-10-02 00:46:16.448142', 'step': 19823, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:16.515788', 'step': 19823, 'epoch': 2}
{'type': 'loss', 'content': 0.02193245105445385, 'timestamp': '2025-10-02 00:46:16.524758', 'step': 19824, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:16.595697', 'step': 19824, 'epoch': 2}
{'type': 'loss', 'content': 0.00201118690893054, 'timestamp': '2025-10-02 00:46:16.604986', 'step': 19825, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:16.661751', 'step': 19825, 'epoch': 2}
{'type': 'loss', 'content': 0.11035173386335373, 'timestamp': '2025-10-02 00:46:16.673943', 'step': 19826, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:16.741363', 'step': 19826, 'epoch': 2}
{'type': 'loss', 'content': 0.02736791968345642, 'timestamp': '2025-10-02 00:46:16.745892', 'step': 19827, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:16.804401', 'step': 19827, 'epoch': 2}
{'type': 'loss', 'content': 0.11599895358085632, 'timestamp': '2025-10-02 00:46:16.820227', 'step': 19828, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:16.879840', 'step': 19828, 'epoch': 2}
{'type': 'loss', 'content': 0.01016775518655777, 'timestamp': '2025-10-02 00:46:16.886952', 'step': 19829, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:16.945678', 'step': 19829, 'epoch': 2}
{'type': 'loss', 'content': 0.03812391683459282, 'timestamp': '2025-10-02 00:46:16.951497', 'step': 19830, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:17.022266', 'step': 19830, 'epoch': 2}
{'type': 'loss', 'content': 0.05556025356054306, 'timestamp': '2025-10-02 00:46:17.035592', 'step': 19831, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:46:17.122722', 'step': 19831, 'epoch': 2}
{'type': 'loss', 'content': 0.031572602689266205, 'timestamp': '2025-10-02 00:46:17.136963', 'step': 19832, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:17.195698', 'step': 19832, 'epoch': 2}
{'type': 'loss', 'content': 0.0782494992017746, 'timestamp': '2025-10-02 00:46:17.200805', 'step': 19833, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:17.289952', 'step': 19833, 'epoch': 2}
{'type': 'loss', 'content': 0.08642604202032089, 'timestamp': '2025-10-02 00:46:17.295042', 'step': 19834, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:17.373451', 'step': 19834, 'epoch': 2}
{'type': 'loss', 'content': 0.09602577239274979, 'timestamp': '2025-10-02 00:46:17.379057', 'step': 19835, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:17.457797', 'step': 19835, 'epoch': 2}
{'type': 'loss', 'content': 0.038036048412323, 'timestamp': '2025-10-02 00:46:17.465627', 'step': 19836, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:17.554372', 'step': 19836, 'epoch': 2}
{'type': 'loss', 'content': 0.09175746142864227, 'timestamp': '2025-10-02 00:46:17.572686', 'step': 19837, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:17.636283', 'step': 19837, 'epoch': 2}
{'type': 'loss', 'content': 0.06863322108983994, 'timestamp': '2025-10-02 00:46:17.643605', 'step': 19838, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:17.713797', 'step': 19838, 'epoch': 2}
{'type': 'loss', 'content': 0.12078255414962769, 'timestamp': '2025-10-02 00:46:17.725995', 'step': 19839, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:17.804803', 'step': 19839, 'epoch': 2}
{'type': 'loss', 'content': 0.03936124965548515, 'timestamp': '2025-10-02 00:46:17.817282', 'step': 19840, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:17.883421', 'step': 19840, 'epoch': 2}
{'type': 'loss', 'content': 0.1330811232328415, 'timestamp': '2025-10-02 00:46:17.889397', 'step': 19841, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:17.967172', 'step': 19841, 'epoch': 2}
{'type': 'loss', 'content': 0.059941619634628296, 'timestamp': '2025-10-02 00:46:17.972298', 'step': 19842, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:18.043291', 'step': 19842, 'epoch': 2}
{'type': 'loss', 'content': 0.12244348227977753, 'timestamp': '2025-10-02 00:46:18.048584', 'step': 19843, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:46:18.110510', 'step': 19843, 'epoch': 2}
{'type': 'loss', 'content': 0.11845394223928452, 'timestamp': '2025-10-02 00:46:18.122690', 'step': 19844, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:18.182670', 'step': 19844, 'epoch': 2}
{'type': 'loss', 'content': 0.20827242732048035, 'timestamp': '2025-10-02 00:46:18.186234', 'step': 19845, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:18.264810', 'step': 19845, 'epoch': 2}
{'type': 'loss', 'content': 0.13214895129203796, 'timestamp': '2025-10-02 00:46:18.267812', 'step': 19846, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:18.333324', 'step': 19846, 'epoch': 2}
{'type': 'loss', 'content': 0.10102854669094086, 'timestamp': '2025-10-02 00:46:18.336547', 'step': 19847, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:18.402445', 'step': 19847, 'epoch': 2}
{'type': 'loss', 'content': 0.14879874885082245, 'timestamp': '2025-10-02 00:46:18.420876', 'step': 19848, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:18.504480', 'step': 19848, 'epoch': 2}
{'type': 'loss', 'content': 0.05218900367617607, 'timestamp': '2025-10-02 00:46:18.520676', 'step': 19849, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:18.624776', 'step': 19849, 'epoch': 2}
{'type': 'loss', 'content': 0.08044056594371796, 'timestamp': '2025-10-02 00:46:18.628811', 'step': 19850, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:18.688825', 'step': 19850, 'epoch': 2}
{'type': 'loss', 'content': 0.029782572761178017, 'timestamp': '2025-10-02 00:46:18.692359', 'step': 19851, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:18.761927', 'step': 19851, 'epoch': 2}
{'type': 'loss', 'content': 0.06886500120162964, 'timestamp': '2025-10-02 00:46:18.769037', 'step': 19852, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:46:18.851276', 'step': 19852, 'epoch': 2}
{'type': 'loss', 'content': 0.04090694338083267, 'timestamp': '2025-10-02 00:46:18.861157', 'step': 19853, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:18.955520', 'step': 19853, 'epoch': 2}
{'type': 'loss', 'content': 0.1405993551015854, 'timestamp': '2025-10-02 00:46:18.960407', 'step': 19854, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:19.029699', 'step': 19854, 'epoch': 2}
{'type': 'loss', 'content': 0.0389077365398407, 'timestamp': '2025-10-02 00:46:19.034772', 'step': 19855, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:19.116928', 'step': 19855, 'epoch': 2}
{'type': 'loss', 'content': 0.08896531909704208, 'timestamp': '2025-10-02 00:46:19.125200', 'step': 19856, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:19.208786', 'step': 19856, 'epoch': 2}
{'type': 'loss', 'content': 0.012878961861133575, 'timestamp': '2025-10-02 00:46:19.211975', 'step': 19857, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:19.285083', 'step': 19857, 'epoch': 2}
{'type': 'loss', 'content': 0.10215575993061066, 'timestamp': '2025-10-02 00:46:19.300350', 'step': 19858, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:19.361227', 'step': 19858, 'epoch': 2}
{'type': 'loss', 'content': 0.08257945626974106, 'timestamp': '2025-10-02 00:46:19.376690', 'step': 19859, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:19.441720', 'step': 19859, 'epoch': 2}
{'type': 'loss', 'content': 0.016059178858995438, 'timestamp': '2025-10-02 00:46:19.462719', 'step': 19860, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:46:19.523264', 'step': 19860, 'epoch': 2}
{'type': 'loss', 'content': 0.04889778792858124, 'timestamp': '2025-10-02 00:46:19.533482', 'step': 19861, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:19.605793', 'step': 19861, 'epoch': 2}
{'type': 'loss', 'content': 0.07161463797092438, 'timestamp': '2025-10-02 00:46:19.612354', 'step': 19862, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:19.685623', 'step': 19862, 'epoch': 2}
{'type': 'loss', 'content': 0.02139280177652836, 'timestamp': '2025-10-02 00:46:19.707287', 'step': 19863, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:19.776299', 'step': 19863, 'epoch': 2}
{'type': 'loss', 'content': 0.025037856772542, 'timestamp': '2025-10-02 00:46:19.800284', 'step': 19864, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:46:19.880753', 'step': 19864, 'epoch': 2}
{'type': 'loss', 'content': 0.026272831484675407, 'timestamp': '2025-10-02 00:46:19.894892', 'step': 19865, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:19.957683', 'step': 19865, 'epoch': 2}
{'type': 'loss', 'content': 0.1380617320537567, 'timestamp': '2025-10-02 00:46:19.961488', 'step': 19866, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:20.035815', 'step': 19866, 'epoch': 2}
{'type': 'loss', 'content': 0.13315385580062866, 'timestamp': '2025-10-02 00:46:20.047118', 'step': 19867, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:20.131086', 'step': 19867, 'epoch': 2}
{'type': 'loss', 'content': 0.07984505593776703, 'timestamp': '2025-10-02 00:46:20.148905', 'step': 19868, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:20.234391', 'step': 19868, 'epoch': 2}
{'type': 'loss', 'content': 0.05859232321381569, 'timestamp': '2025-10-02 00:46:20.238518', 'step': 19869, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:20.308271', 'step': 19869, 'epoch': 2}
{'type': 'loss', 'content': 0.11233781278133392, 'timestamp': '2025-10-02 00:46:20.321239', 'step': 19870, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:20.402058', 'step': 19870, 'epoch': 2}
{'type': 'loss', 'content': 0.20552873611450195, 'timestamp': '2025-10-02 00:46:20.405037', 'step': 19871, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:20.468959', 'step': 19871, 'epoch': 2}
{'type': 'loss', 'content': 0.06599947065114975, 'timestamp': '2025-10-02 00:46:20.476137', 'step': 19872, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:20.554873', 'step': 19872, 'epoch': 2}
{'type': 'loss', 'content': 0.03559904173016548, 'timestamp': '2025-10-02 00:46:20.561594', 'step': 19873, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:46:20.624304', 'step': 19873, 'epoch': 2}
{'type': 'loss', 'content': 0.0032742463517934084, 'timestamp': '2025-10-02 00:46:20.640343', 'step': 19874, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:20.708729', 'step': 19874, 'epoch': 2}
{'type': 'loss', 'content': 0.044027991592884064, 'timestamp': '2025-10-02 00:46:20.712432', 'step': 19875, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:46:20.780701', 'step': 19875, 'epoch': 2}
{'type': 'loss', 'content': 0.01726391725242138, 'timestamp': '2025-10-02 00:46:20.791395', 'step': 19876, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:20.853135', 'step': 19876, 'epoch': 2}
{'type': 'loss', 'content': 0.03382733836770058, 'timestamp': '2025-10-02 00:46:20.857051', 'step': 19877, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:20.920095', 'step': 19877, 'epoch': 2}
{'type': 'loss', 'content': 0.10554513335227966, 'timestamp': '2025-10-02 00:46:20.924185', 'step': 19878, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:20.982525', 'step': 19878, 'epoch': 2}
{'type': 'loss', 'content': 0.07444342225790024, 'timestamp': '2025-10-02 00:46:20.985777', 'step': 19879, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:21.054771', 'step': 19879, 'epoch': 2}
{'type': 'loss', 'content': 0.06386774033308029, 'timestamp': '2025-10-02 00:46:21.070993', 'step': 19880, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:21.139784', 'step': 19880, 'epoch': 2}
{'type': 'loss', 'content': 0.15171828866004944, 'timestamp': '2025-10-02 00:46:21.150449', 'step': 19881, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:46:21.237463', 'step': 19881, 'epoch': 2}
{'type': 'loss', 'content': 0.009281517937779427, 'timestamp': '2025-10-02 00:46:21.248362', 'step': 19882, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:21.321526', 'step': 19882, 'epoch': 2}
{'type': 'loss', 'content': 0.035901688039302826, 'timestamp': '2025-10-02 00:46:21.333401', 'step': 19883, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:21.406158', 'step': 19883, 'epoch': 2}
{'type': 'loss', 'content': 0.07272946834564209, 'timestamp': '2025-10-02 00:46:21.412492', 'step': 19884, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:46:21.477139', 'step': 19884, 'epoch': 2}
{'type': 'loss', 'content': 0.012245486490428448, 'timestamp': '2025-10-02 00:46:21.487380', 'step': 19885, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:46:21.553976', 'step': 19885, 'epoch': 2}
{'type': 'loss', 'content': 0.016930675134062767, 'timestamp': '2025-10-02 00:46:21.564163', 'step': 19886, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:21.633390', 'step': 19886, 'epoch': 2}
{'type': 'loss', 'content': 0.02766399458050728, 'timestamp': '2025-10-02 00:46:21.643543', 'step': 19887, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:21.712390', 'step': 19887, 'epoch': 2}
{'type': 'loss', 'content': 0.17805898189544678, 'timestamp': '2025-10-02 00:46:21.726146', 'step': 19888, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:21.789130', 'step': 19888, 'epoch': 2}
{'type': 'loss', 'content': 0.009507527574896812, 'timestamp': '2025-10-02 00:46:21.801737', 'step': 19889, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:21.876901', 'step': 19889, 'epoch': 2}
{'type': 'loss', 'content': 0.06271133571863174, 'timestamp': '2025-10-02 00:46:21.886367', 'step': 19890, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:46:21.959076', 'step': 19890, 'epoch': 2}
{'type': 'loss', 'content': 0.013237709179520607, 'timestamp': '2025-10-02 00:46:21.969515', 'step': 19891, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:22.029538', 'step': 19891, 'epoch': 2}
{'type': 'loss', 'content': 0.06203042343258858, 'timestamp': '2025-10-02 00:46:22.041055', 'step': 19892, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:22.109525', 'step': 19892, 'epoch': 2}
{'type': 'loss', 'content': 0.07147393375635147, 'timestamp': '2025-10-02 00:46:22.115023', 'step': 19893, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:22.178785', 'step': 19893, 'epoch': 2}
{'type': 'loss', 'content': 0.12211548537015915, 'timestamp': '2025-10-02 00:46:22.183106', 'step': 19894, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:46:22.246002', 'step': 19894, 'epoch': 2}
{'type': 'loss', 'content': 0.005300292745232582, 'timestamp': '2025-10-02 00:46:22.256186', 'step': 19895, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:22.328053', 'step': 19895, 'epoch': 2}
{'type': 'loss', 'content': 0.04074699059128761, 'timestamp': '2025-10-02 00:46:22.338267', 'step': 19896, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:22.400689', 'step': 19896, 'epoch': 2}
{'type': 'loss', 'content': 0.03752341866493225, 'timestamp': '2025-10-02 00:46:22.413580', 'step': 19897, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:22.486541', 'step': 19897, 'epoch': 2}
{'type': 'loss', 'content': 0.13438022136688232, 'timestamp': '2025-10-02 00:46:22.490610', 'step': 19898, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:46:22.564818', 'step': 19898, 'epoch': 2}
{'type': 'loss', 'content': 0.027655059471726418, 'timestamp': '2025-10-02 00:46:22.574978', 'step': 19899, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:22.640428', 'step': 19899, 'epoch': 2}
{'type': 'loss', 'content': 0.027868904173374176, 'timestamp': '2025-10-02 00:46:22.647988', 'step': 19900, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:22.708310', 'step': 19900, 'epoch': 2}
{'type': 'loss', 'content': 0.070599265396595, 'timestamp': '2025-10-02 00:46:22.711335', 'step': 19901, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:22.779058', 'step': 19901, 'epoch': 2}
{'type': 'loss', 'content': 0.06842789798974991, 'timestamp': '2025-10-02 00:46:22.786234', 'step': 19902, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:22.847255', 'step': 19902, 'epoch': 2}
{'type': 'loss', 'content': 0.029844924807548523, 'timestamp': '2025-10-02 00:46:22.853241', 'step': 19903, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:22.911817', 'step': 19903, 'epoch': 2}
{'type': 'loss', 'content': 0.08329835534095764, 'timestamp': '2025-10-02 00:46:22.919151', 'step': 19904, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:46:22.979611', 'step': 19904, 'epoch': 2}
{'type': 'loss', 'content': 0.033329252153635025, 'timestamp': '2025-10-02 00:46:22.990522', 'step': 19905, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:46:23.053907', 'step': 19905, 'epoch': 2}
{'type': 'loss', 'content': 0.03514806926250458, 'timestamp': '2025-10-02 00:46:23.061235', 'step': 19906, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:46:23.134643', 'step': 19906, 'epoch': 2}
{'type': 'loss', 'content': 0.015670889988541603, 'timestamp': '2025-10-02 00:46:23.144821', 'step': 19907, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:23.201342', 'step': 19907, 'epoch': 2}
{'type': 'loss', 'content': 0.1084897592663765, 'timestamp': '2025-10-02 00:46:23.208850', 'step': 19908, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:23.265655', 'step': 19908, 'epoch': 2}
{'type': 'loss', 'content': 0.062179069966077805, 'timestamp': '2025-10-02 00:46:23.274231', 'step': 19909, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:23.333074', 'step': 19909, 'epoch': 2}
{'type': 'loss', 'content': 0.13284125924110413, 'timestamp': '2025-10-02 00:46:23.336943', 'step': 19910, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:46:23.402174', 'step': 19910, 'epoch': 2}
{'type': 'loss', 'content': 0.015837576240301132, 'timestamp': '2025-10-02 00:46:23.412603', 'step': 19911, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:23.478337', 'step': 19911, 'epoch': 2}
{'type': 'loss', 'content': 0.1098037138581276, 'timestamp': '2025-10-02 00:46:23.491505', 'step': 19912, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:46:23.570197', 'step': 19912, 'epoch': 2}
{'type': 'loss', 'content': 0.009865688160061836, 'timestamp': '2025-10-02 00:46:23.581673', 'step': 19913, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:23.645611', 'step': 19913, 'epoch': 2}
{'type': 'loss', 'content': 0.08621294796466827, 'timestamp': '2025-10-02 00:46:23.656621', 'step': 19914, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:23.730890', 'step': 19914, 'epoch': 2}
{'type': 'loss', 'content': 0.04832233488559723, 'timestamp': '2025-10-02 00:46:23.737997', 'step': 19915, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:46:23.819662', 'step': 19915, 'epoch': 2}
{'type': 'loss', 'content': 0.017005780711770058, 'timestamp': '2025-10-02 00:46:23.831086', 'step': 19916, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:23.896913', 'step': 19916, 'epoch': 2}
{'type': 'loss', 'content': 0.04162368178367615, 'timestamp': '2025-10-02 00:46:23.906702', 'step': 19917, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:23.976856', 'step': 19917, 'epoch': 2}
{'type': 'loss', 'content': 0.035398002713918686, 'timestamp': '2025-10-02 00:46:23.986524', 'step': 19918, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:24.051396', 'step': 19918, 'epoch': 2}
{'type': 'loss', 'content': 0.03560607135295868, 'timestamp': '2025-10-02 00:46:24.058593', 'step': 19919, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:24.126020', 'step': 19919, 'epoch': 2}
{'type': 'loss', 'content': 0.086624376475811, 'timestamp': '2025-10-02 00:46:24.135582', 'step': 19920, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:24.198147', 'step': 19920, 'epoch': 2}
{'type': 'loss', 'content': 0.0471525602042675, 'timestamp': '2025-10-02 00:46:24.202399', 'step': 19921, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:24.266383', 'step': 19921, 'epoch': 2}
{'type': 'loss', 'content': 0.028904229402542114, 'timestamp': '2025-10-02 00:46:24.272050', 'step': 19922, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:24.339959', 'step': 19922, 'epoch': 2}
{'type': 'loss', 'content': 0.07384052872657776, 'timestamp': '2025-10-02 00:46:24.343117', 'step': 19923, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:24.422272', 'step': 19923, 'epoch': 2}
{'type': 'loss', 'content': 0.04167937859892845, 'timestamp': '2025-10-02 00:46:24.432830', 'step': 19924, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:24.502379', 'step': 19924, 'epoch': 2}
{'type': 'loss', 'content': 0.12443909794092178, 'timestamp': '2025-10-02 00:46:24.505601', 'step': 19925, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:24.578299', 'step': 19925, 'epoch': 2}
{'type': 'loss', 'content': 0.06864183396100998, 'timestamp': '2025-10-02 00:46:24.585620', 'step': 19926, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:46:24.657379', 'step': 19926, 'epoch': 2}
{'type': 'loss', 'content': 0.0498993918299675, 'timestamp': '2025-10-02 00:46:24.666884', 'step': 19927, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:46:24.732522', 'step': 19927, 'epoch': 2}
{'type': 'loss', 'content': 0.013367777690291405, 'timestamp': '2025-10-02 00:46:24.748047', 'step': 19928, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:46:24.820493', 'step': 19928, 'epoch': 2}
{'type': 'loss', 'content': 0.06325248628854752, 'timestamp': '2025-10-02 00:46:24.829642', 'step': 19929, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:24.890029', 'step': 19929, 'epoch': 2}
{'type': 'loss', 'content': 0.1817125827074051, 'timestamp': '2025-10-02 00:46:24.899440', 'step': 19930, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:46:24.962181', 'step': 19930, 'epoch': 2}
{'type': 'loss', 'content': 0.03422628343105316, 'timestamp': '2025-10-02 00:46:24.971703', 'step': 19931, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:25.033578', 'step': 19931, 'epoch': 2}
{'type': 'loss', 'content': 0.06796551495790482, 'timestamp': '2025-10-02 00:46:25.040070', 'step': 19932, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:25.098195', 'step': 19932, 'epoch': 2}
{'type': 'loss', 'content': 0.036073118448257446, 'timestamp': '2025-10-02 00:46:25.102063', 'step': 19933, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:46:25.163806', 'step': 19933, 'epoch': 2}
{'type': 'loss', 'content': 0.04421994090080261, 'timestamp': '2025-10-02 00:46:25.176186', 'step': 19934, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:46:25.249896', 'step': 19934, 'epoch': 2}
{'type': 'loss', 'content': 0.07503335922956467, 'timestamp': '2025-10-02 00:46:25.252783', 'step': 19935, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:25.314879', 'step': 19935, 'epoch': 2}
{'type': 'loss', 'content': 0.09404899924993515, 'timestamp': '2025-10-02 00:46:25.327337', 'step': 19936, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:25.388695', 'step': 19936, 'epoch': 2}
{'type': 'loss', 'content': 0.07244567573070526, 'timestamp': '2025-10-02 00:46:25.396561', 'step': 19937, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:25.462956', 'step': 19937, 'epoch': 2}
{'type': 'loss', 'content': 0.09904936701059341, 'timestamp': '2025-10-02 00:46:25.466339', 'step': 19938, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:25.535628', 'step': 19938, 'epoch': 2}
{'type': 'loss', 'content': 0.11791399866342545, 'timestamp': '2025-10-02 00:46:25.539266', 'step': 19939, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:25.604141', 'step': 19939, 'epoch': 2}
{'type': 'loss', 'content': 0.13979481160640717, 'timestamp': '2025-10-02 00:46:25.617213', 'step': 19940, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:25.677877', 'step': 19940, 'epoch': 2}
{'type': 'loss', 'content': 0.06784146279096603, 'timestamp': '2025-10-02 00:46:25.682517', 'step': 19941, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:25.740445', 'step': 19941, 'epoch': 2}
{'type': 'loss', 'content': 0.06684114784002304, 'timestamp': '2025-10-02 00:46:25.743912', 'step': 19942, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:25.811285', 'step': 19942, 'epoch': 2}
{'type': 'loss', 'content': 0.09667986631393433, 'timestamp': '2025-10-02 00:46:25.813960', 'step': 19943, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:25.881210', 'step': 19943, 'epoch': 2}
{'type': 'loss', 'content': 0.02192879281938076, 'timestamp': '2025-10-02 00:46:25.893923', 'step': 19944, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:25.965433', 'step': 19944, 'epoch': 2}
{'type': 'loss', 'content': 0.09021134674549103, 'timestamp': '2025-10-02 00:46:25.976567', 'step': 19945, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:26.044320', 'step': 19945, 'epoch': 2}
{'type': 'loss', 'content': 0.026267021894454956, 'timestamp': '2025-10-02 00:46:26.052896', 'step': 19946, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:26.117996', 'step': 19946, 'epoch': 2}
{'type': 'loss', 'content': 0.08752956986427307, 'timestamp': '2025-10-02 00:46:26.121685', 'step': 19947, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:26.195491', 'step': 19947, 'epoch': 2}
{'type': 'loss', 'content': 0.06631980836391449, 'timestamp': '2025-10-02 00:46:26.202321', 'step': 19948, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:26.280710', 'step': 19948, 'epoch': 2}
{'type': 'loss', 'content': 0.07366713136434555, 'timestamp': '2025-10-02 00:46:26.286666', 'step': 19949, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:26.349253', 'step': 19949, 'epoch': 2}
{'type': 'loss', 'content': 0.028443995863199234, 'timestamp': '2025-10-02 00:46:26.358611', 'step': 19950, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:26.421953', 'step': 19950, 'epoch': 2}
{'type': 'loss', 'content': 0.06555907428264618, 'timestamp': '2025-10-02 00:46:26.427815', 'step': 19951, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:26.485056', 'step': 19951, 'epoch': 2}
{'type': 'loss', 'content': 0.056251268833875656, 'timestamp': '2025-10-02 00:46:26.491924', 'step': 19952, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:26.557334', 'step': 19952, 'epoch': 2}
{'type': 'loss', 'content': 0.006661106366664171, 'timestamp': '2025-10-02 00:46:26.564930', 'step': 19953, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:26.629503', 'step': 19953, 'epoch': 2}
{'type': 'loss', 'content': 0.029598940163850784, 'timestamp': '2025-10-02 00:46:26.637036', 'step': 19954, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:26.699282', 'step': 19954, 'epoch': 2}
{'type': 'loss', 'content': 0.07241357862949371, 'timestamp': '2025-10-02 00:46:26.712676', 'step': 19955, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:46:26.782105', 'step': 19955, 'epoch': 2}
{'type': 'loss', 'content': 0.03654635325074196, 'timestamp': '2025-10-02 00:46:26.793061', 'step': 19956, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:26.864366', 'step': 19956, 'epoch': 2}
{'type': 'loss', 'content': 0.08156090974807739, 'timestamp': '2025-10-02 00:46:26.868151', 'step': 19957, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 544], 'flops': 10880066115712.0}, 'timestamp': '2025-10-02 00:46:26.954288', 'step': 19957, 'epoch': 2}
{'type': 'loss', 'content': 0.011748265475034714, 'timestamp': '2025-10-02 00:46:26.969163', 'step': 19958, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:27.053197', 'step': 19958, 'epoch': 2}
{'type': 'loss', 'content': 0.026385221630334854, 'timestamp': '2025-10-02 00:46:27.058496', 'step': 19959, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:27.147367', 'step': 19959, 'epoch': 2}
{'type': 'loss', 'content': 0.20258444547653198, 'timestamp': '2025-10-02 00:46:27.153856', 'step': 19960, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:27.215660', 'step': 19960, 'epoch': 2}
{'type': 'loss', 'content': 0.05307931825518608, 'timestamp': '2025-10-02 00:46:27.221447', 'step': 19961, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:27.279726', 'step': 19961, 'epoch': 2}
{'type': 'loss', 'content': 0.03778764232993126, 'timestamp': '2025-10-02 00:46:27.287110', 'step': 19962, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:27.350563', 'step': 19962, 'epoch': 2}
{'type': 'loss', 'content': 0.10730450600385666, 'timestamp': '2025-10-02 00:46:27.354606', 'step': 19963, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:27.412727', 'step': 19963, 'epoch': 2}
{'type': 'loss', 'content': 0.020155103877186775, 'timestamp': '2025-10-02 00:46:27.419801', 'step': 19964, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:27.474430', 'step': 19964, 'epoch': 2}
{'type': 'loss', 'content': 0.07025855034589767, 'timestamp': '2025-10-02 00:46:27.478340', 'step': 19965, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:27.535749', 'step': 19965, 'epoch': 2}
{'type': 'loss', 'content': 0.052583735436201096, 'timestamp': '2025-10-02 00:46:27.538459', 'step': 19966, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:46:27.614435', 'step': 19966, 'epoch': 2}
{'type': 'loss', 'content': 0.030554287135601044, 'timestamp': '2025-10-02 00:46:27.624893', 'step': 19967, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:27.680905', 'step': 19967, 'epoch': 2}
{'type': 'loss', 'content': 0.12360923737287521, 'timestamp': '2025-10-02 00:46:27.687021', 'step': 19968, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:27.756732', 'step': 19968, 'epoch': 2}
{'type': 'loss', 'content': 0.03338653966784477, 'timestamp': '2025-10-02 00:46:27.766569', 'step': 19969, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:27.838364', 'step': 19969, 'epoch': 2}
{'type': 'loss', 'content': 0.10795629024505615, 'timestamp': '2025-10-02 00:46:27.841533', 'step': 19970, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:27.902325', 'step': 19970, 'epoch': 2}
{'type': 'loss', 'content': 0.07489854842424393, 'timestamp': '2025-10-02 00:46:27.912150', 'step': 19971, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:27.985983', 'step': 19971, 'epoch': 2}
{'type': 'loss', 'content': 0.0845915898680687, 'timestamp': '2025-10-02 00:46:27.996375', 'step': 19972, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:28.051474', 'step': 19972, 'epoch': 2}
{'type': 'loss', 'content': 0.010442506521940231, 'timestamp': '2025-10-02 00:46:28.061083', 'step': 19973, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:46:28.137088', 'step': 19973, 'epoch': 2}
{'type': 'loss', 'content': 0.039797935634851456, 'timestamp': '2025-10-02 00:46:28.147651', 'step': 19974, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:28.217662', 'step': 19974, 'epoch': 2}
{'type': 'loss', 'content': 0.0709361881017685, 'timestamp': '2025-10-02 00:46:28.220892', 'step': 19975, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:46:28.295381', 'step': 19975, 'epoch': 2}
{'type': 'loss', 'content': 0.029192764312028885, 'timestamp': '2025-10-02 00:46:28.311413', 'step': 19976, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:28.386482', 'step': 19976, 'epoch': 2}
{'type': 'loss', 'content': 0.036494046449661255, 'timestamp': '2025-10-02 00:46:28.392426', 'step': 19977, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:28.455805', 'step': 19977, 'epoch': 2}
{'type': 'loss', 'content': 0.10962359607219696, 'timestamp': '2025-10-02 00:46:28.461390', 'step': 19978, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:28.520805', 'step': 19978, 'epoch': 2}
{'type': 'loss', 'content': 0.08091048151254654, 'timestamp': '2025-10-02 00:46:28.527873', 'step': 19979, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:28.587322', 'step': 19979, 'epoch': 2}
{'type': 'loss', 'content': 0.03867253288626671, 'timestamp': '2025-10-02 00:46:28.597182', 'step': 19980, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:28.655504', 'step': 19980, 'epoch': 2}
{'type': 'loss', 'content': 0.04735678434371948, 'timestamp': '2025-10-02 00:46:28.658178', 'step': 19981, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:28.721825', 'step': 19981, 'epoch': 2}
{'type': 'loss', 'content': 0.03612835332751274, 'timestamp': '2025-10-02 00:46:28.729594', 'step': 19982, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:28.796085', 'step': 19982, 'epoch': 2}
{'type': 'loss', 'content': 0.04848415404558182, 'timestamp': '2025-10-02 00:46:28.803502', 'step': 19983, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:28.875270', 'step': 19983, 'epoch': 2}
{'type': 'loss', 'content': 0.02399926446378231, 'timestamp': '2025-10-02 00:46:28.886199', 'step': 19984, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:28.947509', 'step': 19984, 'epoch': 2}
{'type': 'loss', 'content': 0.1116054505109787, 'timestamp': '2025-10-02 00:46:28.954527', 'step': 19985, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:29.012676', 'step': 19985, 'epoch': 2}
{'type': 'loss', 'content': 0.13287833333015442, 'timestamp': '2025-10-02 00:46:29.016637', 'step': 19986, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:29.085062', 'step': 19986, 'epoch': 2}
{'type': 'loss', 'content': 0.1201275959610939, 'timestamp': '2025-10-02 00:46:29.089304', 'step': 19987, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:29.154653', 'step': 19987, 'epoch': 2}
{'type': 'loss', 'content': 0.03750152885913849, 'timestamp': '2025-10-02 00:46:29.163530', 'step': 19988, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:29.226648', 'step': 19988, 'epoch': 2}
{'type': 'loss', 'content': 0.029199091717600822, 'timestamp': '2025-10-02 00:46:29.234352', 'step': 19989, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:46:29.305692', 'step': 19989, 'epoch': 2}
{'type': 'loss', 'content': 0.011575302109122276, 'timestamp': '2025-10-02 00:46:29.317707', 'step': 19990, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:46:29.397806', 'step': 19990, 'epoch': 2}
{'type': 'loss', 'content': 0.03950324282050133, 'timestamp': '2025-10-02 00:46:29.408486', 'step': 19991, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:29.470500', 'step': 19991, 'epoch': 2}
{'type': 'loss', 'content': 0.030073706060647964, 'timestamp': '2025-10-02 00:46:29.482481', 'step': 19992, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:29.548357', 'step': 19992, 'epoch': 2}
{'type': 'loss', 'content': 0.06109208986163139, 'timestamp': '2025-10-02 00:46:29.560662', 'step': 19993, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:29.624469', 'step': 19993, 'epoch': 2}
{'type': 'loss', 'content': 0.06774916499853134, 'timestamp': '2025-10-02 00:46:29.635593', 'step': 19994, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:46:29.705842', 'step': 19994, 'epoch': 2}
{'type': 'loss', 'content': 0.1035882830619812, 'timestamp': '2025-10-02 00:46:29.708322', 'step': 19995, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:29.775543', 'step': 19995, 'epoch': 2}
{'type': 'loss', 'content': 0.03522820398211479, 'timestamp': '2025-10-02 00:46:29.782410', 'step': 19996, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:29.838027', 'step': 19996, 'epoch': 2}
{'type': 'loss', 'content': 0.029215971007943153, 'timestamp': '2025-10-02 00:46:29.843925', 'step': 19997, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:29.912521', 'step': 19997, 'epoch': 2}
{'type': 'loss', 'content': 0.11204509437084198, 'timestamp': '2025-10-02 00:46:29.915841', 'step': 19998, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:29.972993', 'step': 19998, 'epoch': 2}
{'type': 'loss', 'content': 0.0028140225913375616, 'timestamp': '2025-10-02 00:46:29.980515', 'step': 19999, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:46:30.041160', 'step': 19999, 'epoch': 2}
{'type': 'loss', 'content': 0.11909125745296478, 'timestamp': '2025-10-02 00:46:30.053283', 'step': 20000, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 20000', 'timestamp': '2025-10-02 00:46:30.444672', 'step': 20000, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:30.504963', 'step': 20000, 'epoch': 2}
{'type': 'loss', 'content': 0.14660830795764923, 'timestamp': '2025-10-02 00:46:30.508923', 'step': 20001, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:46:30.578423', 'step': 20001, 'epoch': 2}
{'type': 'loss', 'content': 0.018536550924181938, 'timestamp': '2025-10-02 00:46:30.588622', 'step': 20002, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:46:30.653287', 'step': 20002, 'epoch': 2}
{'type': 'loss', 'content': 0.03222137689590454, 'timestamp': '2025-10-02 00:46:30.663796', 'step': 20003, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:30.727739', 'step': 20003, 'epoch': 2}
{'type': 'loss', 'content': 0.1253928393125534, 'timestamp': '2025-10-02 00:46:30.734834', 'step': 20004, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:30.795707', 'step': 20004, 'epoch': 2}
{'type': 'loss', 'content': 0.09972196817398071, 'timestamp': '2025-10-02 00:46:30.805040', 'step': 20005, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:30.868793', 'step': 20005, 'epoch': 2}
{'type': 'loss', 'content': 0.07326637208461761, 'timestamp': '2025-10-02 00:46:30.872348', 'step': 20006, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:30.942920', 'step': 20006, 'epoch': 2}
{'type': 'loss', 'content': 0.06029842048883438, 'timestamp': '2025-10-02 00:46:30.952471', 'step': 20007, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:31.026004', 'step': 20007, 'epoch': 2}
{'type': 'loss', 'content': 0.014886624179780483, 'timestamp': '2025-10-02 00:46:31.034541', 'step': 20008, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:31.097768', 'step': 20008, 'epoch': 2}
{'type': 'loss', 'content': 0.01767590269446373, 'timestamp': '2025-10-02 00:46:31.103650', 'step': 20009, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:31.168353', 'step': 20009, 'epoch': 2}
{'type': 'loss', 'content': 0.062210313975811005, 'timestamp': '2025-10-02 00:46:31.180231', 'step': 20010, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:31.252378', 'step': 20010, 'epoch': 2}
{'type': 'loss', 'content': 0.05112314596772194, 'timestamp': '2025-10-02 00:46:31.260446', 'step': 20011, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:31.321858', 'step': 20011, 'epoch': 2}
{'type': 'loss', 'content': 0.06641260534524918, 'timestamp': '2025-10-02 00:46:31.328897', 'step': 20012, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:31.383820', 'step': 20012, 'epoch': 2}
{'type': 'loss', 'content': 0.035683996975421906, 'timestamp': '2025-10-02 00:46:31.386466', 'step': 20013, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:31.441105', 'step': 20013, 'epoch': 2}
{'type': 'loss', 'content': 0.0767701268196106, 'timestamp': '2025-10-02 00:46:31.443642', 'step': 20014, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:31.498176', 'step': 20014, 'epoch': 2}
{'type': 'loss', 'content': 0.023227812722325325, 'timestamp': '2025-10-02 00:46:31.507517', 'step': 20015, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:31.563676', 'step': 20015, 'epoch': 2}
{'type': 'loss', 'content': 0.05027100071310997, 'timestamp': '2025-10-02 00:46:31.571114', 'step': 20016, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:46:31.625660', 'step': 20016, 'epoch': 2}
{'type': 'loss', 'content': 0.1411849707365036, 'timestamp': '2025-10-02 00:46:31.628341', 'step': 20017, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:31.686340', 'step': 20017, 'epoch': 2}
{'type': 'loss', 'content': 0.027216440066695213, 'timestamp': '2025-10-02 00:46:31.689347', 'step': 20018, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:31.745519', 'step': 20018, 'epoch': 2}
{'type': 'loss', 'content': 0.018983932211995125, 'timestamp': '2025-10-02 00:46:31.751299', 'step': 20019, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:31.807436', 'step': 20019, 'epoch': 2}
{'type': 'loss', 'content': 0.09931151568889618, 'timestamp': '2025-10-02 00:46:31.813739', 'step': 20020, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:31.868103', 'step': 20020, 'epoch': 2}
{'type': 'loss', 'content': 0.017529230564832687, 'timestamp': '2025-10-02 00:46:31.870816', 'step': 20021, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:46:31.927401', 'step': 20021, 'epoch': 2}
{'type': 'loss', 'content': 0.008534524589776993, 'timestamp': '2025-10-02 00:46:31.936925', 'step': 20022, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:31.992321', 'step': 20022, 'epoch': 2}
{'type': 'loss', 'content': 0.0407952144742012, 'timestamp': '2025-10-02 00:46:31.995080', 'step': 20023, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:46:32.065362', 'step': 20023, 'epoch': 2}
{'type': 'loss', 'content': 0.05910247936844826, 'timestamp': '2025-10-02 00:46:32.078599', 'step': 20024, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:32.133318', 'step': 20024, 'epoch': 2}
{'type': 'loss', 'content': 0.15509022772312164, 'timestamp': '2025-10-02 00:46:32.136154', 'step': 20025, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:32.190357', 'step': 20025, 'epoch': 2}
{'type': 'loss', 'content': 0.017567791044712067, 'timestamp': '2025-10-02 00:46:32.193444', 'step': 20026, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:32.248666', 'step': 20026, 'epoch': 2}
{'type': 'loss', 'content': 0.0391170009970665, 'timestamp': '2025-10-02 00:46:32.251472', 'step': 20027, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:32.305889', 'step': 20027, 'epoch': 2}
{'type': 'loss', 'content': 0.04488731548190117, 'timestamp': '2025-10-02 00:46:32.312347', 'step': 20028, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:46:32.367237', 'step': 20028, 'epoch': 2}
{'type': 'loss', 'content': 0.05363675579428673, 'timestamp': '2025-10-02 00:46:32.369730', 'step': 20029, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:32.423652', 'step': 20029, 'epoch': 2}
{'type': 'loss', 'content': 0.07894445955753326, 'timestamp': '2025-10-02 00:46:32.426763', 'step': 20030, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:46:32.488724', 'step': 20030, 'epoch': 2}
{'type': 'loss', 'content': 0.03342599421739578, 'timestamp': '2025-10-02 00:46:32.499343', 'step': 20031, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:32.554540', 'step': 20031, 'epoch': 2}
{'type': 'loss', 'content': 0.07196727395057678, 'timestamp': '2025-10-02 00:46:32.561175', 'step': 20032, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:46:32.616129', 'step': 20032, 'epoch': 2}
{'type': 'loss', 'content': 0.0394665002822876, 'timestamp': '2025-10-02 00:46:32.626316', 'step': 20033, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:32.681304', 'step': 20033, 'epoch': 2}
{'type': 'loss', 'content': 0.10666166245937347, 'timestamp': '2025-10-02 00:46:32.684837', 'step': 20034, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:32.739609', 'step': 20034, 'epoch': 2}
{'type': 'loss', 'content': 0.052769917994737625, 'timestamp': '2025-10-02 00:46:32.742480', 'step': 20035, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:32.796758', 'step': 20035, 'epoch': 2}
{'type': 'loss', 'content': 0.13759419322013855, 'timestamp': '2025-10-02 00:46:32.803997', 'step': 20036, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:32.858641', 'step': 20036, 'epoch': 2}
{'type': 'loss', 'content': 0.023999294266104698, 'timestamp': '2025-10-02 00:46:32.861143', 'step': 20037, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:32.915499', 'step': 20037, 'epoch': 2}
{'type': 'loss', 'content': 0.131615549325943, 'timestamp': '2025-10-02 00:46:32.917972', 'step': 20038, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:32.972686', 'step': 20038, 'epoch': 2}
{'type': 'loss', 'content': 0.004222738090902567, 'timestamp': '2025-10-02 00:46:32.975372', 'step': 20039, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:33.029991', 'step': 20039, 'epoch': 2}
{'type': 'loss', 'content': 0.038812872022390366, 'timestamp': '2025-10-02 00:46:33.036495', 'step': 20040, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:33.091670', 'step': 20040, 'epoch': 2}
{'type': 'loss', 'content': 0.21468886733055115, 'timestamp': '2025-10-02 00:46:33.096016', 'step': 20041, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:33.153909', 'step': 20041, 'epoch': 2}
{'type': 'loss', 'content': 0.057926908135414124, 'timestamp': '2025-10-02 00:46:33.159023', 'step': 20042, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:46:33.230820', 'step': 20042, 'epoch': 2}
{'type': 'loss', 'content': 0.025265542790293694, 'timestamp': '2025-10-02 00:46:33.243146', 'step': 20043, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:33.300040', 'step': 20043, 'epoch': 2}
{'type': 'loss', 'content': 0.002024095505475998, 'timestamp': '2025-10-02 00:46:33.308329', 'step': 20044, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:33.362743', 'step': 20044, 'epoch': 2}
{'type': 'loss', 'content': 0.06384748220443726, 'timestamp': '2025-10-02 00:46:33.368694', 'step': 20045, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:46:33.434110', 'step': 20045, 'epoch': 2}
{'type': 'loss', 'content': 0.026422861963510513, 'timestamp': '2025-10-02 00:46:33.444726', 'step': 20046, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:33.501748', 'step': 20046, 'epoch': 2}
{'type': 'loss', 'content': 0.02389374189078808, 'timestamp': '2025-10-02 00:46:33.506085', 'step': 20047, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:46:33.564342', 'step': 20047, 'epoch': 2}
{'type': 'loss', 'content': 0.10367613285779953, 'timestamp': '2025-10-02 00:46:33.571437', 'step': 20048, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:46:33.634715', 'step': 20048, 'epoch': 2}
{'type': 'loss', 'content': 0.025660375133156776, 'timestamp': '2025-10-02 00:46:33.646516', 'step': 20049, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:33.703396', 'step': 20049, 'epoch': 2}
{'type': 'loss', 'content': 0.042850296944379807, 'timestamp': '2025-10-02 00:46:33.710935', 'step': 20050, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:33.767627', 'step': 20050, 'epoch': 2}
{'type': 'loss', 'content': 0.026381060481071472, 'timestamp': '2025-10-02 00:46:33.770139', 'step': 20051, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:33.824390', 'step': 20051, 'epoch': 2}
{'type': 'loss', 'content': 0.2064613401889801, 'timestamp': '2025-10-02 00:46:33.831564', 'step': 20052, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:33.888058', 'step': 20052, 'epoch': 2}
{'type': 'loss', 'content': 0.05771423131227493, 'timestamp': '2025-10-02 00:46:33.890557', 'step': 20053, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:33.947777', 'step': 20053, 'epoch': 2}
{'type': 'loss', 'content': 0.014042037539184093, 'timestamp': '2025-10-02 00:46:33.950386', 'step': 20054, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:34.007322', 'step': 20054, 'epoch': 2}
{'type': 'loss', 'content': 0.019629500806331635, 'timestamp': '2025-10-02 00:46:34.010062', 'step': 20055, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:34.067240', 'step': 20055, 'epoch': 2}
{'type': 'loss', 'content': 0.044807322323322296, 'timestamp': '2025-10-02 00:46:34.073918', 'step': 20056, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:34.129309', 'step': 20056, 'epoch': 2}
{'type': 'loss', 'content': 0.029080456122756004, 'timestamp': '2025-10-02 00:46:34.135311', 'step': 20057, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:34.192417', 'step': 20057, 'epoch': 2}
{'type': 'loss', 'content': 0.007106616627424955, 'timestamp': '2025-10-02 00:46:34.201763', 'step': 20058, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:34.258281', 'step': 20058, 'epoch': 2}
{'type': 'loss', 'content': 0.012093160301446915, 'timestamp': '2025-10-02 00:46:34.265719', 'step': 20059, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:34.322188', 'step': 20059, 'epoch': 2}
{'type': 'loss', 'content': 0.003084215335547924, 'timestamp': '2025-10-02 00:46:34.328638', 'step': 20060, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:34.384834', 'step': 20060, 'epoch': 2}
{'type': 'loss', 'content': 0.0712134912610054, 'timestamp': '2025-10-02 00:46:34.387231', 'step': 20061, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:34.442254', 'step': 20061, 'epoch': 2}
{'type': 'loss', 'content': 0.1557496339082718, 'timestamp': '2025-10-02 00:46:34.445596', 'step': 20062, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:34.503531', 'step': 20062, 'epoch': 2}
{'type': 'loss', 'content': 0.09166552126407623, 'timestamp': '2025-10-02 00:46:34.507237', 'step': 20063, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:34.564341', 'step': 20063, 'epoch': 2}
{'type': 'loss', 'content': 0.018315061926841736, 'timestamp': '2025-10-02 00:46:34.571535', 'step': 20064, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:34.628085', 'step': 20064, 'epoch': 2}
{'type': 'loss', 'content': 0.06420978158712387, 'timestamp': '2025-10-02 00:46:34.631116', 'step': 20065, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:34.687876', 'step': 20065, 'epoch': 2}
{'type': 'loss', 'content': 0.039990734308958054, 'timestamp': '2025-10-02 00:46:34.691683', 'step': 20066, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:34.748149', 'step': 20066, 'epoch': 2}
{'type': 'loss', 'content': 0.07638479024171829, 'timestamp': '2025-10-02 00:46:34.757473', 'step': 20067, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:34.813278', 'step': 20067, 'epoch': 2}
{'type': 'loss', 'content': 0.004501301795244217, 'timestamp': '2025-10-02 00:46:34.819577', 'step': 20068, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:34.876320', 'step': 20068, 'epoch': 2}
{'type': 'loss', 'content': 0.12344131618738174, 'timestamp': '2025-10-02 00:46:34.880870', 'step': 20069, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:46:34.946689', 'step': 20069, 'epoch': 2}
{'type': 'loss', 'content': 0.010665628127753735, 'timestamp': '2025-10-02 00:46:34.957141', 'step': 20070, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:35.014527', 'step': 20070, 'epoch': 2}
{'type': 'loss', 'content': 0.05426434427499771, 'timestamp': '2025-10-02 00:46:35.018484', 'step': 20071, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:35.076834', 'step': 20071, 'epoch': 2}
{'type': 'loss', 'content': 0.09112785756587982, 'timestamp': '2025-10-02 00:46:35.083246', 'step': 20072, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:35.140607', 'step': 20072, 'epoch': 2}
{'type': 'loss', 'content': 0.07198129594326019, 'timestamp': '2025-10-02 00:46:35.143315', 'step': 20073, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:35.198673', 'step': 20073, 'epoch': 2}
{'type': 'loss', 'content': 0.07075666636228561, 'timestamp': '2025-10-02 00:46:35.202696', 'step': 20074, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:35.260331', 'step': 20074, 'epoch': 2}
{'type': 'loss', 'content': 0.02457057684659958, 'timestamp': '2025-10-02 00:46:35.262851', 'step': 20075, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:35.317900', 'step': 20075, 'epoch': 2}
{'type': 'loss', 'content': 0.0356454961001873, 'timestamp': '2025-10-02 00:46:35.325257', 'step': 20076, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:35.383497', 'step': 20076, 'epoch': 2}
{'type': 'loss', 'content': 0.05639303848147392, 'timestamp': '2025-10-02 00:46:35.391020', 'step': 20077, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:35.449089', 'step': 20077, 'epoch': 2}
{'type': 'loss', 'content': 0.011640393175184727, 'timestamp': '2025-10-02 00:46:35.451437', 'step': 20078, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:46:35.506617', 'step': 20078, 'epoch': 2}
{'type': 'loss', 'content': 0.021435922011733055, 'timestamp': '2025-10-02 00:46:35.516143', 'step': 20079, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:35.570879', 'step': 20079, 'epoch': 2}
{'type': 'loss', 'content': 0.08581673353910446, 'timestamp': '2025-10-02 00:46:35.576957', 'step': 20080, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:35.631424', 'step': 20080, 'epoch': 2}
{'type': 'loss', 'content': 0.012629340402781963, 'timestamp': '2025-10-02 00:46:35.639040', 'step': 20081, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:35.694740', 'step': 20081, 'epoch': 2}
{'type': 'loss', 'content': 0.01951778307557106, 'timestamp': '2025-10-02 00:46:35.704103', 'step': 20082, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:35.758738', 'step': 20082, 'epoch': 2}
{'type': 'loss', 'content': 0.1410781443119049, 'timestamp': '2025-10-02 00:46:35.761507', 'step': 20083, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:35.816009', 'step': 20083, 'epoch': 2}
{'type': 'loss', 'content': 0.050585221499204636, 'timestamp': '2025-10-02 00:46:35.822601', 'step': 20084, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:35.877438', 'step': 20084, 'epoch': 2}
{'type': 'loss', 'content': 0.08859604597091675, 'timestamp': '2025-10-02 00:46:35.880427', 'step': 20085, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:35.936498', 'step': 20085, 'epoch': 2}
{'type': 'loss', 'content': 0.048343442380428314, 'timestamp': '2025-10-02 00:46:35.942521', 'step': 20086, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:35.998997', 'step': 20086, 'epoch': 2}
{'type': 'loss', 'content': 0.027865558862686157, 'timestamp': '2025-10-02 00:46:36.004920', 'step': 20087, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:36.059880', 'step': 20087, 'epoch': 2}
{'type': 'loss', 'content': 0.19818437099456787, 'timestamp': '2025-10-02 00:46:36.066376', 'step': 20088, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:36.120435', 'step': 20088, 'epoch': 2}
{'type': 'loss', 'content': 0.07805316150188446, 'timestamp': '2025-10-02 00:46:36.123102', 'step': 20089, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:36.177281', 'step': 20089, 'epoch': 2}
{'type': 'loss', 'content': 0.09693538397550583, 'timestamp': '2025-10-02 00:46:36.179938', 'step': 20090, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:36.235413', 'step': 20090, 'epoch': 2}
{'type': 'loss', 'content': 0.06481321901082993, 'timestamp': '2025-10-02 00:46:36.241291', 'step': 20091, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:46:36.313205', 'step': 20091, 'epoch': 2}
{'type': 'loss', 'content': 0.016494277864694595, 'timestamp': '2025-10-02 00:46:36.326571', 'step': 20092, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:36.380493', 'step': 20092, 'epoch': 2}
{'type': 'loss', 'content': 0.07546694576740265, 'timestamp': '2025-10-02 00:46:36.383140', 'step': 20093, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:46:36.443024', 'step': 20093, 'epoch': 2}
{'type': 'loss', 'content': 0.022218286991119385, 'timestamp': '2025-10-02 00:46:36.453227', 'step': 20094, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:36.507908', 'step': 20094, 'epoch': 2}
{'type': 'loss', 'content': 0.04388783499598503, 'timestamp': '2025-10-02 00:46:36.510490', 'step': 20095, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:36.564903', 'step': 20095, 'epoch': 2}
{'type': 'loss', 'content': 0.03769814223051071, 'timestamp': '2025-10-02 00:46:36.571756', 'step': 20096, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:46:36.631470', 'step': 20096, 'epoch': 2}
{'type': 'loss', 'content': 0.044923245906829834, 'timestamp': '2025-10-02 00:46:36.642780', 'step': 20097, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:36.698285', 'step': 20097, 'epoch': 2}
{'type': 'loss', 'content': 0.02515740878880024, 'timestamp': '2025-10-02 00:46:36.704135', 'step': 20098, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:36.759204', 'step': 20098, 'epoch': 2}
{'type': 'loss', 'content': 0.005416407249867916, 'timestamp': '2025-10-02 00:46:36.762199', 'step': 20099, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:36.816192', 'step': 20099, 'epoch': 2}
{'type': 'loss', 'content': 0.05688999220728874, 'timestamp': '2025-10-02 00:46:36.823176', 'step': 20100, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:36.877320', 'step': 20100, 'epoch': 2}
{'type': 'loss', 'content': 0.044983576983213425, 'timestamp': '2025-10-02 00:46:36.883256', 'step': 20101, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:36.939018', 'step': 20101, 'epoch': 2}
{'type': 'loss', 'content': 0.008569614961743355, 'timestamp': '2025-10-02 00:46:36.948381', 'step': 20102, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:37.004758', 'step': 20102, 'epoch': 2}
{'type': 'loss', 'content': 0.03902042657136917, 'timestamp': '2025-10-02 00:46:37.007446', 'step': 20103, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:37.061289', 'step': 20103, 'epoch': 2}
{'type': 'loss', 'content': 0.06469621509313583, 'timestamp': '2025-10-02 00:46:37.067541', 'step': 20104, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:37.121371', 'step': 20104, 'epoch': 2}
{'type': 'loss', 'content': 0.017237283289432526, 'timestamp': '2025-10-02 00:46:37.129441', 'step': 20105, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:37.191936', 'step': 20105, 'epoch': 2}
{'type': 'loss', 'content': 0.040701862424612045, 'timestamp': '2025-10-02 00:46:37.195062', 'step': 20106, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:37.250075', 'step': 20106, 'epoch': 2}
{'type': 'loss', 'content': 0.054955966770648956, 'timestamp': '2025-10-02 00:46:37.253295', 'step': 20107, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:37.308955', 'step': 20107, 'epoch': 2}
{'type': 'loss', 'content': 0.036273710429668427, 'timestamp': '2025-10-02 00:46:37.315185', 'step': 20108, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:37.369185', 'step': 20108, 'epoch': 2}
{'type': 'loss', 'content': 0.051706839352846146, 'timestamp': '2025-10-02 00:46:37.376597', 'step': 20109, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:37.430966', 'step': 20109, 'epoch': 2}
{'type': 'loss', 'content': 0.04639941081404686, 'timestamp': '2025-10-02 00:46:37.433795', 'step': 20110, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:37.488332', 'step': 20110, 'epoch': 2}
{'type': 'loss', 'content': 0.05489989370107651, 'timestamp': '2025-10-02 00:46:37.490938', 'step': 20111, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:37.545629', 'step': 20111, 'epoch': 2}
{'type': 'loss', 'content': 0.04566697031259537, 'timestamp': '2025-10-02 00:46:37.551781', 'step': 20112, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:46:37.612309', 'step': 20112, 'epoch': 2}
{'type': 'loss', 'content': 0.010406811721622944, 'timestamp': '2025-10-02 00:46:37.623826', 'step': 20113, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:37.678199', 'step': 20113, 'epoch': 2}
{'type': 'loss', 'content': 0.06518937647342682, 'timestamp': '2025-10-02 00:46:37.685677', 'step': 20114, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:46:37.750380', 'step': 20114, 'epoch': 2}
{'type': 'loss', 'content': 0.0463903583586216, 'timestamp': '2025-10-02 00:46:37.761000', 'step': 20115, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:37.816908', 'step': 20115, 'epoch': 2}
{'type': 'loss', 'content': 0.06852491199970245, 'timestamp': '2025-10-02 00:46:37.823628', 'step': 20116, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:37.876502', 'step': 20116, 'epoch': 2}
{'type': 'loss', 'content': 0.1278899759054184, 'timestamp': '2025-10-02 00:46:37.879119', 'step': 20117, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:46:37.938903', 'step': 20117, 'epoch': 2}
{'type': 'loss', 'content': 0.0020440896041691303, 'timestamp': '2025-10-02 00:46:37.949080', 'step': 20118, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:38.003373', 'step': 20118, 'epoch': 2}
{'type': 'loss', 'content': 0.04828426241874695, 'timestamp': '2025-10-02 00:46:38.011002', 'step': 20119, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:46:38.076718', 'step': 20119, 'epoch': 2}
{'type': 'loss', 'content': 0.016439298167824745, 'timestamp': '2025-10-02 00:46:38.087956', 'step': 20120, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:38.142656', 'step': 20120, 'epoch': 2}
{'type': 'loss', 'content': 0.16198629140853882, 'timestamp': '2025-10-02 00:46:38.145529', 'step': 20121, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:38.200359', 'step': 20121, 'epoch': 2}
{'type': 'loss', 'content': 0.06424196064472198, 'timestamp': '2025-10-02 00:46:38.203248', 'step': 20122, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:38.257498', 'step': 20122, 'epoch': 2}
{'type': 'loss', 'content': 0.11377173662185669, 'timestamp': '2025-10-02 00:46:38.260229', 'step': 20123, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:38.314946', 'step': 20123, 'epoch': 2}
{'type': 'loss', 'content': 0.02600017935037613, 'timestamp': '2025-10-02 00:46:38.320998', 'step': 20124, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:38.374705', 'step': 20124, 'epoch': 2}
{'type': 'loss', 'content': 0.18634693324565887, 'timestamp': '2025-10-02 00:46:38.377233', 'step': 20125, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:38.430855', 'step': 20125, 'epoch': 2}
{'type': 'loss', 'content': 0.0844503715634346, 'timestamp': '2025-10-02 00:46:38.433672', 'step': 20126, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:38.488497', 'step': 20126, 'epoch': 2}
{'type': 'loss', 'content': 0.03672019764780998, 'timestamp': '2025-10-02 00:46:38.491363', 'step': 20127, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:38.545821', 'step': 20127, 'epoch': 2}
{'type': 'loss', 'content': 0.03687950223684311, 'timestamp': '2025-10-02 00:46:38.554067', 'step': 20128, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:38.608012', 'step': 20128, 'epoch': 2}
{'type': 'loss', 'content': 0.057411376386880875, 'timestamp': '2025-10-02 00:46:38.610461', 'step': 20129, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:38.665066', 'step': 20129, 'epoch': 2}
{'type': 'loss', 'content': 0.07475883513689041, 'timestamp': '2025-10-02 00:46:38.667740', 'step': 20130, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:38.723755', 'step': 20130, 'epoch': 2}
{'type': 'loss', 'content': 0.04522963613271713, 'timestamp': '2025-10-02 00:46:38.729841', 'step': 20131, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:38.783607', 'step': 20131, 'epoch': 2}
{'type': 'loss', 'content': 0.1922122836112976, 'timestamp': '2025-10-02 00:46:38.789901', 'step': 20132, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:38.844519', 'step': 20132, 'epoch': 2}
{'type': 'loss', 'content': 0.06951232999563217, 'timestamp': '2025-10-02 00:46:38.846868', 'step': 20133, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:38.900773', 'step': 20133, 'epoch': 2}
{'type': 'loss', 'content': 0.12798605859279633, 'timestamp': '2025-10-02 00:46:38.903309', 'step': 20134, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:46:38.965738', 'step': 20134, 'epoch': 2}
{'type': 'loss', 'content': 0.02973164990544319, 'timestamp': '2025-10-02 00:46:38.976577', 'step': 20135, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:39.033182', 'step': 20135, 'epoch': 2}
{'type': 'loss', 'content': 0.06756366789340973, 'timestamp': '2025-10-02 00:46:39.039244', 'step': 20136, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:39.092832', 'step': 20136, 'epoch': 2}
{'type': 'loss', 'content': 0.12086381018161774, 'timestamp': '2025-10-02 00:46:39.096257', 'step': 20137, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:39.151895', 'step': 20137, 'epoch': 2}
{'type': 'loss', 'content': 0.03356679901480675, 'timestamp': '2025-10-02 00:46:39.159515', 'step': 20138, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:46:39.213704', 'step': 20138, 'epoch': 2}
{'type': 'loss', 'content': 0.09752051532268524, 'timestamp': '2025-10-02 00:46:39.216424', 'step': 20139, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:39.271665', 'step': 20139, 'epoch': 2}
{'type': 'loss', 'content': 0.05934128910303116, 'timestamp': '2025-10-02 00:46:39.278502', 'step': 20140, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:39.336759', 'step': 20140, 'epoch': 2}
{'type': 'loss', 'content': 0.13345220685005188, 'timestamp': '2025-10-02 00:46:39.339729', 'step': 20141, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:46:39.394613', 'step': 20141, 'epoch': 2}
{'type': 'loss', 'content': 0.10214833170175552, 'timestamp': '2025-10-02 00:46:39.397368', 'step': 20142, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:39.453213', 'step': 20142, 'epoch': 2}
{'type': 'loss', 'content': 0.10744722187519073, 'timestamp': '2025-10-02 00:46:39.455691', 'step': 20143, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:39.510122', 'step': 20143, 'epoch': 2}
{'type': 'loss', 'content': 0.10642693936824799, 'timestamp': '2025-10-02 00:46:39.520885', 'step': 20144, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:39.575955', 'step': 20144, 'epoch': 2}
{'type': 'loss', 'content': 0.01031067781150341, 'timestamp': '2025-10-02 00:46:39.578609', 'step': 20145, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:39.633026', 'step': 20145, 'epoch': 2}
{'type': 'loss', 'content': 0.003384433686733246, 'timestamp': '2025-10-02 00:46:39.635908', 'step': 20146, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:39.689584', 'step': 20146, 'epoch': 2}
{'type': 'loss', 'content': 0.062063056975603104, 'timestamp': '2025-10-02 00:46:39.692909', 'step': 20147, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:39.747694', 'step': 20147, 'epoch': 2}
{'type': 'loss', 'content': 0.033313922584056854, 'timestamp': '2025-10-02 00:46:39.753825', 'step': 20148, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:46:39.814452', 'step': 20148, 'epoch': 2}
{'type': 'loss', 'content': 0.005680850241333246, 'timestamp': '2025-10-02 00:46:39.825804', 'step': 20149, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:39.880613', 'step': 20149, 'epoch': 2}
{'type': 'loss', 'content': 0.04244263097643852, 'timestamp': '2025-10-02 00:46:39.888256', 'step': 20150, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:39.943640', 'step': 20150, 'epoch': 2}
{'type': 'loss', 'content': 0.09481165558099747, 'timestamp': '2025-10-02 00:46:39.946235', 'step': 20151, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:40.000921', 'step': 20151, 'epoch': 2}
{'type': 'loss', 'content': 0.07803564518690109, 'timestamp': '2025-10-02 00:46:40.007365', 'step': 20152, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:46:40.061080', 'step': 20152, 'epoch': 2}
{'type': 'loss', 'content': 0.06175239011645317, 'timestamp': '2025-10-02 00:46:40.063511', 'step': 20153, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:40.117320', 'step': 20153, 'epoch': 2}
{'type': 'loss', 'content': 0.06536805629730225, 'timestamp': '2025-10-02 00:46:40.120064', 'step': 20154, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:40.175521', 'step': 20154, 'epoch': 2}
{'type': 'loss', 'content': 0.06887318193912506, 'timestamp': '2025-10-02 00:46:40.178439', 'step': 20155, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:46:40.239809', 'step': 20155, 'epoch': 2}
{'type': 'loss', 'content': 0.052648428827524185, 'timestamp': '2025-10-02 00:46:40.251059', 'step': 20156, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:40.306058', 'step': 20156, 'epoch': 2}
{'type': 'loss', 'content': 0.035272564738988876, 'timestamp': '2025-10-02 00:46:40.308697', 'step': 20157, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:40.365312', 'step': 20157, 'epoch': 2}
{'type': 'loss', 'content': 0.043906234204769135, 'timestamp': '2025-10-02 00:46:40.374657', 'step': 20158, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:40.429862', 'step': 20158, 'epoch': 2}
{'type': 'loss', 'content': 0.02996101975440979, 'timestamp': '2025-10-02 00:46:40.439214', 'step': 20159, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:40.496891', 'step': 20159, 'epoch': 2}
{'type': 'loss', 'content': 0.04437476769089699, 'timestamp': '2025-10-02 00:46:40.503534', 'step': 20160, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:46:40.557608', 'step': 20160, 'epoch': 2}
{'type': 'loss', 'content': 0.06795129925012589, 'timestamp': '2025-10-02 00:46:40.560297', 'step': 20161, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:46:40.614198', 'step': 20161, 'epoch': 2}
{'type': 'loss', 'content': 0.0908268615603447, 'timestamp': '2025-10-02 00:46:40.617924', 'step': 20162, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:40.674757', 'step': 20162, 'epoch': 2}
{'type': 'loss', 'content': 0.04792507737874985, 'timestamp': '2025-10-02 00:46:40.680590', 'step': 20163, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:40.740515', 'step': 20163, 'epoch': 2}
{'type': 'loss', 'content': 0.06596700847148895, 'timestamp': '2025-10-02 00:46:40.746977', 'step': 20164, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:46:40.808083', 'step': 20164, 'epoch': 2}
{'type': 'loss', 'content': 0.08238398283720016, 'timestamp': '2025-10-02 00:46:40.819361', 'step': 20165, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:46:40.873789', 'step': 20165, 'epoch': 2}
{'type': 'loss', 'content': 0.039056677371263504, 'timestamp': '2025-10-02 00:46:40.876909', 'step': 20166, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:40.931741', 'step': 20166, 'epoch': 2}
{'type': 'loss', 'content': 0.05323517322540283, 'timestamp': '2025-10-02 00:46:40.934271', 'step': 20167, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:46:40.989924', 'step': 20167, 'epoch': 2}
{'type': 'loss', 'content': 0.03186100348830223, 'timestamp': '2025-10-02 00:46:41.000277', 'step': 20168, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:41.054546', 'step': 20168, 'epoch': 2}
{'type': 'loss', 'content': 0.09041032195091248, 'timestamp': '2025-10-02 00:46:41.062058', 'step': 20169, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:46:41.116242', 'step': 20169, 'epoch': 2}
{'type': 'loss', 'content': 0.10621844232082367, 'timestamp': '2025-10-02 00:46:41.119078', 'step': 20170, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:46:41.173816', 'step': 20170, 'epoch': 2}
{'type': 'loss', 'content': 0.043286439031362534, 'timestamp': '2025-10-02 00:46:41.181452', 'step': 20171, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:41.236505', 'step': 20171, 'epoch': 2}
{'type': 'loss', 'content': 0.030003147199749947, 'timestamp': '2025-10-02 00:46:41.242758', 'step': 20172, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:41.297492', 'step': 20172, 'epoch': 2}
{'type': 'loss', 'content': 0.022185545414686203, 'timestamp': '2025-10-02 00:46:41.307049', 'step': 20173, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:46:41.377864', 'step': 20173, 'epoch': 2}
{'type': 'loss', 'content': 0.031596019864082336, 'timestamp': '2025-10-02 00:46:41.390274', 'step': 20174, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:46:41.445960', 'step': 20174, 'epoch': 2}
{'type': 'loss', 'content': 0.08250240981578827, 'timestamp': '2025-10-02 00:46:41.448568', 'step': 20175, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:46:41.503206', 'step': 20175, 'epoch': 2}
{'type': 'loss', 'content': 0.057138022035360336, 'timestamp': '2025-10-02 00:46:41.509491', 'step': 20176, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:46:41.564721', 'step': 20176, 'epoch': 2}
{'type': 'loss', 'content': 0.030520858243107796, 'timestamp': '2025-10-02 00:46:41.570660', 'step': 20177, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:41.625280', 'step': 20177, 'epoch': 2}
{'type': 'loss', 'content': 0.0422719269990921, 'timestamp': '2025-10-02 00:46:41.628169', 'step': 20178, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:46:41.683909', 'step': 20178, 'epoch': 2}
{'type': 'loss', 'content': 0.0006278950022533536, 'timestamp': '2025-10-02 00:46:41.686480', 'step': 20179, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:46:41.741321', 'step': 20179, 'epoch': 2}
{'type': 'loss', 'content': 0.023348655551671982, 'timestamp': '2025-10-02 00:46:41.751424', 'step': 20180, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:46:41.809377', 'step': 20180, 'epoch': 2}
{'type': 'loss', 'content': 0.010392392985522747, 'timestamp': '2025-10-02 00:46:41.820370', 'step': 20181, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:46:41.875622', 'step': 20181, 'epoch': 2}
{'type': 'loss', 'content': 0.10025317966938019, 'timestamp': '2025-10-02 00:46:41.878294', 'step': 20182, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:46:41.932262', 'step': 20182, 'epoch': 2}
{'type': 'loss', 'content': 0.09891804307699203, 'timestamp': '2025-10-02 00:46:41.935372', 'step': 20183, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:46:41.989779', 'step': 20183, 'epoch': 2}
{'type': 'loss', 'content': 0.08253321051597595, 'timestamp': '2025-10-02 00:46:41.996412', 'step': 20184, 'epoch': 2}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:47:08.675993', 'step': 20184, 'epoch': 2}
{'type': 'pplx', 'content': 99.14164986721205, 'timestamp': '2025-10-02 00:47:08.680147', 'step': 20184, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:08.734830', 'step': 20184, 'epoch': 2}
{'type': 'loss', 'content': 0.17058400809764862, 'timestamp': '2025-10-02 00:47:08.737299', 'step': 20185, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:08.793606', 'step': 20185, 'epoch': 2}
{'type': 'loss', 'content': 0.05294040963053703, 'timestamp': '2025-10-02 00:47:08.802008', 'step': 20186, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:08.857234', 'step': 20186, 'epoch': 2}
{'type': 'loss', 'content': 0.07904743403196335, 'timestamp': '2025-10-02 00:47:08.860861', 'step': 20187, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:08.917868', 'step': 20187, 'epoch': 2}
{'type': 'loss', 'content': 0.025814155116677284, 'timestamp': '2025-10-02 00:47:08.927957', 'step': 20188, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:08.982052', 'step': 20188, 'epoch': 2}
{'type': 'loss', 'content': 0.053220655769109726, 'timestamp': '2025-10-02 00:47:08.984523', 'step': 20189, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:09.040224', 'step': 20189, 'epoch': 2}
{'type': 'loss', 'content': 0.07829026132822037, 'timestamp': '2025-10-02 00:47:09.045949', 'step': 20190, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:09.103493', 'step': 20190, 'epoch': 2}
{'type': 'loss', 'content': 0.04152904450893402, 'timestamp': '2025-10-02 00:47:09.105938', 'step': 20191, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:09.160304', 'step': 20191, 'epoch': 2}
{'type': 'loss', 'content': 0.021975945681333542, 'timestamp': '2025-10-02 00:47:09.168507', 'step': 20192, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:09.222061', 'step': 20192, 'epoch': 2}
{'type': 'loss', 'content': 0.1755060851573944, 'timestamp': '2025-10-02 00:47:09.224400', 'step': 20193, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:09.278372', 'step': 20193, 'epoch': 2}
{'type': 'loss', 'content': 0.0874272957444191, 'timestamp': '2025-10-02 00:47:09.280762', 'step': 20194, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:09.335482', 'step': 20194, 'epoch': 2}
{'type': 'loss', 'content': 0.06487829983234406, 'timestamp': '2025-10-02 00:47:09.344783', 'step': 20195, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:09.399515', 'step': 20195, 'epoch': 2}
{'type': 'loss', 'content': 0.04704458639025688, 'timestamp': '2025-10-02 00:47:09.407582', 'step': 20196, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:09.464248', 'step': 20196, 'epoch': 2}
{'type': 'loss', 'content': 0.0093079200014472, 'timestamp': '2025-10-02 00:47:09.471786', 'step': 20197, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:09.527162', 'step': 20197, 'epoch': 2}
{'type': 'loss', 'content': 0.04694044217467308, 'timestamp': '2025-10-02 00:47:09.533133', 'step': 20198, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:09.587782', 'step': 20198, 'epoch': 2}
{'type': 'loss', 'content': 0.11509206146001816, 'timestamp': '2025-10-02 00:47:09.590230', 'step': 20199, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:09.645092', 'step': 20199, 'epoch': 2}
{'type': 'loss', 'content': 0.05947848781943321, 'timestamp': '2025-10-02 00:47:09.651319', 'step': 20200, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:09.704998', 'step': 20200, 'epoch': 2}
{'type': 'loss', 'content': 0.11802961677312851, 'timestamp': '2025-10-02 00:47:09.707613', 'step': 20201, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:47:09.770307', 'step': 20201, 'epoch': 2}
{'type': 'loss', 'content': 0.011158842593431473, 'timestamp': '2025-10-02 00:47:09.781148', 'step': 20202, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:09.837115', 'step': 20202, 'epoch': 2}
{'type': 'loss', 'content': 0.041036538779735565, 'timestamp': '2025-10-02 00:47:09.846594', 'step': 20203, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:09.902056', 'step': 20203, 'epoch': 2}
{'type': 'loss', 'content': 0.04199974983930588, 'timestamp': '2025-10-02 00:47:09.912391', 'step': 20204, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:09.966050', 'step': 20204, 'epoch': 2}
{'type': 'loss', 'content': 0.038381654769182205, 'timestamp': '2025-10-02 00:47:09.968567', 'step': 20205, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:10.023934', 'step': 20205, 'epoch': 2}
{'type': 'loss', 'content': 0.02225789800286293, 'timestamp': '2025-10-02 00:47:10.031601', 'step': 20206, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:10.087140', 'step': 20206, 'epoch': 2}
{'type': 'loss', 'content': 0.08122066408395767, 'timestamp': '2025-10-02 00:47:10.089638', 'step': 20207, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:10.144108', 'step': 20207, 'epoch': 2}
{'type': 'loss', 'content': 0.010649589821696281, 'timestamp': '2025-10-02 00:47:10.154170', 'step': 20208, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:47:10.215153', 'step': 20208, 'epoch': 2}
{'type': 'loss', 'content': 0.10158202052116394, 'timestamp': '2025-10-02 00:47:10.226945', 'step': 20209, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:10.282350', 'step': 20209, 'epoch': 2}
{'type': 'loss', 'content': 0.0484587699174881, 'timestamp': '2025-10-02 00:47:10.285330', 'step': 20210, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:47:10.348282', 'step': 20210, 'epoch': 2}
{'type': 'loss', 'content': 0.06833680719137192, 'timestamp': '2025-10-02 00:47:10.359127', 'step': 20211, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:10.413641', 'step': 20211, 'epoch': 2}
{'type': 'loss', 'content': 0.18975158035755157, 'timestamp': '2025-10-02 00:47:10.419758', 'step': 20212, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:10.473611', 'step': 20212, 'epoch': 2}
{'type': 'loss', 'content': 0.02214554324746132, 'timestamp': '2025-10-02 00:47:10.476959', 'step': 20213, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:10.530183', 'step': 20213, 'epoch': 2}
{'type': 'loss', 'content': 0.11697413772344589, 'timestamp': '2025-10-02 00:47:10.533450', 'step': 20214, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:10.588226', 'step': 20214, 'epoch': 2}
{'type': 'loss', 'content': 0.025044389069080353, 'timestamp': '2025-10-02 00:47:10.590728', 'step': 20215, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:10.646687', 'step': 20215, 'epoch': 2}
{'type': 'loss', 'content': 0.05666710436344147, 'timestamp': '2025-10-02 00:47:10.652500', 'step': 20216, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:10.706489', 'step': 20216, 'epoch': 2}
{'type': 'loss', 'content': 0.010872050188481808, 'timestamp': '2025-10-02 00:47:10.709142', 'step': 20217, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:47:10.763513', 'step': 20217, 'epoch': 2}
{'type': 'loss', 'content': 0.0673423632979393, 'timestamp': '2025-10-02 00:47:10.766026', 'step': 20218, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:10.820547', 'step': 20218, 'epoch': 2}
{'type': 'loss', 'content': 0.11683449894189835, 'timestamp': '2025-10-02 00:47:10.823218', 'step': 20219, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:47:10.877069', 'step': 20219, 'epoch': 2}
{'type': 'loss', 'content': 0.08433790504932404, 'timestamp': '2025-10-02 00:47:10.883323', 'step': 20220, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:10.938083', 'step': 20220, 'epoch': 2}
{'type': 'loss', 'content': 0.044718749821186066, 'timestamp': '2025-10-02 00:47:10.940630', 'step': 20221, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:10.995929', 'step': 20221, 'epoch': 2}
{'type': 'loss', 'content': 0.057843662798404694, 'timestamp': '2025-10-02 00:47:10.998400', 'step': 20222, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:11.053000', 'step': 20222, 'epoch': 2}
{'type': 'loss', 'content': 0.08091413974761963, 'timestamp': '2025-10-02 00:47:11.060488', 'step': 20223, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:11.119199', 'step': 20223, 'epoch': 2}
{'type': 'loss', 'content': 0.053671374917030334, 'timestamp': '2025-10-02 00:47:11.130151', 'step': 20224, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:11.184921', 'step': 20224, 'epoch': 2}
{'type': 'loss', 'content': 0.006788589525967836, 'timestamp': '2025-10-02 00:47:11.195208', 'step': 20225, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:11.250516', 'step': 20225, 'epoch': 2}
{'type': 'loss', 'content': 0.01994807831943035, 'timestamp': '2025-10-02 00:47:11.253235', 'step': 20226, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:11.307682', 'step': 20226, 'epoch': 2}
{'type': 'loss', 'content': 0.12994323670864105, 'timestamp': '2025-10-02 00:47:11.310392', 'step': 20227, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:11.365416', 'step': 20227, 'epoch': 2}
{'type': 'loss', 'content': 0.046320777386426926, 'timestamp': '2025-10-02 00:47:11.375538', 'step': 20228, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:47:11.429295', 'step': 20228, 'epoch': 2}
{'type': 'loss', 'content': 0.08670977503061295, 'timestamp': '2025-10-02 00:47:11.431832', 'step': 20229, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:11.486062', 'step': 20229, 'epoch': 2}
{'type': 'loss', 'content': 0.16141678392887115, 'timestamp': '2025-10-02 00:47:11.488302', 'step': 20230, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:11.543352', 'step': 20230, 'epoch': 2}
{'type': 'loss', 'content': 0.010224569588899612, 'timestamp': '2025-10-02 00:47:11.549128', 'step': 20231, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:11.603635', 'step': 20231, 'epoch': 2}
{'type': 'loss', 'content': 0.03421616554260254, 'timestamp': '2025-10-02 00:47:11.610065', 'step': 20232, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:11.664323', 'step': 20232, 'epoch': 2}
{'type': 'loss', 'content': 0.1615220457315445, 'timestamp': '2025-10-02 00:47:11.666913', 'step': 20233, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:11.722204', 'step': 20233, 'epoch': 2}
{'type': 'loss', 'content': 0.07413163036108017, 'timestamp': '2025-10-02 00:47:11.724729', 'step': 20234, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:11.779515', 'step': 20234, 'epoch': 2}
{'type': 'loss', 'content': 0.05743291601538658, 'timestamp': '2025-10-02 00:47:11.785491', 'step': 20235, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:11.839402', 'step': 20235, 'epoch': 2}
{'type': 'loss', 'content': 0.12079991400241852, 'timestamp': '2025-10-02 00:47:11.845559', 'step': 20236, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:11.903987', 'step': 20236, 'epoch': 2}
{'type': 'loss', 'content': 0.05540749803185463, 'timestamp': '2025-10-02 00:47:11.914921', 'step': 20237, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:11.969531', 'step': 20237, 'epoch': 2}
{'type': 'loss', 'content': 0.06237289309501648, 'timestamp': '2025-10-02 00:47:11.972234', 'step': 20238, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:12.027259', 'step': 20238, 'epoch': 2}
{'type': 'loss', 'content': 0.042799849063158035, 'timestamp': '2025-10-02 00:47:12.034579', 'step': 20239, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:12.089279', 'step': 20239, 'epoch': 2}
{'type': 'loss', 'content': 0.0967460572719574, 'timestamp': '2025-10-02 00:47:12.095755', 'step': 20240, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:12.150614', 'step': 20240, 'epoch': 2}
{'type': 'loss', 'content': 0.059241969138383865, 'timestamp': '2025-10-02 00:47:12.153051', 'step': 20241, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:12.206876', 'step': 20241, 'epoch': 2}
{'type': 'loss', 'content': 0.01262083649635315, 'timestamp': '2025-10-02 00:47:12.209956', 'step': 20242, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:12.265675', 'step': 20242, 'epoch': 2}
{'type': 'loss', 'content': 0.030406823381781578, 'timestamp': '2025-10-02 00:47:12.268279', 'step': 20243, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:12.323074', 'step': 20243, 'epoch': 2}
{'type': 'loss', 'content': 0.03520698472857475, 'timestamp': '2025-10-02 00:47:12.329367', 'step': 20244, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:12.384181', 'step': 20244, 'epoch': 2}
{'type': 'loss', 'content': 0.0601591020822525, 'timestamp': '2025-10-02 00:47:12.386630', 'step': 20245, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:12.444367', 'step': 20245, 'epoch': 2}
{'type': 'loss', 'content': 0.017439814284443855, 'timestamp': '2025-10-02 00:47:12.453906', 'step': 20246, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:12.508140', 'step': 20246, 'epoch': 2}
{'type': 'loss', 'content': 0.05732346326112747, 'timestamp': '2025-10-02 00:47:12.512520', 'step': 20247, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:12.567997', 'step': 20247, 'epoch': 2}
{'type': 'loss', 'content': 0.09012531489133835, 'timestamp': '2025-10-02 00:47:12.575976', 'step': 20248, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:12.637115', 'step': 20248, 'epoch': 2}
{'type': 'loss', 'content': 0.03424570709466934, 'timestamp': '2025-10-02 00:47:12.641528', 'step': 20249, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:12.700138', 'step': 20249, 'epoch': 2}
{'type': 'loss', 'content': 0.023283064365386963, 'timestamp': '2025-10-02 00:47:12.703828', 'step': 20250, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:12.759110', 'step': 20250, 'epoch': 2}
{'type': 'loss', 'content': 0.02629544585943222, 'timestamp': '2025-10-02 00:47:12.762608', 'step': 20251, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:12.819258', 'step': 20251, 'epoch': 2}
{'type': 'loss', 'content': 0.10520338267087936, 'timestamp': '2025-10-02 00:47:12.825818', 'step': 20252, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:12.881730', 'step': 20252, 'epoch': 2}
{'type': 'loss', 'content': 0.03270295634865761, 'timestamp': '2025-10-02 00:47:12.884739', 'step': 20253, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:12.940360', 'step': 20253, 'epoch': 2}
{'type': 'loss', 'content': 0.07542097568511963, 'timestamp': '2025-10-02 00:47:12.943241', 'step': 20254, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:12.999389', 'step': 20254, 'epoch': 2}
{'type': 'loss', 'content': 0.16177982091903687, 'timestamp': '2025-10-02 00:47:13.002742', 'step': 20255, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:13.059187', 'step': 20255, 'epoch': 2}
{'type': 'loss', 'content': 0.009396540001034737, 'timestamp': '2025-10-02 00:47:13.065784', 'step': 20256, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:13.121590', 'step': 20256, 'epoch': 2}
{'type': 'loss', 'content': 0.030964983627200127, 'timestamp': '2025-10-02 00:47:13.124361', 'step': 20257, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:47:13.187216', 'step': 20257, 'epoch': 2}
{'type': 'loss', 'content': 0.040921445935964584, 'timestamp': '2025-10-02 00:47:13.197852', 'step': 20258, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:13.254751', 'step': 20258, 'epoch': 2}
{'type': 'loss', 'content': 0.02260645292699337, 'timestamp': '2025-10-02 00:47:13.264061', 'step': 20259, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:13.319796', 'step': 20259, 'epoch': 2}
{'type': 'loss', 'content': 0.09487925469875336, 'timestamp': '2025-10-02 00:47:13.328009', 'step': 20260, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:13.383253', 'step': 20260, 'epoch': 2}
{'type': 'loss', 'content': 0.0018063061870634556, 'timestamp': '2025-10-02 00:47:13.392809', 'step': 20261, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:13.450196', 'step': 20261, 'epoch': 2}
{'type': 'loss', 'content': 0.07494267076253891, 'timestamp': '2025-10-02 00:47:13.454338', 'step': 20262, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:13.510691', 'step': 20262, 'epoch': 2}
{'type': 'loss', 'content': 0.08457104116678238, 'timestamp': '2025-10-02 00:47:13.520022', 'step': 20263, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:13.576793', 'step': 20263, 'epoch': 2}
{'type': 'loss', 'content': 0.05168793722987175, 'timestamp': '2025-10-02 00:47:13.583271', 'step': 20264, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:13.639659', 'step': 20264, 'epoch': 2}
{'type': 'loss', 'content': 0.013217613101005554, 'timestamp': '2025-10-02 00:47:13.642990', 'step': 20265, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:13.698871', 'step': 20265, 'epoch': 2}
{'type': 'loss', 'content': 0.17774565517902374, 'timestamp': '2025-10-02 00:47:13.702373', 'step': 20266, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:13.759665', 'step': 20266, 'epoch': 2}
{'type': 'loss', 'content': 0.09704165905714035, 'timestamp': '2025-10-02 00:47:13.762782', 'step': 20267, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:13.818506', 'step': 20267, 'epoch': 2}
{'type': 'loss', 'content': 0.022786855697631836, 'timestamp': '2025-10-02 00:47:13.825100', 'step': 20268, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:13.880645', 'step': 20268, 'epoch': 2}
{'type': 'loss', 'content': 0.014298814348876476, 'timestamp': '2025-10-02 00:47:13.888313', 'step': 20269, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:13.946060', 'step': 20269, 'epoch': 2}
{'type': 'loss', 'content': 0.11601724475622177, 'timestamp': '2025-10-02 00:47:13.952990', 'step': 20270, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:14.017578', 'step': 20270, 'epoch': 2}
{'type': 'loss', 'content': 0.012579974718391895, 'timestamp': '2025-10-02 00:47:14.023481', 'step': 20271, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:14.096117', 'step': 20271, 'epoch': 2}
{'type': 'loss', 'content': 0.03743012621998787, 'timestamp': '2025-10-02 00:47:14.117448', 'step': 20272, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:14.197510', 'step': 20272, 'epoch': 2}
{'type': 'loss', 'content': 0.02759172022342682, 'timestamp': '2025-10-02 00:47:14.206585', 'step': 20273, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:14.277672', 'step': 20273, 'epoch': 2}
{'type': 'loss', 'content': 0.14765895903110504, 'timestamp': '2025-10-02 00:47:14.287022', 'step': 20274, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:14.359857', 'step': 20274, 'epoch': 2}
{'type': 'loss', 'content': 0.0412265881896019, 'timestamp': '2025-10-02 00:47:14.368765', 'step': 20275, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:14.433089', 'step': 20275, 'epoch': 2}
{'type': 'loss', 'content': 0.07318028062582016, 'timestamp': '2025-10-02 00:47:14.446375', 'step': 20276, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:47:14.504649', 'step': 20276, 'epoch': 2}
{'type': 'loss', 'content': 0.06034022569656372, 'timestamp': '2025-10-02 00:47:14.512356', 'step': 20277, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:14.584299', 'step': 20277, 'epoch': 2}
{'type': 'loss', 'content': 0.044814325869083405, 'timestamp': '2025-10-02 00:47:14.595192', 'step': 20278, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:14.663060', 'step': 20278, 'epoch': 2}
{'type': 'loss', 'content': 0.02478034608066082, 'timestamp': '2025-10-02 00:47:14.672489', 'step': 20279, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:14.749486', 'step': 20279, 'epoch': 2}
{'type': 'loss', 'content': 0.01670021563768387, 'timestamp': '2025-10-02 00:47:14.762595', 'step': 20280, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:47:14.835686', 'step': 20280, 'epoch': 2}
{'type': 'loss', 'content': 0.016921546310186386, 'timestamp': '2025-10-02 00:47:14.848686', 'step': 20281, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:14.914614', 'step': 20281, 'epoch': 2}
{'type': 'loss', 'content': 0.0741625726222992, 'timestamp': '2025-10-02 00:47:14.924098', 'step': 20282, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:14.992260', 'step': 20282, 'epoch': 2}
{'type': 'loss', 'content': 0.09530825912952423, 'timestamp': '2025-10-02 00:47:15.001550', 'step': 20283, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:15.079088', 'step': 20283, 'epoch': 2}
{'type': 'loss', 'content': 0.05813625082373619, 'timestamp': '2025-10-02 00:47:15.085859', 'step': 20284, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:15.161601', 'step': 20284, 'epoch': 2}
{'type': 'loss', 'content': 0.13737070560455322, 'timestamp': '2025-10-02 00:47:15.172538', 'step': 20285, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:15.244579', 'step': 20285, 'epoch': 2}
{'type': 'loss', 'content': 0.07241763919591904, 'timestamp': '2025-10-02 00:47:15.248147', 'step': 20286, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:15.317613', 'step': 20286, 'epoch': 2}
{'type': 'loss', 'content': 0.02686145342886448, 'timestamp': '2025-10-02 00:47:15.325111', 'step': 20287, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:15.394080', 'step': 20287, 'epoch': 2}
{'type': 'loss', 'content': 0.0906803235411644, 'timestamp': '2025-10-02 00:47:15.401405', 'step': 20288, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:15.471079', 'step': 20288, 'epoch': 2}
{'type': 'loss', 'content': 0.010513647459447384, 'timestamp': '2025-10-02 00:47:15.480554', 'step': 20289, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:15.548273', 'step': 20289, 'epoch': 2}
{'type': 'loss', 'content': 0.026808520779013634, 'timestamp': '2025-10-02 00:47:15.557753', 'step': 20290, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:15.628311', 'step': 20290, 'epoch': 2}
{'type': 'loss', 'content': 0.24073678255081177, 'timestamp': '2025-10-02 00:47:15.631385', 'step': 20291, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:15.697708', 'step': 20291, 'epoch': 2}
{'type': 'loss', 'content': 0.03877471759915352, 'timestamp': '2025-10-02 00:47:15.704337', 'step': 20292, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:15.764801', 'step': 20292, 'epoch': 2}
{'type': 'loss', 'content': 0.13174836337566376, 'timestamp': '2025-10-02 00:47:15.767598', 'step': 20293, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:15.833737', 'step': 20293, 'epoch': 2}
{'type': 'loss', 'content': 0.06548050791025162, 'timestamp': '2025-10-02 00:47:15.840644', 'step': 20294, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:15.906822', 'step': 20294, 'epoch': 2}
{'type': 'loss', 'content': 0.030658762902021408, 'timestamp': '2025-10-02 00:47:15.911019', 'step': 20295, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:15.982669', 'step': 20295, 'epoch': 2}
{'type': 'loss', 'content': 0.14979788661003113, 'timestamp': '2025-10-02 00:47:15.993250', 'step': 20296, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:16.055992', 'step': 20296, 'epoch': 2}
{'type': 'loss', 'content': 0.04225890338420868, 'timestamp': '2025-10-02 00:47:16.062081', 'step': 20297, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:16.136033', 'step': 20297, 'epoch': 2}
{'type': 'loss', 'content': 0.061809830367565155, 'timestamp': '2025-10-02 00:47:16.143007', 'step': 20298, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:47:16.205295', 'step': 20298, 'epoch': 2}
{'type': 'loss', 'content': 0.049465615302324295, 'timestamp': '2025-10-02 00:47:16.208558', 'step': 20299, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:16.280364', 'step': 20299, 'epoch': 2}
{'type': 'loss', 'content': 0.04195886477828026, 'timestamp': '2025-10-02 00:47:16.288223', 'step': 20300, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:16.351699', 'step': 20300, 'epoch': 2}
{'type': 'loss', 'content': 0.04977192357182503, 'timestamp': '2025-10-02 00:47:16.360002', 'step': 20301, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:47:16.419621', 'step': 20301, 'epoch': 2}
{'type': 'loss', 'content': 0.022413676604628563, 'timestamp': '2025-10-02 00:47:16.427042', 'step': 20302, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:16.497355', 'step': 20302, 'epoch': 2}
{'type': 'loss', 'content': 0.04441976174712181, 'timestamp': '2025-10-02 00:47:16.507691', 'step': 20303, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:16.584868', 'step': 20303, 'epoch': 2}
{'type': 'loss', 'content': 0.015399140305817127, 'timestamp': '2025-10-02 00:47:16.596141', 'step': 20304, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:16.663176', 'step': 20304, 'epoch': 2}
{'type': 'loss', 'content': 0.042022887617349625, 'timestamp': '2025-10-02 00:47:16.667024', 'step': 20305, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:16.734190', 'step': 20305, 'epoch': 2}
{'type': 'loss', 'content': 0.0026032309979200363, 'timestamp': '2025-10-02 00:47:16.743591', 'step': 20306, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:16.811227', 'step': 20306, 'epoch': 2}
{'type': 'loss', 'content': 0.03379958122968674, 'timestamp': '2025-10-02 00:47:16.816966', 'step': 20307, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:47:16.892340', 'step': 20307, 'epoch': 2}
{'type': 'loss', 'content': 0.007955869659781456, 'timestamp': '2025-10-02 00:47:16.904004', 'step': 20308, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:16.974307', 'step': 20308, 'epoch': 2}
{'type': 'loss', 'content': 0.09079796820878983, 'timestamp': '2025-10-02 00:47:16.980131', 'step': 20309, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:17.051692', 'step': 20309, 'epoch': 2}
{'type': 'loss', 'content': 0.022845329716801643, 'timestamp': '2025-10-02 00:47:17.061928', 'step': 20310, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:17.127368', 'step': 20310, 'epoch': 2}
{'type': 'loss', 'content': 0.119940385222435, 'timestamp': '2025-10-02 00:47:17.142437', 'step': 20311, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:17.241401', 'step': 20311, 'epoch': 2}
{'type': 'loss', 'content': 0.10541589558124542, 'timestamp': '2025-10-02 00:47:17.257426', 'step': 20312, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:17.331329', 'step': 20312, 'epoch': 2}
{'type': 'loss', 'content': 0.04999084025621414, 'timestamp': '2025-10-02 00:47:17.342340', 'step': 20313, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:17.423438', 'step': 20313, 'epoch': 2}
{'type': 'loss', 'content': 0.08520429581403732, 'timestamp': '2025-10-02 00:47:17.436484', 'step': 20314, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:17.522346', 'step': 20314, 'epoch': 2}
{'type': 'loss', 'content': 0.04531596601009369, 'timestamp': '2025-10-02 00:47:17.530891', 'step': 20315, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:17.601391', 'step': 20315, 'epoch': 2}
{'type': 'loss', 'content': 0.09150046855211258, 'timestamp': '2025-10-02 00:47:17.608689', 'step': 20316, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:17.685346', 'step': 20316, 'epoch': 2}
{'type': 'loss', 'content': 0.03388044238090515, 'timestamp': '2025-10-02 00:47:17.688502', 'step': 20317, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:47:17.772842', 'step': 20317, 'epoch': 2}
{'type': 'loss', 'content': 0.04404527321457863, 'timestamp': '2025-10-02 00:47:17.785182', 'step': 20318, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:17.866513', 'step': 20318, 'epoch': 2}
{'type': 'loss', 'content': 0.014826902188360691, 'timestamp': '2025-10-02 00:47:17.870421', 'step': 20319, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:17.943191', 'step': 20319, 'epoch': 2}
{'type': 'loss', 'content': 0.011178107932209969, 'timestamp': '2025-10-02 00:47:17.957010', 'step': 20320, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:47:18.040401', 'step': 20320, 'epoch': 2}
{'type': 'loss', 'content': 0.01617448218166828, 'timestamp': '2025-10-02 00:47:18.053946', 'step': 20321, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:18.117828', 'step': 20321, 'epoch': 2}
{'type': 'loss', 'content': 0.08191655576229095, 'timestamp': '2025-10-02 00:47:18.128044', 'step': 20322, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:18.194092', 'step': 20322, 'epoch': 2}
{'type': 'loss', 'content': 0.04195404052734375, 'timestamp': '2025-10-02 00:47:18.203647', 'step': 20323, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:18.265584', 'step': 20323, 'epoch': 2}
{'type': 'loss', 'content': 0.030941275879740715, 'timestamp': '2025-10-02 00:47:18.273060', 'step': 20324, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:18.339644', 'step': 20324, 'epoch': 2}
{'type': 'loss', 'content': 0.028846746310591698, 'timestamp': '2025-10-02 00:47:18.344635', 'step': 20325, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:18.419234', 'step': 20325, 'epoch': 2}
{'type': 'loss', 'content': 0.017734911292791367, 'timestamp': '2025-10-02 00:47:18.429798', 'step': 20326, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:18.489937', 'step': 20326, 'epoch': 2}
{'type': 'loss', 'content': 0.11763322353363037, 'timestamp': '2025-10-02 00:47:18.498410', 'step': 20327, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:18.574907', 'step': 20327, 'epoch': 2}
{'type': 'loss', 'content': 0.012247313745319843, 'timestamp': '2025-10-02 00:47:18.586188', 'step': 20328, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:18.646549', 'step': 20328, 'epoch': 2}
{'type': 'loss', 'content': 0.04511399567127228, 'timestamp': '2025-10-02 00:47:18.656758', 'step': 20329, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:18.722700', 'step': 20329, 'epoch': 2}
{'type': 'loss', 'content': 0.04005713760852814, 'timestamp': '2025-10-02 00:47:18.725390', 'step': 20330, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:18.782859', 'step': 20330, 'epoch': 2}
{'type': 'loss', 'content': 0.06287556141614914, 'timestamp': '2025-10-02 00:47:18.786509', 'step': 20331, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:47:18.848299', 'step': 20331, 'epoch': 2}
{'type': 'loss', 'content': 0.05708896368741989, 'timestamp': '2025-10-02 00:47:18.854312', 'step': 20332, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:18.927419', 'step': 20332, 'epoch': 2}
{'type': 'loss', 'content': 0.024222854524850845, 'timestamp': '2025-10-02 00:47:18.936979', 'step': 20333, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:47:19.006325', 'step': 20333, 'epoch': 2}
{'type': 'loss', 'content': 0.1252514272928238, 'timestamp': '2025-10-02 00:47:19.013751', 'step': 20334, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:19.076538', 'step': 20334, 'epoch': 2}
{'type': 'loss', 'content': 0.07261151820421219, 'timestamp': '2025-10-02 00:47:19.084943', 'step': 20335, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:19.160678', 'step': 20335, 'epoch': 2}
{'type': 'loss', 'content': 0.04129667207598686, 'timestamp': '2025-10-02 00:47:19.171933', 'step': 20336, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:19.240262', 'step': 20336, 'epoch': 2}
{'type': 'loss', 'content': 0.10497438907623291, 'timestamp': '2025-10-02 00:47:19.242688', 'step': 20337, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:19.296740', 'step': 20337, 'epoch': 2}
{'type': 'loss', 'content': 0.05184270814061165, 'timestamp': '2025-10-02 00:47:19.302619', 'step': 20338, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:19.357117', 'step': 20338, 'epoch': 2}
{'type': 'loss', 'content': 0.007957926020026207, 'timestamp': '2025-10-02 00:47:19.364602', 'step': 20339, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:19.419965', 'step': 20339, 'epoch': 2}
{'type': 'loss', 'content': 0.01634889841079712, 'timestamp': '2025-10-02 00:47:19.426180', 'step': 20340, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:19.479858', 'step': 20340, 'epoch': 2}
{'type': 'loss', 'content': 0.18475833535194397, 'timestamp': '2025-10-02 00:47:19.482412', 'step': 20341, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:19.536383', 'step': 20341, 'epoch': 2}
{'type': 'loss', 'content': 0.058435115963220596, 'timestamp': '2025-10-02 00:47:19.538991', 'step': 20342, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:19.593442', 'step': 20342, 'epoch': 2}
{'type': 'loss', 'content': 0.06649165600538254, 'timestamp': '2025-10-02 00:47:19.595882', 'step': 20343, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:47:19.650531', 'step': 20343, 'epoch': 2}
{'type': 'loss', 'content': 0.1273030936717987, 'timestamp': '2025-10-02 00:47:19.656765', 'step': 20344, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:19.710954', 'step': 20344, 'epoch': 2}
{'type': 'loss', 'content': 0.14288142323493958, 'timestamp': '2025-10-02 00:47:19.713363', 'step': 20345, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:19.772222', 'step': 20345, 'epoch': 2}
{'type': 'loss', 'content': 0.03425562381744385, 'timestamp': '2025-10-02 00:47:19.782361', 'step': 20346, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:19.836447', 'step': 20346, 'epoch': 2}
{'type': 'loss', 'content': 0.06925229728221893, 'timestamp': '2025-10-02 00:47:19.838993', 'step': 20347, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:19.893326', 'step': 20347, 'epoch': 2}
{'type': 'loss', 'content': 0.10440924763679504, 'timestamp': '2025-10-02 00:47:19.899766', 'step': 20348, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:19.954675', 'step': 20348, 'epoch': 2}
{'type': 'loss', 'content': 0.03683287277817726, 'timestamp': '2025-10-02 00:47:19.961760', 'step': 20349, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:20.017735', 'step': 20349, 'epoch': 2}
{'type': 'loss', 'content': 0.05799594894051552, 'timestamp': '2025-10-02 00:47:20.020696', 'step': 20350, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:20.077649', 'step': 20350, 'epoch': 2}
{'type': 'loss', 'content': 0.05217908322811127, 'timestamp': '2025-10-02 00:47:20.087129', 'step': 20351, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:20.143264', 'step': 20351, 'epoch': 2}
{'type': 'loss', 'content': 0.07126184552907944, 'timestamp': '2025-10-02 00:47:20.149737', 'step': 20352, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:20.203611', 'step': 20352, 'epoch': 2}
{'type': 'loss', 'content': 0.05649600923061371, 'timestamp': '2025-10-02 00:47:20.206513', 'step': 20353, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:20.268856', 'step': 20353, 'epoch': 2}
{'type': 'loss', 'content': 0.012583685107529163, 'timestamp': '2025-10-02 00:47:20.279339', 'step': 20354, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:20.335641', 'step': 20354, 'epoch': 2}
{'type': 'loss', 'content': 0.013749180361628532, 'timestamp': '2025-10-02 00:47:20.339419', 'step': 20355, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:20.395270', 'step': 20355, 'epoch': 2}
{'type': 'loss', 'content': 0.05022928863763809, 'timestamp': '2025-10-02 00:47:20.401646', 'step': 20356, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:20.462822', 'step': 20356, 'epoch': 2}
{'type': 'loss', 'content': 0.01226828247308731, 'timestamp': '2025-10-02 00:47:20.474160', 'step': 20357, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:20.529958', 'step': 20357, 'epoch': 2}
{'type': 'loss', 'content': 0.038037244230508804, 'timestamp': '2025-10-02 00:47:20.532439', 'step': 20358, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:20.587136', 'step': 20358, 'epoch': 2}
{'type': 'loss', 'content': 0.05583126097917557, 'timestamp': '2025-10-02 00:47:20.589808', 'step': 20359, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:20.645283', 'step': 20359, 'epoch': 2}
{'type': 'loss', 'content': 0.1821637749671936, 'timestamp': '2025-10-02 00:47:20.651516', 'step': 20360, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:20.706241', 'step': 20360, 'epoch': 2}
{'type': 'loss', 'content': 0.012474903836846352, 'timestamp': '2025-10-02 00:47:20.711941', 'step': 20361, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:20.766445', 'step': 20361, 'epoch': 2}
{'type': 'loss', 'content': 0.02841085195541382, 'timestamp': '2025-10-02 00:47:20.772282', 'step': 20362, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:20.829139', 'step': 20362, 'epoch': 2}
{'type': 'loss', 'content': 0.023211805149912834, 'timestamp': '2025-10-02 00:47:20.836356', 'step': 20363, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:20.891215', 'step': 20363, 'epoch': 2}
{'type': 'loss', 'content': 0.026668831706047058, 'timestamp': '2025-10-02 00:47:20.899409', 'step': 20364, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:20.954147', 'step': 20364, 'epoch': 2}
{'type': 'loss', 'content': 0.17241351306438446, 'timestamp': '2025-10-02 00:47:20.956629', 'step': 20365, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:21.011537', 'step': 20365, 'epoch': 2}
{'type': 'loss', 'content': 0.022680560126900673, 'timestamp': '2025-10-02 00:47:21.014296', 'step': 20366, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:21.070033', 'step': 20366, 'epoch': 2}
{'type': 'loss', 'content': 0.04775170236825943, 'timestamp': '2025-10-02 00:47:21.077248', 'step': 20367, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:21.132886', 'step': 20367, 'epoch': 2}
{'type': 'loss', 'content': 0.11930637061595917, 'timestamp': '2025-10-02 00:47:21.139283', 'step': 20368, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:21.193367', 'step': 20368, 'epoch': 2}
{'type': 'loss', 'content': 0.04265850409865379, 'timestamp': '2025-10-02 00:47:21.196395', 'step': 20369, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:21.252370', 'step': 20369, 'epoch': 2}
{'type': 'loss', 'content': 0.05236859992146492, 'timestamp': '2025-10-02 00:47:21.254989', 'step': 20370, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:21.309005', 'step': 20370, 'epoch': 2}
{'type': 'loss', 'content': 0.06680343300104141, 'timestamp': '2025-10-02 00:47:21.311705', 'step': 20371, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:21.366246', 'step': 20371, 'epoch': 2}
{'type': 'loss', 'content': 0.03924349322915077, 'timestamp': '2025-10-02 00:47:21.372462', 'step': 20372, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:47:21.427194', 'step': 20372, 'epoch': 2}
{'type': 'loss', 'content': 0.03262709826231003, 'timestamp': '2025-10-02 00:47:21.429648', 'step': 20373, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:47:21.499220', 'step': 20373, 'epoch': 2}
{'type': 'loss', 'content': 0.0503038726747036, 'timestamp': '2025-10-02 00:47:21.511503', 'step': 20374, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:21.571526', 'step': 20374, 'epoch': 2}
{'type': 'loss', 'content': 0.03818415105342865, 'timestamp': '2025-10-02 00:47:21.581713', 'step': 20375, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:21.636886', 'step': 20375, 'epoch': 2}
{'type': 'loss', 'content': 0.1653963029384613, 'timestamp': '2025-10-02 00:47:21.643588', 'step': 20376, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:21.697708', 'step': 20376, 'epoch': 2}
{'type': 'loss', 'content': 0.054502084851264954, 'timestamp': '2025-10-02 00:47:21.700220', 'step': 20377, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:21.754412', 'step': 20377, 'epoch': 2}
{'type': 'loss', 'content': 0.09352006763219833, 'timestamp': '2025-10-02 00:47:21.757722', 'step': 20378, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:21.813525', 'step': 20378, 'epoch': 2}
{'type': 'loss', 'content': 0.014031064696609974, 'timestamp': '2025-10-02 00:47:21.815869', 'step': 20379, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:21.870699', 'step': 20379, 'epoch': 2}
{'type': 'loss', 'content': 0.019623415544629097, 'timestamp': '2025-10-02 00:47:21.876845', 'step': 20380, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:21.932480', 'step': 20380, 'epoch': 2}
{'type': 'loss', 'content': 0.06213562935590744, 'timestamp': '2025-10-02 00:47:21.935457', 'step': 20381, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:21.990419', 'step': 20381, 'epoch': 2}
{'type': 'loss', 'content': 0.015915894880890846, 'timestamp': '2025-10-02 00:47:21.999770', 'step': 20382, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:22.055149', 'step': 20382, 'epoch': 2}
{'type': 'loss', 'content': 0.05385290086269379, 'timestamp': '2025-10-02 00:47:22.057607', 'step': 20383, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:22.113088', 'step': 20383, 'epoch': 2}
{'type': 'loss', 'content': 0.10153926908969879, 'timestamp': '2025-10-02 00:47:22.119532', 'step': 20384, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:22.174170', 'step': 20384, 'epoch': 2}
{'type': 'loss', 'content': 0.07278608530759811, 'timestamp': '2025-10-02 00:47:22.177049', 'step': 20385, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:22.232539', 'step': 20385, 'epoch': 2}
{'type': 'loss', 'content': 0.14910288155078888, 'timestamp': '2025-10-02 00:47:22.234993', 'step': 20386, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:22.290299', 'step': 20386, 'epoch': 2}
{'type': 'loss', 'content': 0.023754015564918518, 'timestamp': '2025-10-02 00:47:22.297464', 'step': 20387, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:22.359540', 'step': 20387, 'epoch': 2}
{'type': 'loss', 'content': 0.009850280359387398, 'timestamp': '2025-10-02 00:47:22.370818', 'step': 20388, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:22.429459', 'step': 20388, 'epoch': 2}
{'type': 'loss', 'content': 0.05937600135803223, 'timestamp': '2025-10-02 00:47:22.440443', 'step': 20389, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:22.495756', 'step': 20389, 'epoch': 2}
{'type': 'loss', 'content': 0.026589615270495415, 'timestamp': '2025-10-02 00:47:22.498134', 'step': 20390, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:22.555073', 'step': 20390, 'epoch': 2}
{'type': 'loss', 'content': 0.05979032814502716, 'timestamp': '2025-10-02 00:47:22.564601', 'step': 20391, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:22.619424', 'step': 20391, 'epoch': 2}
{'type': 'loss', 'content': 0.0815645381808281, 'timestamp': '2025-10-02 00:47:22.626009', 'step': 20392, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:22.679519', 'step': 20392, 'epoch': 2}
{'type': 'loss', 'content': 0.051816366612911224, 'timestamp': '2025-10-02 00:47:22.686716', 'step': 20393, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:22.741407', 'step': 20393, 'epoch': 2}
{'type': 'loss', 'content': 0.035714056342840195, 'timestamp': '2025-10-02 00:47:22.743974', 'step': 20394, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:22.797807', 'step': 20394, 'epoch': 2}
{'type': 'loss', 'content': 0.15386131405830383, 'timestamp': '2025-10-02 00:47:22.800307', 'step': 20395, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:47:22.869686', 'step': 20395, 'epoch': 2}
{'type': 'loss', 'content': 0.0017761578783392906, 'timestamp': '2025-10-02 00:47:22.882700', 'step': 20396, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:22.937255', 'step': 20396, 'epoch': 2}
{'type': 'loss', 'content': 0.030784117057919502, 'timestamp': '2025-10-02 00:47:22.940781', 'step': 20397, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:22.996982', 'step': 20397, 'epoch': 2}
{'type': 'loss', 'content': 0.007945951074361801, 'timestamp': '2025-10-02 00:47:22.999784', 'step': 20398, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:47:23.061995', 'step': 20398, 'epoch': 2}
{'type': 'loss', 'content': 0.043542567640542984, 'timestamp': '2025-10-02 00:47:23.072591', 'step': 20399, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:23.128895', 'step': 20399, 'epoch': 2}
{'type': 'loss', 'content': 0.04931396245956421, 'timestamp': '2025-10-02 00:47:23.135104', 'step': 20400, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:23.193869', 'step': 20400, 'epoch': 2}
{'type': 'loss', 'content': 0.02027418278157711, 'timestamp': '2025-10-02 00:47:23.204855', 'step': 20401, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:23.262467', 'step': 20401, 'epoch': 2}
{'type': 'loss', 'content': 0.059220295399427414, 'timestamp': '2025-10-02 00:47:23.265933', 'step': 20402, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:23.322448', 'step': 20402, 'epoch': 2}
{'type': 'loss', 'content': 0.06009342148900032, 'timestamp': '2025-10-02 00:47:23.329714', 'step': 20403, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:23.385518', 'step': 20403, 'epoch': 2}
{'type': 'loss', 'content': 0.10307830572128296, 'timestamp': '2025-10-02 00:47:23.393481', 'step': 20404, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:47:23.457420', 'step': 20404, 'epoch': 2}
{'type': 'loss', 'content': 0.010857496410608292, 'timestamp': '2025-10-02 00:47:23.468947', 'step': 20405, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:23.528042', 'step': 20405, 'epoch': 2}
{'type': 'loss', 'content': 0.06565684080123901, 'timestamp': '2025-10-02 00:47:23.531235', 'step': 20406, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:23.587931', 'step': 20406, 'epoch': 2}
{'type': 'loss', 'content': 0.027752934023737907, 'timestamp': '2025-10-02 00:47:23.595138', 'step': 20407, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:23.652623', 'step': 20407, 'epoch': 2}
{'type': 'loss', 'content': 0.04952950030565262, 'timestamp': '2025-10-02 00:47:23.658664', 'step': 20408, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:23.715404', 'step': 20408, 'epoch': 2}
{'type': 'loss', 'content': 0.092925526201725, 'timestamp': '2025-10-02 00:47:23.718467', 'step': 20409, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:23.774633', 'step': 20409, 'epoch': 2}
{'type': 'loss', 'content': 0.09506785124540329, 'timestamp': '2025-10-02 00:47:23.776582', 'step': 20410, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:47:23.832805', 'step': 20410, 'epoch': 2}
{'type': 'loss', 'content': 0.05489429086446762, 'timestamp': '2025-10-02 00:47:23.836231', 'step': 20411, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:23.894846', 'step': 20411, 'epoch': 2}
{'type': 'loss', 'content': 0.0891476422548294, 'timestamp': '2025-10-02 00:47:23.901498', 'step': 20412, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:23.958715', 'step': 20412, 'epoch': 2}
{'type': 'loss', 'content': 0.06751105189323425, 'timestamp': '2025-10-02 00:47:23.961846', 'step': 20413, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:24.017886', 'step': 20413, 'epoch': 2}
{'type': 'loss', 'content': 0.12377186864614487, 'timestamp': '2025-10-02 00:47:24.020987', 'step': 20414, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:24.077419', 'step': 20414, 'epoch': 2}
{'type': 'loss', 'content': 0.10176090896129608, 'timestamp': '2025-10-02 00:47:24.080873', 'step': 20415, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:24.141015', 'step': 20415, 'epoch': 2}
{'type': 'loss', 'content': 0.08705481886863708, 'timestamp': '2025-10-02 00:47:24.147692', 'step': 20416, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:24.204503', 'step': 20416, 'epoch': 2}
{'type': 'loss', 'content': 0.01774364896118641, 'timestamp': '2025-10-02 00:47:24.207355', 'step': 20417, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:24.263235', 'step': 20417, 'epoch': 2}
{'type': 'loss', 'content': 0.05237248167395592, 'timestamp': '2025-10-02 00:47:24.270530', 'step': 20418, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:47:24.329783', 'step': 20418, 'epoch': 2}
{'type': 'loss', 'content': 0.04986841604113579, 'timestamp': '2025-10-02 00:47:24.332242', 'step': 20419, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:47:24.391539', 'step': 20419, 'epoch': 2}
{'type': 'loss', 'content': 0.054792001843452454, 'timestamp': '2025-10-02 00:47:24.398285', 'step': 20420, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:24.455583', 'step': 20420, 'epoch': 2}
{'type': 'loss', 'content': 0.11725463718175888, 'timestamp': '2025-10-02 00:47:24.457877', 'step': 20421, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:24.514326', 'step': 20421, 'epoch': 2}
{'type': 'loss', 'content': 0.06639490276575089, 'timestamp': '2025-10-02 00:47:24.521516', 'step': 20422, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:24.577345', 'step': 20422, 'epoch': 2}
{'type': 'loss', 'content': 0.04095910117030144, 'timestamp': '2025-10-02 00:47:24.580329', 'step': 20423, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:47:24.649701', 'step': 20423, 'epoch': 2}
{'type': 'loss', 'content': 0.04477393999695778, 'timestamp': '2025-10-02 00:47:24.661106', 'step': 20424, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:24.715823', 'step': 20424, 'epoch': 2}
{'type': 'loss', 'content': 0.06663312762975693, 'timestamp': '2025-10-02 00:47:24.718867', 'step': 20425, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:24.774086', 'step': 20425, 'epoch': 2}
{'type': 'loss', 'content': 0.14726966619491577, 'timestamp': '2025-10-02 00:47:24.779683', 'step': 20426, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:24.834528', 'step': 20426, 'epoch': 2}
{'type': 'loss', 'content': 0.14852763712406158, 'timestamp': '2025-10-02 00:47:24.837470', 'step': 20427, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:24.891316', 'step': 20427, 'epoch': 2}
{'type': 'loss', 'content': 0.13892845809459686, 'timestamp': '2025-10-02 00:47:24.897464', 'step': 20428, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:24.951797', 'step': 20428, 'epoch': 2}
{'type': 'loss', 'content': 0.08887723088264465, 'timestamp': '2025-10-02 00:47:24.957508', 'step': 20429, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:25.015329', 'step': 20429, 'epoch': 2}
{'type': 'loss', 'content': 0.15447545051574707, 'timestamp': '2025-10-02 00:47:25.017477', 'step': 20430, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:25.071982', 'step': 20430, 'epoch': 2}
{'type': 'loss', 'content': 0.07409821450710297, 'timestamp': '2025-10-02 00:47:25.074339', 'step': 20431, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:25.128700', 'step': 20431, 'epoch': 2}
{'type': 'loss', 'content': 0.06940147280693054, 'timestamp': '2025-10-02 00:47:25.138596', 'step': 20432, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:25.192249', 'step': 20432, 'epoch': 2}
{'type': 'loss', 'content': 0.07892993092536926, 'timestamp': '2025-10-02 00:47:25.194241', 'step': 20433, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:25.250526', 'step': 20433, 'epoch': 2}
{'type': 'loss', 'content': 0.02826792187988758, 'timestamp': '2025-10-02 00:47:25.252926', 'step': 20434, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:25.308402', 'step': 20434, 'epoch': 2}
{'type': 'loss', 'content': 0.016637256368994713, 'timestamp': '2025-10-02 00:47:25.311637', 'step': 20435, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:25.365677', 'step': 20435, 'epoch': 2}
{'type': 'loss', 'content': 0.08113043010234833, 'timestamp': '2025-10-02 00:47:25.372210', 'step': 20436, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:25.427155', 'step': 20436, 'epoch': 2}
{'type': 'loss', 'content': 0.20454004406929016, 'timestamp': '2025-10-02 00:47:25.429816', 'step': 20437, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:25.484417', 'step': 20437, 'epoch': 2}
{'type': 'loss', 'content': 0.04224532097578049, 'timestamp': '2025-10-02 00:47:25.491836', 'step': 20438, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:25.547320', 'step': 20438, 'epoch': 2}
{'type': 'loss', 'content': 0.04994037374854088, 'timestamp': '2025-10-02 00:47:25.549474', 'step': 20439, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:25.606404', 'step': 20439, 'epoch': 2}
{'type': 'loss', 'content': 0.056036874651908875, 'timestamp': '2025-10-02 00:47:25.612136', 'step': 20440, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:25.666088', 'step': 20440, 'epoch': 2}
{'type': 'loss', 'content': 0.04592018574476242, 'timestamp': '2025-10-02 00:47:25.668916', 'step': 20441, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:25.723691', 'step': 20441, 'epoch': 2}
{'type': 'loss', 'content': 0.09671026468276978, 'timestamp': '2025-10-02 00:47:25.725894', 'step': 20442, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:25.780830', 'step': 20442, 'epoch': 2}
{'type': 'loss', 'content': 0.09459394216537476, 'timestamp': '2025-10-02 00:47:25.783132', 'step': 20443, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:25.837105', 'step': 20443, 'epoch': 2}
{'type': 'loss', 'content': 0.0752563551068306, 'timestamp': '2025-10-02 00:47:25.847208', 'step': 20444, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:25.901404', 'step': 20444, 'epoch': 2}
{'type': 'loss', 'content': 0.2086256593465805, 'timestamp': '2025-10-02 00:47:25.903733', 'step': 20445, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:25.959365', 'step': 20445, 'epoch': 2}
{'type': 'loss', 'content': 0.010168543085455894, 'timestamp': '2025-10-02 00:47:25.966788', 'step': 20446, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:26.021508', 'step': 20446, 'epoch': 2}
{'type': 'loss', 'content': 0.06567113846540451, 'timestamp': '2025-10-02 00:47:26.024325', 'step': 20447, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:26.086058', 'step': 20447, 'epoch': 2}
{'type': 'loss', 'content': 0.12209832668304443, 'timestamp': '2025-10-02 00:47:26.097404', 'step': 20448, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:26.152145', 'step': 20448, 'epoch': 2}
{'type': 'loss', 'content': 0.015794219449162483, 'timestamp': '2025-10-02 00:47:26.159474', 'step': 20449, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:26.219702', 'step': 20449, 'epoch': 2}
{'type': 'loss', 'content': 0.01531720720231533, 'timestamp': '2025-10-02 00:47:26.229851', 'step': 20450, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:26.284768', 'step': 20450, 'epoch': 2}
{'type': 'loss', 'content': 0.005417115055024624, 'timestamp': '2025-10-02 00:47:26.291671', 'step': 20451, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:26.347205', 'step': 20451, 'epoch': 2}
{'type': 'loss', 'content': 0.09406262636184692, 'timestamp': '2025-10-02 00:47:26.353279', 'step': 20452, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:26.407932', 'step': 20452, 'epoch': 2}
{'type': 'loss', 'content': 0.11761512607336044, 'timestamp': '2025-10-02 00:47:26.410338', 'step': 20453, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:26.465344', 'step': 20453, 'epoch': 2}
{'type': 'loss', 'content': 0.04975181818008423, 'timestamp': '2025-10-02 00:47:26.468230', 'step': 20454, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:26.523639', 'step': 20454, 'epoch': 2}
{'type': 'loss', 'content': 0.13292726874351501, 'timestamp': '2025-10-02 00:47:26.527735', 'step': 20455, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:26.584326', 'step': 20455, 'epoch': 2}
{'type': 'loss', 'content': 0.06090851500630379, 'timestamp': '2025-10-02 00:47:26.590405', 'step': 20456, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:26.646161', 'step': 20456, 'epoch': 2}
{'type': 'loss', 'content': 0.04350517690181732, 'timestamp': '2025-10-02 00:47:26.656403', 'step': 20457, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:26.710557', 'step': 20457, 'epoch': 2}
{'type': 'loss', 'content': 0.10577142983675003, 'timestamp': '2025-10-02 00:47:26.712762', 'step': 20458, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:26.769382', 'step': 20458, 'epoch': 2}
{'type': 'loss', 'content': 0.039053428918123245, 'timestamp': '2025-10-02 00:47:26.778913', 'step': 20459, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:26.832719', 'step': 20459, 'epoch': 2}
{'type': 'loss', 'content': 0.1158389151096344, 'timestamp': '2025-10-02 00:47:26.839125', 'step': 20460, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:26.893535', 'step': 20460, 'epoch': 2}
{'type': 'loss', 'content': 0.024657562375068665, 'timestamp': '2025-10-02 00:47:26.896216', 'step': 20461, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:26.952733', 'step': 20461, 'epoch': 2}
{'type': 'loss', 'content': 0.050513118505477905, 'timestamp': '2025-10-02 00:47:26.955266', 'step': 20462, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:27.010459', 'step': 20462, 'epoch': 2}
{'type': 'loss', 'content': 0.008134990930557251, 'timestamp': '2025-10-02 00:47:27.012965', 'step': 20463, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:47:27.067784', 'step': 20463, 'epoch': 2}
{'type': 'loss', 'content': 0.04673474654555321, 'timestamp': '2025-10-02 00:47:27.073863', 'step': 20464, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:47:27.139914', 'step': 20464, 'epoch': 2}
{'type': 'loss', 'content': 0.01989026367664337, 'timestamp': '2025-10-02 00:47:27.152847', 'step': 20465, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:27.207143', 'step': 20465, 'epoch': 2}
{'type': 'loss', 'content': 0.035201750695705414, 'timestamp': '2025-10-02 00:47:27.214184', 'step': 20466, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:27.269169', 'step': 20466, 'epoch': 2}
{'type': 'loss', 'content': 0.029398184269666672, 'timestamp': '2025-10-02 00:47:27.271931', 'step': 20467, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:27.326318', 'step': 20467, 'epoch': 2}
{'type': 'loss', 'content': 0.07811953127384186, 'timestamp': '2025-10-02 00:47:27.333172', 'step': 20468, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:27.387819', 'step': 20468, 'epoch': 2}
{'type': 'loss', 'content': 0.05096105486154556, 'timestamp': '2025-10-02 00:47:27.390235', 'step': 20469, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:27.445077', 'step': 20469, 'epoch': 2}
{'type': 'loss', 'content': 0.039025455713272095, 'timestamp': '2025-10-02 00:47:27.452512', 'step': 20470, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:27.508923', 'step': 20470, 'epoch': 2}
{'type': 'loss', 'content': 0.007968808524310589, 'timestamp': '2025-10-02 00:47:27.515859', 'step': 20471, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:27.570721', 'step': 20471, 'epoch': 2}
{'type': 'loss', 'content': 0.08820852637290955, 'timestamp': '2025-10-02 00:47:27.580488', 'step': 20472, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:27.635840', 'step': 20472, 'epoch': 2}
{'type': 'loss', 'content': 0.027254031971096992, 'timestamp': '2025-10-02 00:47:27.646184', 'step': 20473, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:47:27.710274', 'step': 20473, 'epoch': 2}
{'type': 'loss', 'content': 0.001953959232196212, 'timestamp': '2025-10-02 00:47:27.720946', 'step': 20474, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:27.776533', 'step': 20474, 'epoch': 2}
{'type': 'loss', 'content': 0.050547853112220764, 'timestamp': '2025-10-02 00:47:27.779582', 'step': 20475, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:27.835049', 'step': 20475, 'epoch': 2}
{'type': 'loss', 'content': 0.08635331690311432, 'timestamp': '2025-10-02 00:47:27.841406', 'step': 20476, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:27.897040', 'step': 20476, 'epoch': 2}
{'type': 'loss', 'content': 0.027175752446055412, 'timestamp': '2025-10-02 00:47:27.902755', 'step': 20477, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:47:27.957942', 'step': 20477, 'epoch': 2}
{'type': 'loss', 'content': 0.045872729271650314, 'timestamp': '2025-10-02 00:47:27.960437', 'step': 20478, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:28.015681', 'step': 20478, 'epoch': 2}
{'type': 'loss', 'content': 0.055926937609910965, 'timestamp': '2025-10-02 00:47:28.025203', 'step': 20479, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:28.080452', 'step': 20479, 'epoch': 2}
{'type': 'loss', 'content': 0.1056613102555275, 'timestamp': '2025-10-02 00:47:28.086801', 'step': 20480, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:28.141102', 'step': 20480, 'epoch': 2}
{'type': 'loss', 'content': 0.05483954772353172, 'timestamp': '2025-10-02 00:47:28.143561', 'step': 20481, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:28.198258', 'step': 20481, 'epoch': 2}
{'type': 'loss', 'content': 0.057086456567049026, 'timestamp': '2025-10-02 00:47:28.205562', 'step': 20482, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:28.261643', 'step': 20482, 'epoch': 2}
{'type': 'loss', 'content': 0.08543410152196884, 'timestamp': '2025-10-02 00:47:28.264208', 'step': 20483, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:28.318839', 'step': 20483, 'epoch': 2}
{'type': 'loss', 'content': 0.05482220649719238, 'timestamp': '2025-10-02 00:47:28.329404', 'step': 20484, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:28.386341', 'step': 20484, 'epoch': 2}
{'type': 'loss', 'content': 0.04137802869081497, 'timestamp': '2025-10-02 00:47:28.389373', 'step': 20485, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:28.443812', 'step': 20485, 'epoch': 2}
{'type': 'loss', 'content': 0.15587814152240753, 'timestamp': '2025-10-02 00:47:28.450541', 'step': 20486, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:28.509287', 'step': 20486, 'epoch': 2}
{'type': 'loss', 'content': 0.01634540781378746, 'timestamp': '2025-10-02 00:47:28.511819', 'step': 20487, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:28.566399', 'step': 20487, 'epoch': 2}
{'type': 'loss', 'content': 0.06933403760194778, 'timestamp': '2025-10-02 00:47:28.573578', 'step': 20488, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:28.627090', 'step': 20488, 'epoch': 2}
{'type': 'loss', 'content': 0.1051492914557457, 'timestamp': '2025-10-02 00:47:28.629644', 'step': 20489, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:28.684315', 'step': 20489, 'epoch': 2}
{'type': 'loss', 'content': 0.1643625795841217, 'timestamp': '2025-10-02 00:47:28.689364', 'step': 20490, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:28.747243', 'step': 20490, 'epoch': 2}
{'type': 'loss', 'content': 0.019087472930550575, 'timestamp': '2025-10-02 00:47:28.754311', 'step': 20491, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:28.809790', 'step': 20491, 'epoch': 2}
{'type': 'loss', 'content': 0.02484382502734661, 'timestamp': '2025-10-02 00:47:28.815811', 'step': 20492, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:47:28.876903', 'step': 20492, 'epoch': 2}
{'type': 'loss', 'content': 0.02818281389772892, 'timestamp': '2025-10-02 00:47:28.888441', 'step': 20493, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:28.942419', 'step': 20493, 'epoch': 2}
{'type': 'loss', 'content': 0.11988858133554459, 'timestamp': '2025-10-02 00:47:28.944979', 'step': 20494, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:29.005785', 'step': 20494, 'epoch': 2}
{'type': 'loss', 'content': 0.03179952874779701, 'timestamp': '2025-10-02 00:47:29.009575', 'step': 20495, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:29.065300', 'step': 20495, 'epoch': 2}
{'type': 'loss', 'content': 0.1977701038122177, 'timestamp': '2025-10-02 00:47:29.088518', 'step': 20496, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:29.144534', 'step': 20496, 'epoch': 2}
{'type': 'loss', 'content': 0.0025443958584219217, 'timestamp': '2025-10-02 00:47:29.153928', 'step': 20497, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:29.209456', 'step': 20497, 'epoch': 2}
{'type': 'loss', 'content': 0.04857617989182472, 'timestamp': '2025-10-02 00:47:29.212210', 'step': 20498, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:47:29.284728', 'step': 20498, 'epoch': 2}
{'type': 'loss', 'content': 0.012499537318944931, 'timestamp': '2025-10-02 00:47:29.297220', 'step': 20499, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:29.353893', 'step': 20499, 'epoch': 2}
{'type': 'loss', 'content': 0.05839107185602188, 'timestamp': '2025-10-02 00:47:29.361865', 'step': 20500, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 20500', 'timestamp': '2025-10-02 00:47:29.775409', 'step': 20500, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:29.828331', 'step': 20500, 'epoch': 2}
{'type': 'loss', 'content': 0.0954379215836525, 'timestamp': '2025-10-02 00:47:29.836866', 'step': 20501, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:29.893734', 'step': 20501, 'epoch': 2}
{'type': 'loss', 'content': 0.10988415032625198, 'timestamp': '2025-10-02 00:47:29.896128', 'step': 20502, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:29.951381', 'step': 20502, 'epoch': 2}
{'type': 'loss', 'content': 0.10269921272993088, 'timestamp': '2025-10-02 00:47:29.953825', 'step': 20503, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:30.010294', 'step': 20503, 'epoch': 2}
{'type': 'loss', 'content': 0.07112151384353638, 'timestamp': '2025-10-02 00:47:30.016369', 'step': 20504, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:30.069791', 'step': 20504, 'epoch': 2}
{'type': 'loss', 'content': 0.0937265008687973, 'timestamp': '2025-10-02 00:47:30.072490', 'step': 20505, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:30.126040', 'step': 20505, 'epoch': 2}
{'type': 'loss', 'content': 0.09150903671979904, 'timestamp': '2025-10-02 00:47:30.128660', 'step': 20506, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:30.182724', 'step': 20506, 'epoch': 2}
{'type': 'loss', 'content': 0.09449037164449692, 'timestamp': '2025-10-02 00:47:30.185385', 'step': 20507, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:30.240318', 'step': 20507, 'epoch': 2}
{'type': 'loss', 'content': 0.09142337739467621, 'timestamp': '2025-10-02 00:47:30.246510', 'step': 20508, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:30.301368', 'step': 20508, 'epoch': 2}
{'type': 'loss', 'content': 0.0314021110534668, 'timestamp': '2025-10-02 00:47:30.307268', 'step': 20509, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:30.362366', 'step': 20509, 'epoch': 2}
{'type': 'loss', 'content': 0.1323906034231186, 'timestamp': '2025-10-02 00:47:30.364928', 'step': 20510, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:30.420463', 'step': 20510, 'epoch': 2}
{'type': 'loss', 'content': 0.0018210613634437323, 'timestamp': '2025-10-02 00:47:30.423416', 'step': 20511, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:30.478380', 'step': 20511, 'epoch': 2}
{'type': 'loss', 'content': 0.06861213594675064, 'timestamp': '2025-10-02 00:47:30.484777', 'step': 20512, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:30.539370', 'step': 20512, 'epoch': 2}
{'type': 'loss', 'content': 0.08598348498344421, 'timestamp': '2025-10-02 00:47:30.541963', 'step': 20513, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:30.596151', 'step': 20513, 'epoch': 2}
{'type': 'loss', 'content': 0.12300260365009308, 'timestamp': '2025-10-02 00:47:30.598220', 'step': 20514, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:30.653792', 'step': 20514, 'epoch': 2}
{'type': 'loss', 'content': 0.018807213753461838, 'timestamp': '2025-10-02 00:47:30.663298', 'step': 20515, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:30.721772', 'step': 20515, 'epoch': 2}
{'type': 'loss', 'content': 0.09025269746780396, 'timestamp': '2025-10-02 00:47:30.728890', 'step': 20516, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:47:30.790950', 'step': 20516, 'epoch': 2}
{'type': 'loss', 'content': 0.04587633162736893, 'timestamp': '2025-10-02 00:47:30.802726', 'step': 20517, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:30.860380', 'step': 20517, 'epoch': 2}
{'type': 'loss', 'content': 0.03143223375082016, 'timestamp': '2025-10-02 00:47:30.869895', 'step': 20518, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:30.926543', 'step': 20518, 'epoch': 2}
{'type': 'loss', 'content': 0.08360879868268967, 'timestamp': '2025-10-02 00:47:30.928981', 'step': 20519, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:30.983314', 'step': 20519, 'epoch': 2}
{'type': 'loss', 'content': 0.054838817566633224, 'timestamp': '2025-10-02 00:47:30.989587', 'step': 20520, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:31.044266', 'step': 20520, 'epoch': 2}
{'type': 'loss', 'content': 0.04766048491001129, 'timestamp': '2025-10-02 00:47:31.046773', 'step': 20521, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:31.101125', 'step': 20521, 'epoch': 2}
{'type': 'loss', 'content': 0.061169933527708054, 'timestamp': '2025-10-02 00:47:31.107134', 'step': 20522, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:31.162218', 'step': 20522, 'epoch': 2}
{'type': 'loss', 'content': 0.02565455250442028, 'timestamp': '2025-10-02 00:47:31.168072', 'step': 20523, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:31.226351', 'step': 20523, 'epoch': 2}
{'type': 'loss', 'content': 0.02943234145641327, 'timestamp': '2025-10-02 00:47:31.234673', 'step': 20524, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:31.290221', 'step': 20524, 'epoch': 2}
{'type': 'loss', 'content': 0.11184149235486984, 'timestamp': '2025-10-02 00:47:31.292936', 'step': 20525, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:31.348125', 'step': 20525, 'epoch': 2}
{'type': 'loss', 'content': 0.04505064710974693, 'timestamp': '2025-10-02 00:47:31.350726', 'step': 20526, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:31.413886', 'step': 20526, 'epoch': 2}
{'type': 'loss', 'content': 0.11009306460618973, 'timestamp': '2025-10-02 00:47:31.417489', 'step': 20527, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:31.475633', 'step': 20527, 'epoch': 2}
{'type': 'loss', 'content': 0.031928420066833496, 'timestamp': '2025-10-02 00:47:31.485909', 'step': 20528, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:31.544213', 'step': 20528, 'epoch': 2}
{'type': 'loss', 'content': 0.0819200873374939, 'timestamp': '2025-10-02 00:47:31.550991', 'step': 20529, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:31.610611', 'step': 20529, 'epoch': 2}
{'type': 'loss', 'content': 0.04850439727306366, 'timestamp': '2025-10-02 00:47:31.612797', 'step': 20530, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:31.672262', 'step': 20530, 'epoch': 2}
{'type': 'loss', 'content': 0.020948711782693863, 'timestamp': '2025-10-02 00:47:31.677333', 'step': 20531, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:47:31.750485', 'step': 20531, 'epoch': 2}
{'type': 'loss', 'content': 0.028979388996958733, 'timestamp': '2025-10-02 00:47:31.763904', 'step': 20532, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:31.819744', 'step': 20532, 'epoch': 2}
{'type': 'loss', 'content': 0.12405364215373993, 'timestamp': '2025-10-02 00:47:31.822018', 'step': 20533, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:31.876196', 'step': 20533, 'epoch': 2}
{'type': 'loss', 'content': 0.14869630336761475, 'timestamp': '2025-10-02 00:47:31.881003', 'step': 20534, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:31.941611', 'step': 20534, 'epoch': 2}
{'type': 'loss', 'content': 0.05421631783246994, 'timestamp': '2025-10-02 00:47:31.947484', 'step': 20535, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:32.000984', 'step': 20535, 'epoch': 2}
{'type': 'loss', 'content': 0.08975772559642792, 'timestamp': '2025-10-02 00:47:32.007068', 'step': 20536, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:32.061234', 'step': 20536, 'epoch': 2}
{'type': 'loss', 'content': 0.049165189266204834, 'timestamp': '2025-10-02 00:47:32.063805', 'step': 20537, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:32.120082', 'step': 20537, 'epoch': 2}
{'type': 'loss', 'content': 0.04629272222518921, 'timestamp': '2025-10-02 00:47:32.126128', 'step': 20538, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:32.184837', 'step': 20538, 'epoch': 2}
{'type': 'loss', 'content': 0.034051358699798584, 'timestamp': '2025-10-02 00:47:32.191363', 'step': 20539, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:32.266377', 'step': 20539, 'epoch': 2}
{'type': 'loss', 'content': 0.11006395518779755, 'timestamp': '2025-10-02 00:47:32.273878', 'step': 20540, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:32.333608', 'step': 20540, 'epoch': 2}
{'type': 'loss', 'content': 0.09171374887228012, 'timestamp': '2025-10-02 00:47:32.345715', 'step': 20541, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:32.405224', 'step': 20541, 'epoch': 2}
{'type': 'loss', 'content': 0.036823172122240067, 'timestamp': '2025-10-02 00:47:32.408138', 'step': 20542, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:32.465291', 'step': 20542, 'epoch': 2}
{'type': 'loss', 'content': 0.05467204749584198, 'timestamp': '2025-10-02 00:47:32.468295', 'step': 20543, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:32.523515', 'step': 20543, 'epoch': 2}
{'type': 'loss', 'content': 0.07340943068265915, 'timestamp': '2025-10-02 00:47:32.531828', 'step': 20544, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:32.603259', 'step': 20544, 'epoch': 2}
{'type': 'loss', 'content': 0.09846528619527817, 'timestamp': '2025-10-02 00:47:32.612512', 'step': 20545, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:32.683257', 'step': 20545, 'epoch': 2}
{'type': 'loss', 'content': 0.1269310563802719, 'timestamp': '2025-10-02 00:47:32.686253', 'step': 20546, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:32.743122', 'step': 20546, 'epoch': 2}
{'type': 'loss', 'content': 0.10473565757274628, 'timestamp': '2025-10-02 00:47:32.745507', 'step': 20547, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:32.801360', 'step': 20547, 'epoch': 2}
{'type': 'loss', 'content': 0.05806621164083481, 'timestamp': '2025-10-02 00:47:32.811520', 'step': 20548, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:32.866777', 'step': 20548, 'epoch': 2}
{'type': 'loss', 'content': 0.060121871531009674, 'timestamp': '2025-10-02 00:47:32.870745', 'step': 20549, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:32.934790', 'step': 20549, 'epoch': 2}
{'type': 'loss', 'content': 0.04397298023104668, 'timestamp': '2025-10-02 00:47:32.944131', 'step': 20550, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:33.001758', 'step': 20550, 'epoch': 2}
{'type': 'loss', 'content': 0.062087975442409515, 'timestamp': '2025-10-02 00:47:33.007585', 'step': 20551, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:47:33.063127', 'step': 20551, 'epoch': 2}
{'type': 'loss', 'content': 0.026513205841183662, 'timestamp': '2025-10-02 00:47:33.069504', 'step': 20552, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:33.122717', 'step': 20552, 'epoch': 2}
{'type': 'loss', 'content': 0.15670187771320343, 'timestamp': '2025-10-02 00:47:33.126529', 'step': 20553, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:33.183021', 'step': 20553, 'epoch': 2}
{'type': 'loss', 'content': 0.03250005841255188, 'timestamp': '2025-10-02 00:47:33.192583', 'step': 20554, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:33.251093', 'step': 20554, 'epoch': 2}
{'type': 'loss', 'content': 0.056805964559316635, 'timestamp': '2025-10-02 00:47:33.253788', 'step': 20555, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:47:33.309934', 'step': 20555, 'epoch': 2}
{'type': 'loss', 'content': 0.19700780510902405, 'timestamp': '2025-10-02 00:47:33.316353', 'step': 20556, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:33.373928', 'step': 20556, 'epoch': 2}
{'type': 'loss', 'content': 0.0669153481721878, 'timestamp': '2025-10-02 00:47:33.379962', 'step': 20557, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:47:33.437280', 'step': 20557, 'epoch': 2}
{'type': 'loss', 'content': 0.1466188132762909, 'timestamp': '2025-10-02 00:47:33.439732', 'step': 20558, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:33.496494', 'step': 20558, 'epoch': 2}
{'type': 'loss', 'content': 0.05504550412297249, 'timestamp': '2025-10-02 00:47:33.503988', 'step': 20559, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:33.565812', 'step': 20559, 'epoch': 2}
{'type': 'loss', 'content': 0.06615139544010162, 'timestamp': '2025-10-02 00:47:33.576698', 'step': 20560, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:33.632582', 'step': 20560, 'epoch': 2}
{'type': 'loss', 'content': 0.03298742696642876, 'timestamp': '2025-10-02 00:47:33.638520', 'step': 20561, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:33.696202', 'step': 20561, 'epoch': 2}
{'type': 'loss', 'content': 0.019400864839553833, 'timestamp': '2025-10-02 00:47:33.701489', 'step': 20562, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:33.759657', 'step': 20562, 'epoch': 2}
{'type': 'loss', 'content': 0.05017968267202377, 'timestamp': '2025-10-02 00:47:33.763363', 'step': 20563, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:33.820836', 'step': 20563, 'epoch': 2}
{'type': 'loss', 'content': 0.02039892040193081, 'timestamp': '2025-10-02 00:47:33.830950', 'step': 20564, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:33.886612', 'step': 20564, 'epoch': 2}
{'type': 'loss', 'content': 0.0742727667093277, 'timestamp': '2025-10-02 00:47:33.889661', 'step': 20565, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:33.945076', 'step': 20565, 'epoch': 2}
{'type': 'loss', 'content': 0.11965146660804749, 'timestamp': '2025-10-02 00:47:33.948650', 'step': 20566, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:34.004738', 'step': 20566, 'epoch': 2}
{'type': 'loss', 'content': 0.035429686307907104, 'timestamp': '2025-10-02 00:47:34.010670', 'step': 20567, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:34.068996', 'step': 20567, 'epoch': 2}
{'type': 'loss', 'content': 0.02025308459997177, 'timestamp': '2025-10-02 00:47:34.079278', 'step': 20568, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:47:34.136647', 'step': 20568, 'epoch': 2}
{'type': 'loss', 'content': 0.026477331295609474, 'timestamp': '2025-10-02 00:47:34.139465', 'step': 20569, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:34.198815', 'step': 20569, 'epoch': 2}
{'type': 'loss', 'content': 0.029809361323714256, 'timestamp': '2025-10-02 00:47:34.208297', 'step': 20570, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:47:34.272124', 'step': 20570, 'epoch': 2}
{'type': 'loss', 'content': 0.046155352145433426, 'timestamp': '2025-10-02 00:47:34.282981', 'step': 20571, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:34.338846', 'step': 20571, 'epoch': 2}
{'type': 'loss', 'content': 0.053529929369688034, 'timestamp': '2025-10-02 00:47:34.345679', 'step': 20572, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:47:34.410251', 'step': 20572, 'epoch': 2}
{'type': 'loss', 'content': 0.007801446598023176, 'timestamp': '2025-10-02 00:47:34.421778', 'step': 20573, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:34.489357', 'step': 20573, 'epoch': 2}
{'type': 'loss', 'content': 0.06616664677858353, 'timestamp': '2025-10-02 00:47:34.497591', 'step': 20574, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:47:34.554642', 'step': 20574, 'epoch': 2}
{'type': 'loss', 'content': 0.10780847072601318, 'timestamp': '2025-10-02 00:47:34.558156', 'step': 20575, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:34.615545', 'step': 20575, 'epoch': 2}
{'type': 'loss', 'content': 0.0463407039642334, 'timestamp': '2025-10-02 00:47:34.622352', 'step': 20576, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:47:34.678717', 'step': 20576, 'epoch': 2}
{'type': 'loss', 'content': 0.06521368026733398, 'timestamp': '2025-10-02 00:47:34.683819', 'step': 20577, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:34.742194', 'step': 20577, 'epoch': 2}
{'type': 'loss', 'content': 0.09367620944976807, 'timestamp': '2025-10-02 00:47:34.745459', 'step': 20578, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:34.801134', 'step': 20578, 'epoch': 2}
{'type': 'loss', 'content': 0.10699141025543213, 'timestamp': '2025-10-02 00:47:34.803657', 'step': 20579, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:34.862015', 'step': 20579, 'epoch': 2}
{'type': 'loss', 'content': 0.06839136779308319, 'timestamp': '2025-10-02 00:47:34.869021', 'step': 20580, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:34.925459', 'step': 20580, 'epoch': 2}
{'type': 'loss', 'content': 0.039210401475429535, 'timestamp': '2025-10-02 00:47:34.931392', 'step': 20581, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:34.988495', 'step': 20581, 'epoch': 2}
{'type': 'loss', 'content': 0.1482163816690445, 'timestamp': '2025-10-02 00:47:34.991126', 'step': 20582, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:35.047184', 'step': 20582, 'epoch': 2}
{'type': 'loss', 'content': 0.09804093092679977, 'timestamp': '2025-10-02 00:47:35.052980', 'step': 20583, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:35.114247', 'step': 20583, 'epoch': 2}
{'type': 'loss', 'content': 0.0072806235402822495, 'timestamp': '2025-10-02 00:47:35.125230', 'step': 20584, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:47:35.194943', 'step': 20584, 'epoch': 2}
{'type': 'loss', 'content': 0.004393471870571375, 'timestamp': '2025-10-02 00:47:35.208375', 'step': 20585, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:35.266939', 'step': 20585, 'epoch': 2}
{'type': 'loss', 'content': 0.055413663387298584, 'timestamp': '2025-10-02 00:47:35.270707', 'step': 20586, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:35.326861', 'step': 20586, 'epoch': 2}
{'type': 'loss', 'content': 0.01380821131169796, 'timestamp': '2025-10-02 00:47:35.329613', 'step': 20587, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:35.392044', 'step': 20587, 'epoch': 2}
{'type': 'loss', 'content': 0.03345043957233429, 'timestamp': '2025-10-02 00:47:35.403269', 'step': 20588, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:35.456463', 'step': 20588, 'epoch': 2}
{'type': 'loss', 'content': 0.044551506638526917, 'timestamp': '2025-10-02 00:47:35.459117', 'step': 20589, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:35.513382', 'step': 20589, 'epoch': 2}
{'type': 'loss', 'content': 0.11956614255905151, 'timestamp': '2025-10-02 00:47:35.515893', 'step': 20590, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:47:35.570528', 'step': 20590, 'epoch': 2}
{'type': 'loss', 'content': 0.09824449568986893, 'timestamp': '2025-10-02 00:47:35.573120', 'step': 20591, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:35.628057', 'step': 20591, 'epoch': 2}
{'type': 'loss', 'content': 0.039126474410295486, 'timestamp': '2025-10-02 00:47:35.634745', 'step': 20592, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:35.689695', 'step': 20592, 'epoch': 2}
{'type': 'loss', 'content': 0.10777547955513, 'timestamp': '2025-10-02 00:47:35.692511', 'step': 20593, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:35.748807', 'step': 20593, 'epoch': 2}
{'type': 'loss', 'content': 0.142804816365242, 'timestamp': '2025-10-02 00:47:35.751448', 'step': 20594, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:35.806984', 'step': 20594, 'epoch': 2}
{'type': 'loss', 'content': 0.02069348469376564, 'timestamp': '2025-10-02 00:47:35.814178', 'step': 20595, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:35.868844', 'step': 20595, 'epoch': 2}
{'type': 'loss', 'content': 0.0104243578389287, 'timestamp': '2025-10-02 00:47:35.876838', 'step': 20596, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:35.932906', 'step': 20596, 'epoch': 2}
{'type': 'loss', 'content': 0.025971397757530212, 'timestamp': '2025-10-02 00:47:35.942184', 'step': 20597, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:35.997679', 'step': 20597, 'epoch': 2}
{'type': 'loss', 'content': 0.020164839923381805, 'timestamp': '2025-10-02 00:47:36.003375', 'step': 20598, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:36.059093', 'step': 20598, 'epoch': 2}
{'type': 'loss', 'content': 0.03743808716535568, 'timestamp': '2025-10-02 00:47:36.061767', 'step': 20599, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:36.115954', 'step': 20599, 'epoch': 2}
{'type': 'loss', 'content': 0.058966364711523056, 'timestamp': '2025-10-02 00:47:36.122465', 'step': 20600, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:36.176506', 'step': 20600, 'epoch': 2}
{'type': 'loss', 'content': 0.04370315000414848, 'timestamp': '2025-10-02 00:47:36.178897', 'step': 20601, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:36.233527', 'step': 20601, 'epoch': 2}
{'type': 'loss', 'content': 0.18676799535751343, 'timestamp': '2025-10-02 00:47:36.236495', 'step': 20602, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:47:36.300224', 'step': 20602, 'epoch': 2}
{'type': 'loss', 'content': 0.06910669803619385, 'timestamp': '2025-10-02 00:47:36.310863', 'step': 20603, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:36.365547', 'step': 20603, 'epoch': 2}
{'type': 'loss', 'content': 0.060267336666584015, 'timestamp': '2025-10-02 00:47:36.371432', 'step': 20604, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:36.425908', 'step': 20604, 'epoch': 2}
{'type': 'loss', 'content': 0.023218508809804916, 'timestamp': '2025-10-02 00:47:36.428458', 'step': 20605, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:47:36.482637', 'step': 20605, 'epoch': 2}
{'type': 'loss', 'content': 0.24691353738307953, 'timestamp': '2025-10-02 00:47:36.485085', 'step': 20606, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:36.540024', 'step': 20606, 'epoch': 2}
{'type': 'loss', 'content': 0.10761725902557373, 'timestamp': '2025-10-02 00:47:36.545600', 'step': 20607, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:36.601119', 'step': 20607, 'epoch': 2}
{'type': 'loss', 'content': 0.12486087530851364, 'timestamp': '2025-10-02 00:47:36.606820', 'step': 20608, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:47:36.661064', 'step': 20608, 'epoch': 2}
{'type': 'loss', 'content': 0.10252483934164047, 'timestamp': '2025-10-02 00:47:36.663684', 'step': 20609, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:36.722317', 'step': 20609, 'epoch': 2}
{'type': 'loss', 'content': 0.01595112681388855, 'timestamp': '2025-10-02 00:47:36.731843', 'step': 20610, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:36.794291', 'step': 20610, 'epoch': 2}
{'type': 'loss', 'content': 0.047790661454200745, 'timestamp': '2025-10-02 00:47:36.804764', 'step': 20611, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:36.866635', 'step': 20611, 'epoch': 2}
{'type': 'loss', 'content': 0.011557248421013355, 'timestamp': '2025-10-02 00:47:36.877843', 'step': 20612, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:36.939602', 'step': 20612, 'epoch': 2}
{'type': 'loss', 'content': 0.01085567008703947, 'timestamp': '2025-10-02 00:47:36.950926', 'step': 20613, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:37.007365', 'step': 20613, 'epoch': 2}
{'type': 'loss', 'content': 0.013632838614284992, 'timestamp': '2025-10-02 00:47:37.009717', 'step': 20614, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:37.064495', 'step': 20614, 'epoch': 2}
{'type': 'loss', 'content': 0.11228292435407639, 'timestamp': '2025-10-02 00:47:37.067016', 'step': 20615, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:47:37.124266', 'step': 20615, 'epoch': 2}
{'type': 'loss', 'content': 0.052959319204092026, 'timestamp': '2025-10-02 00:47:37.130446', 'step': 20616, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:37.185144', 'step': 20616, 'epoch': 2}
{'type': 'loss', 'content': 0.07506678253412247, 'timestamp': '2025-10-02 00:47:37.190782', 'step': 20617, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:37.245191', 'step': 20617, 'epoch': 2}
{'type': 'loss', 'content': 0.103494793176651, 'timestamp': '2025-10-02 00:47:37.247929', 'step': 20618, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:37.302742', 'step': 20618, 'epoch': 2}
{'type': 'loss', 'content': 0.09613634645938873, 'timestamp': '2025-10-02 00:47:37.309794', 'step': 20619, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:37.365573', 'step': 20619, 'epoch': 2}
{'type': 'loss', 'content': 0.04004116356372833, 'timestamp': '2025-10-02 00:47:37.371888', 'step': 20620, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:37.428176', 'step': 20620, 'epoch': 2}
{'type': 'loss', 'content': 0.11690235137939453, 'timestamp': '2025-10-02 00:47:37.430614', 'step': 20621, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:37.485570', 'step': 20621, 'epoch': 2}
{'type': 'loss', 'content': 0.12691304087638855, 'timestamp': '2025-10-02 00:47:37.488609', 'step': 20622, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:37.543194', 'step': 20622, 'epoch': 2}
{'type': 'loss', 'content': 0.0355270653963089, 'timestamp': '2025-10-02 00:47:37.545721', 'step': 20623, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:37.601575', 'step': 20623, 'epoch': 2}
{'type': 'loss', 'content': 0.0331881046295166, 'timestamp': '2025-10-02 00:47:37.611571', 'step': 20624, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:37.666917', 'step': 20624, 'epoch': 2}
{'type': 'loss', 'content': 0.1646517664194107, 'timestamp': '2025-10-02 00:47:37.669385', 'step': 20625, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:47:37.731715', 'step': 20625, 'epoch': 2}
{'type': 'loss', 'content': 0.06993978470563889, 'timestamp': '2025-10-02 00:47:37.742516', 'step': 20626, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:37.799108', 'step': 20626, 'epoch': 2}
{'type': 'loss', 'content': 0.019451845437288284, 'timestamp': '2025-10-02 00:47:37.808629', 'step': 20627, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:37.863531', 'step': 20627, 'epoch': 2}
{'type': 'loss', 'content': 0.034327030181884766, 'timestamp': '2025-10-02 00:47:37.870754', 'step': 20628, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:37.925243', 'step': 20628, 'epoch': 2}
{'type': 'loss', 'content': 0.048861004412174225, 'timestamp': '2025-10-02 00:47:37.930825', 'step': 20629, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:37.993307', 'step': 20629, 'epoch': 2}
{'type': 'loss', 'content': 0.06313268095254898, 'timestamp': '2025-10-02 00:47:38.003757', 'step': 20630, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:38.058326', 'step': 20630, 'epoch': 2}
{'type': 'loss', 'content': 0.03946591913700104, 'timestamp': '2025-10-02 00:47:38.060876', 'step': 20631, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:47:38.116392', 'step': 20631, 'epoch': 2}
{'type': 'loss', 'content': 0.09456316381692886, 'timestamp': '2025-10-02 00:47:38.122462', 'step': 20632, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:38.177379', 'step': 20632, 'epoch': 2}
{'type': 'loss', 'content': 0.03654816746711731, 'timestamp': '2025-10-02 00:47:38.180632', 'step': 20633, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:38.235632', 'step': 20633, 'epoch': 2}
{'type': 'loss', 'content': 0.03251075744628906, 'timestamp': '2025-10-02 00:47:38.239242', 'step': 20634, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:38.297660', 'step': 20634, 'epoch': 2}
{'type': 'loss', 'content': 0.0542188361287117, 'timestamp': '2025-10-02 00:47:38.300297', 'step': 20635, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:38.356992', 'step': 20635, 'epoch': 2}
{'type': 'loss', 'content': 0.0518084391951561, 'timestamp': '2025-10-02 00:47:38.363272', 'step': 20636, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:38.419787', 'step': 20636, 'epoch': 2}
{'type': 'loss', 'content': 0.026477567851543427, 'timestamp': '2025-10-02 00:47:38.429078', 'step': 20637, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:38.484403', 'step': 20637, 'epoch': 2}
{'type': 'loss', 'content': 0.02997145801782608, 'timestamp': '2025-10-02 00:47:38.486814', 'step': 20638, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:38.542896', 'step': 20638, 'epoch': 2}
{'type': 'loss', 'content': 0.02303771674633026, 'timestamp': '2025-10-02 00:47:38.545412', 'step': 20639, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:38.600238', 'step': 20639, 'epoch': 2}
{'type': 'loss', 'content': 0.026543617248535156, 'timestamp': '2025-10-02 00:47:38.606461', 'step': 20640, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:38.660434', 'step': 20640, 'epoch': 2}
{'type': 'loss', 'content': 0.0830100029706955, 'timestamp': '2025-10-02 00:47:38.663375', 'step': 20641, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:38.718359', 'step': 20641, 'epoch': 2}
{'type': 'loss', 'content': 0.09705344587564468, 'timestamp': '2025-10-02 00:47:38.720868', 'step': 20642, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:38.775436', 'step': 20642, 'epoch': 2}
{'type': 'loss', 'content': 0.0955965518951416, 'timestamp': '2025-10-02 00:47:38.779203', 'step': 20643, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:38.838958', 'step': 20643, 'epoch': 2}
{'type': 'loss', 'content': 0.013508692383766174, 'timestamp': '2025-10-02 00:47:38.849884', 'step': 20644, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:38.909884', 'step': 20644, 'epoch': 2}
{'type': 'loss', 'content': 0.028394976630806923, 'timestamp': '2025-10-02 00:47:38.921176', 'step': 20645, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:38.976403', 'step': 20645, 'epoch': 2}
{'type': 'loss', 'content': 0.044431354850530624, 'timestamp': '2025-10-02 00:47:38.981697', 'step': 20646, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:39.037374', 'step': 20646, 'epoch': 2}
{'type': 'loss', 'content': 0.08059249818325043, 'timestamp': '2025-10-02 00:47:39.044509', 'step': 20647, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:39.100635', 'step': 20647, 'epoch': 2}
{'type': 'loss', 'content': 0.044648487120866776, 'timestamp': '2025-10-02 00:47:39.110901', 'step': 20648, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:47:39.165118', 'step': 20648, 'epoch': 2}
{'type': 'loss', 'content': 0.1579262912273407, 'timestamp': '2025-10-02 00:47:39.167881', 'step': 20649, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:39.222157', 'step': 20649, 'epoch': 2}
{'type': 'loss', 'content': 0.07949186861515045, 'timestamp': '2025-10-02 00:47:39.224616', 'step': 20650, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:39.284037', 'step': 20650, 'epoch': 2}
{'type': 'loss', 'content': 0.0102195143699646, 'timestamp': '2025-10-02 00:47:39.294170', 'step': 20651, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:47:39.357313', 'step': 20651, 'epoch': 2}
{'type': 'loss', 'content': 0.008721432648599148, 'timestamp': '2025-10-02 00:47:39.368896', 'step': 20652, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:39.422858', 'step': 20652, 'epoch': 2}
{'type': 'loss', 'content': 0.09426003694534302, 'timestamp': '2025-10-02 00:47:39.425353', 'step': 20653, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:39.484695', 'step': 20653, 'epoch': 2}
{'type': 'loss', 'content': 0.06074962019920349, 'timestamp': '2025-10-02 00:47:39.494780', 'step': 20654, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:39.557220', 'step': 20654, 'epoch': 2}
{'type': 'loss', 'content': 0.006798776797950268, 'timestamp': '2025-10-02 00:47:39.567660', 'step': 20655, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:39.622840', 'step': 20655, 'epoch': 2}
{'type': 'loss', 'content': 0.034205060452222824, 'timestamp': '2025-10-02 00:47:39.631201', 'step': 20656, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:39.685293', 'step': 20656, 'epoch': 2}
{'type': 'loss', 'content': 0.03872281685471535, 'timestamp': '2025-10-02 00:47:39.688593', 'step': 20657, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:39.743232', 'step': 20657, 'epoch': 2}
{'type': 'loss', 'content': 0.039033036679029465, 'timestamp': '2025-10-02 00:47:39.746116', 'step': 20658, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:39.802011', 'step': 20658, 'epoch': 2}
{'type': 'loss', 'content': 0.08377400785684586, 'timestamp': '2025-10-02 00:47:39.805011', 'step': 20659, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:39.860258', 'step': 20659, 'epoch': 2}
{'type': 'loss', 'content': 0.09399092942476273, 'timestamp': '2025-10-02 00:47:39.866180', 'step': 20660, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:39.922568', 'step': 20660, 'epoch': 2}
{'type': 'loss', 'content': 0.006277098320424557, 'timestamp': '2025-10-02 00:47:39.925136', 'step': 20661, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:39.980852', 'step': 20661, 'epoch': 2}
{'type': 'loss', 'content': 0.03208519518375397, 'timestamp': '2025-10-02 00:47:39.983346', 'step': 20662, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:40.038275', 'step': 20662, 'epoch': 2}
{'type': 'loss', 'content': 0.11966189742088318, 'timestamp': '2025-10-02 00:47:40.041053', 'step': 20663, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:40.096246', 'step': 20663, 'epoch': 2}
{'type': 'loss', 'content': 0.018878867849707603, 'timestamp': '2025-10-02 00:47:40.102277', 'step': 20664, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:47:40.157156', 'step': 20664, 'epoch': 2}
{'type': 'loss', 'content': 0.08762089908123016, 'timestamp': '2025-10-02 00:47:40.159612', 'step': 20665, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:40.215062', 'step': 20665, 'epoch': 2}
{'type': 'loss', 'content': 0.026796823367476463, 'timestamp': '2025-10-02 00:47:40.224351', 'step': 20666, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:40.279067', 'step': 20666, 'epoch': 2}
{'type': 'loss', 'content': 0.052380409091711044, 'timestamp': '2025-10-02 00:47:40.281674', 'step': 20667, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:47:40.343946', 'step': 20667, 'epoch': 2}
{'type': 'loss', 'content': 0.024512561038136482, 'timestamp': '2025-10-02 00:47:40.355355', 'step': 20668, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:40.409409', 'step': 20668, 'epoch': 2}
{'type': 'loss', 'content': 0.15569114685058594, 'timestamp': '2025-10-02 00:47:40.411888', 'step': 20669, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:40.467365', 'step': 20669, 'epoch': 2}
{'type': 'loss', 'content': 0.0889132171869278, 'timestamp': '2025-10-02 00:47:40.469849', 'step': 20670, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:40.525193', 'step': 20670, 'epoch': 2}
{'type': 'loss', 'content': 0.041972119361162186, 'timestamp': '2025-10-02 00:47:40.527515', 'step': 20671, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:40.585353', 'step': 20671, 'epoch': 2}
{'type': 'loss', 'content': 0.07909228652715683, 'timestamp': '2025-10-02 00:47:40.591227', 'step': 20672, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:40.647215', 'step': 20672, 'epoch': 2}
{'type': 'loss', 'content': 0.13707764446735382, 'timestamp': '2025-10-02 00:47:40.652763', 'step': 20673, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:40.707316', 'step': 20673, 'epoch': 2}
{'type': 'loss', 'content': 0.02807551994919777, 'timestamp': '2025-10-02 00:47:40.714571', 'step': 20674, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:40.769752', 'step': 20674, 'epoch': 2}
{'type': 'loss', 'content': 0.035612255334854126, 'timestamp': '2025-10-02 00:47:40.775391', 'step': 20675, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:40.831248', 'step': 20675, 'epoch': 2}
{'type': 'loss', 'content': 0.022494610399007797, 'timestamp': '2025-10-02 00:47:40.839213', 'step': 20676, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:40.893710', 'step': 20676, 'epoch': 2}
{'type': 'loss', 'content': 0.10473129153251648, 'timestamp': '2025-10-02 00:47:40.896424', 'step': 20677, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:40.952342', 'step': 20677, 'epoch': 2}
{'type': 'loss', 'content': 0.03699440509080887, 'timestamp': '2025-10-02 00:47:40.955246', 'step': 20678, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:41.010181', 'step': 20678, 'epoch': 2}
{'type': 'loss', 'content': 0.019831614568829536, 'timestamp': '2025-10-02 00:47:41.012752', 'step': 20679, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:41.066756', 'step': 20679, 'epoch': 2}
{'type': 'loss', 'content': 0.10641103982925415, 'timestamp': '2025-10-02 00:47:41.073170', 'step': 20680, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:41.128074', 'step': 20680, 'epoch': 2}
{'type': 'loss', 'content': 0.024191388860344887, 'timestamp': '2025-10-02 00:47:41.137345', 'step': 20681, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:41.191934', 'step': 20681, 'epoch': 2}
{'type': 'loss', 'content': 0.0673414096236229, 'timestamp': '2025-10-02 00:47:41.194381', 'step': 20682, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:41.249462', 'step': 20682, 'epoch': 2}
{'type': 'loss', 'content': 0.0066690766252577305, 'timestamp': '2025-10-02 00:47:41.252357', 'step': 20683, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:41.306988', 'step': 20683, 'epoch': 2}
{'type': 'loss', 'content': 0.03255097195506096, 'timestamp': '2025-10-02 00:47:41.314049', 'step': 20684, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:41.367824', 'step': 20684, 'epoch': 2}
{'type': 'loss', 'content': 0.07695624977350235, 'timestamp': '2025-10-02 00:47:41.378056', 'step': 20685, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:41.433368', 'step': 20685, 'epoch': 2}
{'type': 'loss', 'content': 0.08041650801897049, 'timestamp': '2025-10-02 00:47:41.438996', 'step': 20686, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:41.495223', 'step': 20686, 'epoch': 2}
{'type': 'loss', 'content': 0.0192462969571352, 'timestamp': '2025-10-02 00:47:41.497770', 'step': 20687, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:41.557451', 'step': 20687, 'epoch': 2}
{'type': 'loss', 'content': 0.032102230936288834, 'timestamp': '2025-10-02 00:47:41.568430', 'step': 20688, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:41.623250', 'step': 20688, 'epoch': 2}
{'type': 'loss', 'content': 0.13239362835884094, 'timestamp': '2025-10-02 00:47:41.625604', 'step': 20689, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:41.684132', 'step': 20689, 'epoch': 2}
{'type': 'loss', 'content': 0.05696551501750946, 'timestamp': '2025-10-02 00:47:41.694271', 'step': 20690, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:47:41.748678', 'step': 20690, 'epoch': 2}
{'type': 'loss', 'content': 0.08353212475776672, 'timestamp': '2025-10-02 00:47:41.751527', 'step': 20691, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:41.806640', 'step': 20691, 'epoch': 2}
{'type': 'loss', 'content': 0.0660703107714653, 'timestamp': '2025-10-02 00:47:41.812472', 'step': 20692, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:41.867672', 'step': 20692, 'epoch': 2}
{'type': 'loss', 'content': 0.04509144648909569, 'timestamp': '2025-10-02 00:47:41.869944', 'step': 20693, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:41.924284', 'step': 20693, 'epoch': 2}
{'type': 'loss', 'content': 0.0817771703004837, 'timestamp': '2025-10-02 00:47:41.927110', 'step': 20694, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:41.981812', 'step': 20694, 'epoch': 2}
{'type': 'loss', 'content': 0.04086527228355408, 'timestamp': '2025-10-02 00:47:41.984343', 'step': 20695, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:42.039026', 'step': 20695, 'epoch': 2}
{'type': 'loss', 'content': 0.06841804832220078, 'timestamp': '2025-10-02 00:47:42.045119', 'step': 20696, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:47:42.098770', 'step': 20696, 'epoch': 2}
{'type': 'loss', 'content': 0.09222032129764557, 'timestamp': '2025-10-02 00:47:42.101661', 'step': 20697, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:42.156945', 'step': 20697, 'epoch': 2}
{'type': 'loss', 'content': 0.0947374776005745, 'timestamp': '2025-10-02 00:47:42.159504', 'step': 20698, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:42.214760', 'step': 20698, 'epoch': 2}
{'type': 'loss', 'content': 0.01991376467049122, 'timestamp': '2025-10-02 00:47:42.217393', 'step': 20699, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:42.273422', 'step': 20699, 'epoch': 2}
{'type': 'loss', 'content': 0.014804655686020851, 'timestamp': '2025-10-02 00:47:42.283319', 'step': 20700, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:47:42.344557', 'step': 20700, 'epoch': 2}
{'type': 'loss', 'content': 0.029036154970526695, 'timestamp': '2025-10-02 00:47:42.356293', 'step': 20701, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:42.412327', 'step': 20701, 'epoch': 2}
{'type': 'loss', 'content': 0.1223776564002037, 'timestamp': '2025-10-02 00:47:42.414491', 'step': 20702, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:42.469434', 'step': 20702, 'epoch': 2}
{'type': 'loss', 'content': 0.05605965852737427, 'timestamp': '2025-10-02 00:47:42.471887', 'step': 20703, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:47:42.525962', 'step': 20703, 'epoch': 2}
{'type': 'loss', 'content': 0.1191890686750412, 'timestamp': '2025-10-02 00:47:42.531950', 'step': 20704, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:42.586369', 'step': 20704, 'epoch': 2}
{'type': 'loss', 'content': 0.06507870554924011, 'timestamp': '2025-10-02 00:47:42.588828', 'step': 20705, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:42.649196', 'step': 20705, 'epoch': 2}
{'type': 'loss', 'content': 0.003638733411207795, 'timestamp': '2025-10-02 00:47:42.659371', 'step': 20706, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:42.714988', 'step': 20706, 'epoch': 2}
{'type': 'loss', 'content': 0.03498274087905884, 'timestamp': '2025-10-02 00:47:42.717391', 'step': 20707, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:42.778646', 'step': 20707, 'epoch': 2}
{'type': 'loss', 'content': 0.039773035794496536, 'timestamp': '2025-10-02 00:47:42.789793', 'step': 20708, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:42.843802', 'step': 20708, 'epoch': 2}
{'type': 'loss', 'content': 0.10659433156251907, 'timestamp': '2025-10-02 00:47:42.847239', 'step': 20709, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:42.905693', 'step': 20709, 'epoch': 2}
{'type': 'loss', 'content': 0.04631062597036362, 'timestamp': '2025-10-02 00:47:42.914999', 'step': 20710, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:42.971518', 'step': 20710, 'epoch': 2}
{'type': 'loss', 'content': 0.0033444399014115334, 'timestamp': '2025-10-02 00:47:42.974366', 'step': 20711, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:43.032131', 'step': 20711, 'epoch': 2}
{'type': 'loss', 'content': 0.15996122360229492, 'timestamp': '2025-10-02 00:47:43.039092', 'step': 20712, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:43.094949', 'step': 20712, 'epoch': 2}
{'type': 'loss', 'content': 0.011537821032106876, 'timestamp': '2025-10-02 00:47:43.097331', 'step': 20713, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:43.152089', 'step': 20713, 'epoch': 2}
{'type': 'loss', 'content': 0.0994785726070404, 'timestamp': '2025-10-02 00:47:43.155291', 'step': 20714, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:43.212531', 'step': 20714, 'epoch': 2}
{'type': 'loss', 'content': 0.10026779770851135, 'timestamp': '2025-10-02 00:47:43.215088', 'step': 20715, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:43.271141', 'step': 20715, 'epoch': 2}
{'type': 'loss', 'content': 0.025826478376984596, 'timestamp': '2025-10-02 00:47:43.277756', 'step': 20716, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:43.339039', 'step': 20716, 'epoch': 2}
{'type': 'loss', 'content': 0.07252449542284012, 'timestamp': '2025-10-02 00:47:43.349951', 'step': 20717, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:43.411452', 'step': 20717, 'epoch': 2}
{'type': 'loss', 'content': 0.04337453842163086, 'timestamp': '2025-10-02 00:47:43.421582', 'step': 20718, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:43.480909', 'step': 20718, 'epoch': 2}
{'type': 'loss', 'content': 0.0658041462302208, 'timestamp': '2025-10-02 00:47:43.483689', 'step': 20719, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:43.540551', 'step': 20719, 'epoch': 2}
{'type': 'loss', 'content': 0.06446413695812225, 'timestamp': '2025-10-02 00:47:43.547753', 'step': 20720, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:47:43.623319', 'step': 20720, 'epoch': 2}
{'type': 'loss', 'content': 0.02333328127861023, 'timestamp': '2025-10-02 00:47:43.637981', 'step': 20721, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:43.702173', 'step': 20721, 'epoch': 2}
{'type': 'loss', 'content': 0.006556084379553795, 'timestamp': '2025-10-02 00:47:43.712635', 'step': 20722, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:43.770767', 'step': 20722, 'epoch': 2}
{'type': 'loss', 'content': 0.07518356293439865, 'timestamp': '2025-10-02 00:47:43.774432', 'step': 20723, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:43.832440', 'step': 20723, 'epoch': 2}
{'type': 'loss', 'content': 0.12852416932582855, 'timestamp': '2025-10-02 00:47:43.839235', 'step': 20724, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:47:43.897509', 'step': 20724, 'epoch': 2}
{'type': 'loss', 'content': 0.05488681048154831, 'timestamp': '2025-10-02 00:47:43.900724', 'step': 20725, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:43.957143', 'step': 20725, 'epoch': 2}
{'type': 'loss', 'content': 0.07508506625890732, 'timestamp': '2025-10-02 00:47:43.960600', 'step': 20726, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:44.018246', 'step': 20726, 'epoch': 2}
{'type': 'loss', 'content': 0.05167219787836075, 'timestamp': '2025-10-02 00:47:44.021760', 'step': 20727, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:44.079232', 'step': 20727, 'epoch': 2}
{'type': 'loss', 'content': 0.09345120936632156, 'timestamp': '2025-10-02 00:47:44.085719', 'step': 20728, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:44.142671', 'step': 20728, 'epoch': 2}
{'type': 'loss', 'content': 0.03968552500009537, 'timestamp': '2025-10-02 00:47:44.147639', 'step': 20729, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:44.205375', 'step': 20729, 'epoch': 2}
{'type': 'loss', 'content': 0.043326061218976974, 'timestamp': '2025-10-02 00:47:44.211069', 'step': 20730, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:44.268809', 'step': 20730, 'epoch': 2}
{'type': 'loss', 'content': 0.029109343886375427, 'timestamp': '2025-10-02 00:47:44.275948', 'step': 20731, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:44.331630', 'step': 20731, 'epoch': 2}
{'type': 'loss', 'content': 0.11166463047266006, 'timestamp': '2025-10-02 00:47:44.338218', 'step': 20732, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:44.394045', 'step': 20732, 'epoch': 2}
{'type': 'loss', 'content': 0.037012550979852676, 'timestamp': '2025-10-02 00:47:44.397122', 'step': 20733, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:44.455605', 'step': 20733, 'epoch': 2}
{'type': 'loss', 'content': 0.08197731524705887, 'timestamp': '2025-10-02 00:47:44.458787', 'step': 20734, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:44.514840', 'step': 20734, 'epoch': 2}
{'type': 'loss', 'content': 0.10490735620260239, 'timestamp': '2025-10-02 00:47:44.517195', 'step': 20735, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:44.572108', 'step': 20735, 'epoch': 2}
{'type': 'loss', 'content': 0.06815192848443985, 'timestamp': '2025-10-02 00:47:44.578815', 'step': 20736, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:44.633609', 'step': 20736, 'epoch': 2}
{'type': 'loss', 'content': 0.07155732810497284, 'timestamp': '2025-10-02 00:47:44.636122', 'step': 20737, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:44.690434', 'step': 20737, 'epoch': 2}
{'type': 'loss', 'content': 0.032989710569381714, 'timestamp': '2025-10-02 00:47:44.693446', 'step': 20738, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:44.747290', 'step': 20738, 'epoch': 2}
{'type': 'loss', 'content': 0.1335528939962387, 'timestamp': '2025-10-02 00:47:44.749674', 'step': 20739, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:44.805170', 'step': 20739, 'epoch': 2}
{'type': 'loss', 'content': 0.07723368704319, 'timestamp': '2025-10-02 00:47:44.811351', 'step': 20740, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:44.865222', 'step': 20740, 'epoch': 2}
{'type': 'loss', 'content': 0.0742664709687233, 'timestamp': '2025-10-02 00:47:44.869824', 'step': 20741, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:44.925231', 'step': 20741, 'epoch': 2}
{'type': 'loss', 'content': 0.06531010568141937, 'timestamp': '2025-10-02 00:47:44.927596', 'step': 20742, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:44.983045', 'step': 20742, 'epoch': 2}
{'type': 'loss', 'content': 0.008733601309359074, 'timestamp': '2025-10-02 00:47:44.992226', 'step': 20743, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:45.047786', 'step': 20743, 'epoch': 2}
{'type': 'loss', 'content': 0.052591606974601746, 'timestamp': '2025-10-02 00:47:45.058097', 'step': 20744, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:45.112987', 'step': 20744, 'epoch': 2}
{'type': 'loss', 'content': 0.036798130720853806, 'timestamp': '2025-10-02 00:47:45.118239', 'step': 20745, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:45.172744', 'step': 20745, 'epoch': 2}
{'type': 'loss', 'content': 0.05291687697172165, 'timestamp': '2025-10-02 00:47:45.175639', 'step': 20746, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:45.230672', 'step': 20746, 'epoch': 2}
{'type': 'loss', 'content': 0.027503909543156624, 'timestamp': '2025-10-02 00:47:45.239878', 'step': 20747, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:45.295316', 'step': 20747, 'epoch': 2}
{'type': 'loss', 'content': 0.038750678300857544, 'timestamp': '2025-10-02 00:47:45.301693', 'step': 20748, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:45.356291', 'step': 20748, 'epoch': 2}
{'type': 'loss', 'content': 0.055130209773778915, 'timestamp': '2025-10-02 00:47:45.361472', 'step': 20749, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:45.423879', 'step': 20749, 'epoch': 2}
{'type': 'loss', 'content': 0.10563337802886963, 'timestamp': '2025-10-02 00:47:45.426425', 'step': 20750, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:45.481377', 'step': 20750, 'epoch': 2}
{'type': 'loss', 'content': 0.1132553219795227, 'timestamp': '2025-10-02 00:47:45.483644', 'step': 20751, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:45.539529', 'step': 20751, 'epoch': 2}
{'type': 'loss', 'content': 0.04927767068147659, 'timestamp': '2025-10-02 00:47:45.549794', 'step': 20752, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:45.603711', 'step': 20752, 'epoch': 2}
{'type': 'loss', 'content': 0.09456523507833481, 'timestamp': '2025-10-02 00:47:45.606155', 'step': 20753, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:45.665919', 'step': 20753, 'epoch': 2}
{'type': 'loss', 'content': 0.01840999349951744, 'timestamp': '2025-10-02 00:47:45.671242', 'step': 20754, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:45.728188', 'step': 20754, 'epoch': 2}
{'type': 'loss', 'content': 0.0994396060705185, 'timestamp': '2025-10-02 00:47:45.732769', 'step': 20755, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:47:45.787817', 'step': 20755, 'epoch': 2}
{'type': 'loss', 'content': 0.09549105167388916, 'timestamp': '2025-10-02 00:47:45.793933', 'step': 20756, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:45.849417', 'step': 20756, 'epoch': 2}
{'type': 'loss', 'content': 0.015516464598476887, 'timestamp': '2025-10-02 00:47:45.851980', 'step': 20757, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:45.913268', 'step': 20757, 'epoch': 2}
{'type': 'loss', 'content': 0.0251076128333807, 'timestamp': '2025-10-02 00:47:45.923725', 'step': 20758, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:45.977983', 'step': 20758, 'epoch': 2}
{'type': 'loss', 'content': 0.08886649459600449, 'timestamp': '2025-10-02 00:47:45.980420', 'step': 20759, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:46.035107', 'step': 20759, 'epoch': 2}
{'type': 'loss', 'content': 0.06508639454841614, 'timestamp': '2025-10-02 00:47:46.041469', 'step': 20760, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:46.095008', 'step': 20760, 'epoch': 2}
{'type': 'loss', 'content': 0.12039410322904587, 'timestamp': '2025-10-02 00:47:46.097561', 'step': 20761, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:47:46.174327', 'step': 20761, 'epoch': 2}
{'type': 'loss', 'content': 0.058739472180604935, 'timestamp': '2025-10-02 00:47:46.187737', 'step': 20762, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:46.243311', 'step': 20762, 'epoch': 2}
{'type': 'loss', 'content': 0.1052415743470192, 'timestamp': '2025-10-02 00:47:46.245784', 'step': 20763, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:46.300709', 'step': 20763, 'epoch': 2}
{'type': 'loss', 'content': 0.1475391685962677, 'timestamp': '2025-10-02 00:47:46.306713', 'step': 20764, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:46.360838', 'step': 20764, 'epoch': 2}
{'type': 'loss', 'content': 0.14156164228916168, 'timestamp': '2025-10-02 00:47:46.363307', 'step': 20765, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:46.417792', 'step': 20765, 'epoch': 2}
{'type': 'loss', 'content': 0.05805150046944618, 'timestamp': '2025-10-02 00:47:46.424953', 'step': 20766, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:46.480352', 'step': 20766, 'epoch': 2}
{'type': 'loss', 'content': 0.08430055528879166, 'timestamp': '2025-10-02 00:47:46.482850', 'step': 20767, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:46.538544', 'step': 20767, 'epoch': 2}
{'type': 'loss', 'content': 0.100505031645298, 'timestamp': '2025-10-02 00:47:46.544419', 'step': 20768, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:46.599356', 'step': 20768, 'epoch': 2}
{'type': 'loss', 'content': 0.0172564759850502, 'timestamp': '2025-10-02 00:47:46.601916', 'step': 20769, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:46.656438', 'step': 20769, 'epoch': 2}
{'type': 'loss', 'content': 0.06691304594278336, 'timestamp': '2025-10-02 00:47:46.665666', 'step': 20770, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:46.720850', 'step': 20770, 'epoch': 2}
{'type': 'loss', 'content': 0.10410303622484207, 'timestamp': '2025-10-02 00:47:46.723386', 'step': 20771, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:47:46.790634', 'step': 20771, 'epoch': 2}
{'type': 'loss', 'content': 0.040072619915008545, 'timestamp': '2025-10-02 00:47:46.803329', 'step': 20772, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:46.858355', 'step': 20772, 'epoch': 2}
{'type': 'loss', 'content': 0.08093539625406265, 'timestamp': '2025-10-02 00:47:46.860815', 'step': 20773, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:47:46.925290', 'step': 20773, 'epoch': 2}
{'type': 'loss', 'content': 0.009720880538225174, 'timestamp': '2025-10-02 00:47:46.935876', 'step': 20774, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:46.990447', 'step': 20774, 'epoch': 2}
{'type': 'loss', 'content': 0.14273425936698914, 'timestamp': '2025-10-02 00:47:46.993312', 'step': 20775, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:47.047636', 'step': 20775, 'epoch': 2}
{'type': 'loss', 'content': 0.06954986602067947, 'timestamp': '2025-10-02 00:47:47.053739', 'step': 20776, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:47.108743', 'step': 20776, 'epoch': 2}
{'type': 'loss', 'content': 0.013890635222196579, 'timestamp': '2025-10-02 00:47:47.118959', 'step': 20777, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:47:47.174328', 'step': 20777, 'epoch': 2}
{'type': 'loss', 'content': 0.09123210608959198, 'timestamp': '2025-10-02 00:47:47.176713', 'step': 20778, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:47:47.231126', 'step': 20778, 'epoch': 2}
{'type': 'loss', 'content': 0.028989875689148903, 'timestamp': '2025-10-02 00:47:47.233726', 'step': 20779, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:47.293114', 'step': 20779, 'epoch': 2}
{'type': 'loss', 'content': 0.014766717329621315, 'timestamp': '2025-10-02 00:47:47.304047', 'step': 20780, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:47.358572', 'step': 20780, 'epoch': 2}
{'type': 'loss', 'content': 0.0331554114818573, 'timestamp': '2025-10-02 00:47:47.363572', 'step': 20781, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:47.418166', 'step': 20781, 'epoch': 2}
{'type': 'loss', 'content': 0.04406087100505829, 'timestamp': '2025-10-02 00:47:47.420435', 'step': 20782, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:47.476763', 'step': 20782, 'epoch': 2}
{'type': 'loss', 'content': 0.05153420940041542, 'timestamp': '2025-10-02 00:47:47.479405', 'step': 20783, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:47.536734', 'step': 20783, 'epoch': 2}
{'type': 'loss', 'content': 0.010233299806714058, 'timestamp': '2025-10-02 00:47:47.544551', 'step': 20784, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:47.599125', 'step': 20784, 'epoch': 2}
{'type': 'loss', 'content': 0.07009928673505783, 'timestamp': '2025-10-02 00:47:47.606199', 'step': 20785, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:47.662280', 'step': 20785, 'epoch': 2}
{'type': 'loss', 'content': 0.010519159957766533, 'timestamp': '2025-10-02 00:47:47.664877', 'step': 20786, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:47.721397', 'step': 20786, 'epoch': 2}
{'type': 'loss', 'content': 0.0656140074133873, 'timestamp': '2025-10-02 00:47:47.730851', 'step': 20787, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:47.785115', 'step': 20787, 'epoch': 2}
{'type': 'loss', 'content': 0.17784909904003143, 'timestamp': '2025-10-02 00:47:47.791139', 'step': 20788, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:47.846050', 'step': 20788, 'epoch': 2}
{'type': 'loss', 'content': 0.12132672220468521, 'timestamp': '2025-10-02 00:47:47.848854', 'step': 20789, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:47:47.904028', 'step': 20789, 'epoch': 2}
{'type': 'loss', 'content': 0.0501730851829052, 'timestamp': '2025-10-02 00:47:47.906498', 'step': 20790, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:47.961336', 'step': 20790, 'epoch': 2}
{'type': 'loss', 'content': 0.049097951501607895, 'timestamp': '2025-10-02 00:47:47.966952', 'step': 20791, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:48.021536', 'step': 20791, 'epoch': 2}
{'type': 'loss', 'content': 0.09559953957796097, 'timestamp': '2025-10-02 00:47:48.028010', 'step': 20792, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:48.082185', 'step': 20792, 'epoch': 2}
{'type': 'loss', 'content': 0.08326007425785065, 'timestamp': '2025-10-02 00:47:48.084619', 'step': 20793, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:48.139386', 'step': 20793, 'epoch': 2}
{'type': 'loss', 'content': 0.04701562970876694, 'timestamp': '2025-10-02 00:47:48.142183', 'step': 20794, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:48.197272', 'step': 20794, 'epoch': 2}
{'type': 'loss', 'content': 0.023014096543192863, 'timestamp': '2025-10-02 00:47:48.200106', 'step': 20795, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:48.256069', 'step': 20795, 'epoch': 2}
{'type': 'loss', 'content': 0.19992657005786896, 'timestamp': '2025-10-02 00:47:48.262257', 'step': 20796, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:48.315427', 'step': 20796, 'epoch': 2}
{'type': 'loss', 'content': 0.09431378543376923, 'timestamp': '2025-10-02 00:47:48.318977', 'step': 20797, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:48.374011', 'step': 20797, 'epoch': 2}
{'type': 'loss', 'content': 0.011070799082517624, 'timestamp': '2025-10-02 00:47:48.377024', 'step': 20798, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:48.433813', 'step': 20798, 'epoch': 2}
{'type': 'loss', 'content': 0.02581164613366127, 'timestamp': '2025-10-02 00:47:48.436346', 'step': 20799, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:48.492744', 'step': 20799, 'epoch': 2}
{'type': 'loss', 'content': 0.034565817564725876, 'timestamp': '2025-10-02 00:47:48.499434', 'step': 20800, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:48.554284', 'step': 20800, 'epoch': 2}
{'type': 'loss', 'content': 0.06751323491334915, 'timestamp': '2025-10-02 00:47:48.556687', 'step': 20801, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:48.611776', 'step': 20801, 'epoch': 2}
{'type': 'loss', 'content': 0.07279828935861588, 'timestamp': '2025-10-02 00:47:48.614126', 'step': 20802, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:48.668629', 'step': 20802, 'epoch': 2}
{'type': 'loss', 'content': 0.08198245614767075, 'timestamp': '2025-10-02 00:47:48.671211', 'step': 20803, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:48.726436', 'step': 20803, 'epoch': 2}
{'type': 'loss', 'content': 0.026915699243545532, 'timestamp': '2025-10-02 00:47:48.732918', 'step': 20804, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:48.787605', 'step': 20804, 'epoch': 2}
{'type': 'loss', 'content': 0.061855245381593704, 'timestamp': '2025-10-02 00:47:48.794918', 'step': 20805, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:48.854137', 'step': 20805, 'epoch': 2}
{'type': 'loss', 'content': 0.03196626901626587, 'timestamp': '2025-10-02 00:47:48.864316', 'step': 20806, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:48.919521', 'step': 20806, 'epoch': 2}
{'type': 'loss', 'content': 0.03761064633727074, 'timestamp': '2025-10-02 00:47:48.925275', 'step': 20807, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:48.982180', 'step': 20807, 'epoch': 2}
{'type': 'loss', 'content': 0.029198018833994865, 'timestamp': '2025-10-02 00:47:48.990708', 'step': 20808, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:49.045288', 'step': 20808, 'epoch': 2}
{'type': 'loss', 'content': 0.10233551263809204, 'timestamp': '2025-10-02 00:47:49.047926', 'step': 20809, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:49.110053', 'step': 20809, 'epoch': 2}
{'type': 'loss', 'content': 0.007513567805290222, 'timestamp': '2025-10-02 00:47:49.120456', 'step': 20810, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:49.177130', 'step': 20810, 'epoch': 2}
{'type': 'loss', 'content': 0.02143056131899357, 'timestamp': '2025-10-02 00:47:49.184225', 'step': 20811, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:49.239408', 'step': 20811, 'epoch': 2}
{'type': 'loss', 'content': 0.1440175622701645, 'timestamp': '2025-10-02 00:47:49.245864', 'step': 20812, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:49.301847', 'step': 20812, 'epoch': 2}
{'type': 'loss', 'content': 0.0013180269161239266, 'timestamp': '2025-10-02 00:47:49.304450', 'step': 20813, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:49.360526', 'step': 20813, 'epoch': 2}
{'type': 'loss', 'content': 0.050067540258169174, 'timestamp': '2025-10-02 00:47:49.365041', 'step': 20814, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:49.420223', 'step': 20814, 'epoch': 2}
{'type': 'loss', 'content': 0.05795905366539955, 'timestamp': '2025-10-02 00:47:49.427443', 'step': 20815, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:49.482684', 'step': 20815, 'epoch': 2}
{'type': 'loss', 'content': 0.04172667860984802, 'timestamp': '2025-10-02 00:47:49.489031', 'step': 20816, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:49.543021', 'step': 20816, 'epoch': 2}
{'type': 'loss', 'content': 0.15129335224628448, 'timestamp': '2025-10-02 00:47:49.546085', 'step': 20817, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:47:49.602328', 'step': 20817, 'epoch': 2}
{'type': 'loss', 'content': 0.028068989515304565, 'timestamp': '2025-10-02 00:47:49.605006', 'step': 20818, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:49.660081', 'step': 20818, 'epoch': 2}
{'type': 'loss', 'content': 0.06793170422315598, 'timestamp': '2025-10-02 00:47:49.662849', 'step': 20819, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:49.718038', 'step': 20819, 'epoch': 2}
{'type': 'loss', 'content': 0.07437417656183243, 'timestamp': '2025-10-02 00:47:49.728114', 'step': 20820, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:49.782546', 'step': 20820, 'epoch': 2}
{'type': 'loss', 'content': 0.021368082612752914, 'timestamp': '2025-10-02 00:47:49.785250', 'step': 20821, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:49.840789', 'step': 20821, 'epoch': 2}
{'type': 'loss', 'content': 0.02784746140241623, 'timestamp': '2025-10-02 00:47:49.850299', 'step': 20822, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:49.906321', 'step': 20822, 'epoch': 2}
{'type': 'loss', 'content': 0.051765076816082, 'timestamp': '2025-10-02 00:47:49.911905', 'step': 20823, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:47:49.967812', 'step': 20823, 'epoch': 2}
{'type': 'loss', 'content': 0.08856518566608429, 'timestamp': '2025-10-02 00:47:49.973868', 'step': 20824, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:50.028311', 'step': 20824, 'epoch': 2}
{'type': 'loss', 'content': 0.09980656206607819, 'timestamp': '2025-10-02 00:47:50.030800', 'step': 20825, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:50.085964', 'step': 20825, 'epoch': 2}
{'type': 'loss', 'content': 0.1291896402835846, 'timestamp': '2025-10-02 00:47:50.088574', 'step': 20826, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:50.144074', 'step': 20826, 'epoch': 2}
{'type': 'loss', 'content': 0.03796277940273285, 'timestamp': '2025-10-02 00:47:50.153556', 'step': 20827, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:50.207986', 'step': 20827, 'epoch': 2}
{'type': 'loss', 'content': 0.08091022074222565, 'timestamp': '2025-10-02 00:47:50.214147', 'step': 20828, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:50.268510', 'step': 20828, 'epoch': 2}
{'type': 'loss', 'content': 0.10879562050104141, 'timestamp': '2025-10-02 00:47:50.271127', 'step': 20829, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:50.325946', 'step': 20829, 'epoch': 2}
{'type': 'loss', 'content': 0.0634615570306778, 'timestamp': '2025-10-02 00:47:50.328345', 'step': 20830, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:50.384536', 'step': 20830, 'epoch': 2}
{'type': 'loss', 'content': 0.046641670167446136, 'timestamp': '2025-10-02 00:47:50.388576', 'step': 20831, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:50.444292', 'step': 20831, 'epoch': 2}
{'type': 'loss', 'content': 0.060951948165893555, 'timestamp': '2025-10-02 00:47:50.451268', 'step': 20832, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:47:50.517543', 'step': 20832, 'epoch': 2}
{'type': 'loss', 'content': 0.022059138864278793, 'timestamp': '2025-10-02 00:47:50.530434', 'step': 20833, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:50.586394', 'step': 20833, 'epoch': 2}
{'type': 'loss', 'content': 0.03234325349330902, 'timestamp': '2025-10-02 00:47:50.595874', 'step': 20834, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:50.651043', 'step': 20834, 'epoch': 2}
{'type': 'loss', 'content': 0.07263951748609543, 'timestamp': '2025-10-02 00:47:50.653754', 'step': 20835, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:50.707881', 'step': 20835, 'epoch': 2}
{'type': 'loss', 'content': 0.1762305647134781, 'timestamp': '2025-10-02 00:47:50.714963', 'step': 20836, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:47:50.769039', 'step': 20836, 'epoch': 2}
{'type': 'loss', 'content': 0.07365946471691132, 'timestamp': '2025-10-02 00:47:50.771646', 'step': 20837, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:47:50.827264', 'step': 20837, 'epoch': 2}
{'type': 'loss', 'content': 0.10719989240169525, 'timestamp': '2025-10-02 00:47:50.829691', 'step': 20838, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:50.886282', 'step': 20838, 'epoch': 2}
{'type': 'loss', 'content': 0.03164786845445633, 'timestamp': '2025-10-02 00:47:50.889257', 'step': 20839, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:50.944732', 'step': 20839, 'epoch': 2}
{'type': 'loss', 'content': 0.07067633420228958, 'timestamp': '2025-10-02 00:47:50.951181', 'step': 20840, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:51.006103', 'step': 20840, 'epoch': 2}
{'type': 'loss', 'content': 0.0031428858637809753, 'timestamp': '2025-10-02 00:47:51.013433', 'step': 20841, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:51.068934', 'step': 20841, 'epoch': 2}
{'type': 'loss', 'content': 0.06884746998548508, 'timestamp': '2025-10-02 00:47:51.071522', 'step': 20842, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:51.127659', 'step': 20842, 'epoch': 2}
{'type': 'loss', 'content': 0.12283666431903839, 'timestamp': '2025-10-02 00:47:51.130301', 'step': 20843, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:51.191516', 'step': 20843, 'epoch': 2}
{'type': 'loss', 'content': 0.020780367776751518, 'timestamp': '2025-10-02 00:47:51.202689', 'step': 20844, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:51.256741', 'step': 20844, 'epoch': 2}
{'type': 'loss', 'content': 0.12701481580734253, 'timestamp': '2025-10-02 00:47:51.259245', 'step': 20845, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:51.314697', 'step': 20845, 'epoch': 2}
{'type': 'loss', 'content': 0.028430750593543053, 'timestamp': '2025-10-02 00:47:51.324165', 'step': 20846, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:51.379527', 'step': 20846, 'epoch': 2}
{'type': 'loss', 'content': 0.011731477454304695, 'timestamp': '2025-10-02 00:47:51.382043', 'step': 20847, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:51.437521', 'step': 20847, 'epoch': 2}
{'type': 'loss', 'content': 0.02330964431166649, 'timestamp': '2025-10-02 00:47:51.444156', 'step': 20848, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:51.498595', 'step': 20848, 'epoch': 2}
{'type': 'loss', 'content': 0.03230925649404526, 'timestamp': '2025-10-02 00:47:51.501041', 'step': 20849, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:51.555184', 'step': 20849, 'epoch': 2}
{'type': 'loss', 'content': 0.09130126982927322, 'timestamp': '2025-10-02 00:47:51.558043', 'step': 20850, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:47:51.612652', 'step': 20850, 'epoch': 2}
{'type': 'loss', 'content': 0.08033010363578796, 'timestamp': '2025-10-02 00:47:51.615243', 'step': 20851, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:51.670754', 'step': 20851, 'epoch': 2}
{'type': 'loss', 'content': 0.03847341611981392, 'timestamp': '2025-10-02 00:47:51.681057', 'step': 20852, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:47:51.735498', 'step': 20852, 'epoch': 2}
{'type': 'loss', 'content': 0.10228990018367767, 'timestamp': '2025-10-02 00:47:51.738283', 'step': 20853, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:47:51.793048', 'step': 20853, 'epoch': 2}
{'type': 'loss', 'content': 0.04120196774601936, 'timestamp': '2025-10-02 00:47:51.795481', 'step': 20854, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:51.850524', 'step': 20854, 'epoch': 2}
{'type': 'loss', 'content': 0.014775604009628296, 'timestamp': '2025-10-02 00:47:51.857724', 'step': 20855, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:47:51.912398', 'step': 20855, 'epoch': 2}
{'type': 'loss', 'content': 0.08533692359924316, 'timestamp': '2025-10-02 00:47:51.920579', 'step': 20856, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:47:51.981317', 'step': 20856, 'epoch': 2}
{'type': 'loss', 'content': 0.026939135044813156, 'timestamp': '2025-10-02 00:47:51.992591', 'step': 20857, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:52.047821', 'step': 20857, 'epoch': 2}
{'type': 'loss', 'content': 0.03189055621623993, 'timestamp': '2025-10-02 00:47:52.050734', 'step': 20858, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:52.107245', 'step': 20858, 'epoch': 2}
{'type': 'loss', 'content': 0.007891399785876274, 'timestamp': '2025-10-02 00:47:52.112752', 'step': 20859, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:47:52.171335', 'step': 20859, 'epoch': 2}
{'type': 'loss', 'content': 0.04075094312429428, 'timestamp': '2025-10-02 00:47:52.178480', 'step': 20860, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:47:52.235688', 'step': 20860, 'epoch': 2}
{'type': 'loss', 'content': 0.014595712535083294, 'timestamp': '2025-10-02 00:47:52.245956', 'step': 20861, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:52.303449', 'step': 20861, 'epoch': 2}
{'type': 'loss', 'content': 0.21584966778755188, 'timestamp': '2025-10-02 00:47:52.306835', 'step': 20862, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:52.364314', 'step': 20862, 'epoch': 2}
{'type': 'loss', 'content': 0.09713876992464066, 'timestamp': '2025-10-02 00:47:52.367838', 'step': 20863, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:47:52.424995', 'step': 20863, 'epoch': 2}
{'type': 'loss', 'content': 0.09467845410108566, 'timestamp': '2025-10-02 00:47:52.431660', 'step': 20864, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:52.488342', 'step': 20864, 'epoch': 2}
{'type': 'loss', 'content': 0.09911244362592697, 'timestamp': '2025-10-02 00:47:52.493896', 'step': 20865, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:52.551769', 'step': 20865, 'epoch': 2}
{'type': 'loss', 'content': 0.11173126101493835, 'timestamp': '2025-10-02 00:47:52.554429', 'step': 20866, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:52.611731', 'step': 20866, 'epoch': 2}
{'type': 'loss', 'content': 0.01443789154291153, 'timestamp': '2025-10-02 00:47:52.615106', 'step': 20867, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:52.672988', 'step': 20867, 'epoch': 2}
{'type': 'loss', 'content': 0.11986992508172989, 'timestamp': '2025-10-02 00:47:52.679175', 'step': 20868, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:47:52.735666', 'step': 20868, 'epoch': 2}
{'type': 'loss', 'content': 0.13271766901016235, 'timestamp': '2025-10-02 00:47:52.738606', 'step': 20869, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:52.795519', 'step': 20869, 'epoch': 2}
{'type': 'loss', 'content': 0.08740203082561493, 'timestamp': '2025-10-02 00:47:52.798333', 'step': 20870, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:47:52.854218', 'step': 20870, 'epoch': 2}
{'type': 'loss', 'content': 0.21630357205867767, 'timestamp': '2025-10-02 00:47:52.859788', 'step': 20871, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:47:52.916524', 'step': 20871, 'epoch': 2}
{'type': 'loss', 'content': 0.009660555981099606, 'timestamp': '2025-10-02 00:47:52.924574', 'step': 20872, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:47:52.981165', 'step': 20872, 'epoch': 2}
{'type': 'loss', 'content': 0.047064509242773056, 'timestamp': '2025-10-02 00:47:52.990221', 'step': 20873, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:47:53.059194', 'step': 20873, 'epoch': 2}
{'type': 'loss', 'content': 0.011435607448220253, 'timestamp': '2025-10-02 00:47:53.071170', 'step': 20874, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:53.133400', 'step': 20874, 'epoch': 2}
{'type': 'loss', 'content': 0.030647555366158485, 'timestamp': '2025-10-02 00:47:53.143575', 'step': 20875, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:47:53.203788', 'step': 20875, 'epoch': 2}
{'type': 'loss', 'content': 0.007796478923410177, 'timestamp': '2025-10-02 00:47:53.214754', 'step': 20876, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:47:53.271959', 'step': 20876, 'epoch': 2}
{'type': 'loss', 'content': 0.059851717203855515, 'timestamp': '2025-10-02 00:47:53.274566', 'step': 20877, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:47:53.330529', 'step': 20877, 'epoch': 2}
{'type': 'loss', 'content': 0.08342093974351883, 'timestamp': '2025-10-02 00:47:53.336316', 'step': 20878, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:47:53.406882', 'step': 20878, 'epoch': 2}
{'type': 'loss', 'content': 0.044342413544654846, 'timestamp': '2025-10-02 00:47:53.418791', 'step': 20879, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:47:53.483371', 'step': 20879, 'epoch': 2}
{'type': 'loss', 'content': 0.005892318673431873, 'timestamp': '2025-10-02 00:47:53.494805', 'step': 20880, 'epoch': 2}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:48:20.798870', 'step': 20880, 'epoch': 2}
{'type': 'pplx', 'content': 101.07848275797457, 'timestamp': '2025-10-02 00:48:20.817556', 'step': 20880, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:48:20.907439', 'step': 20880, 'epoch': 2}
{'type': 'loss', 'content': 0.08257459104061127, 'timestamp': '2025-10-02 00:48:20.926655', 'step': 20881, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:21.036526', 'step': 20881, 'epoch': 2}
{'type': 'loss', 'content': 0.1605425477027893, 'timestamp': '2025-10-02 00:48:21.041251', 'step': 20882, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:21.128117', 'step': 20882, 'epoch': 2}
{'type': 'loss', 'content': 0.07210902869701385, 'timestamp': '2025-10-02 00:48:21.134511', 'step': 20883, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:21.222153', 'step': 20883, 'epoch': 2}
{'type': 'loss', 'content': 0.021145105361938477, 'timestamp': '2025-10-02 00:48:21.231664', 'step': 20884, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:21.291770', 'step': 20884, 'epoch': 2}
{'type': 'loss', 'content': 0.12513777613639832, 'timestamp': '2025-10-02 00:48:21.295609', 'step': 20885, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:21.389075', 'step': 20885, 'epoch': 2}
{'type': 'loss', 'content': 0.023055193945765495, 'timestamp': '2025-10-02 00:48:21.393106', 'step': 20886, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:21.452703', 'step': 20886, 'epoch': 2}
{'type': 'loss', 'content': 0.03296012803912163, 'timestamp': '2025-10-02 00:48:21.467256', 'step': 20887, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:21.536851', 'step': 20887, 'epoch': 2}
{'type': 'loss', 'content': 0.0675974041223526, 'timestamp': '2025-10-02 00:48:21.554806', 'step': 20888, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:21.625668', 'step': 20888, 'epoch': 2}
{'type': 'loss', 'content': 0.019302070140838623, 'timestamp': '2025-10-02 00:48:21.631338', 'step': 20889, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:21.704853', 'step': 20889, 'epoch': 2}
{'type': 'loss', 'content': 0.07283904403448105, 'timestamp': '2025-10-02 00:48:21.718396', 'step': 20890, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:21.787898', 'step': 20890, 'epoch': 2}
{'type': 'loss', 'content': 0.008443384431302547, 'timestamp': '2025-10-02 00:48:21.796933', 'step': 20891, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:21.879288', 'step': 20891, 'epoch': 2}
{'type': 'loss', 'content': 0.04246707260608673, 'timestamp': '2025-10-02 00:48:21.889310', 'step': 20892, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:21.986323', 'step': 20892, 'epoch': 2}
{'type': 'loss', 'content': 0.005243998020887375, 'timestamp': '2025-10-02 00:48:21.993564', 'step': 20893, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:22.080745', 'step': 20893, 'epoch': 2}
{'type': 'loss', 'content': 0.10776066035032272, 'timestamp': '2025-10-02 00:48:22.085268', 'step': 20894, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:48:22.183257', 'step': 20894, 'epoch': 2}
{'type': 'loss', 'content': 0.028064057230949402, 'timestamp': '2025-10-02 00:48:22.193819', 'step': 20895, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:22.256260', 'step': 20895, 'epoch': 2}
{'type': 'loss', 'content': 0.08055199682712555, 'timestamp': '2025-10-02 00:48:22.280631', 'step': 20896, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:22.341403', 'step': 20896, 'epoch': 2}
{'type': 'loss', 'content': 0.06059989333152771, 'timestamp': '2025-10-02 00:48:22.362580', 'step': 20897, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:48:22.445233', 'step': 20897, 'epoch': 2}
{'type': 'loss', 'content': 0.007920484989881516, 'timestamp': '2025-10-02 00:48:22.455424', 'step': 20898, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:48:22.517811', 'step': 20898, 'epoch': 2}
{'type': 'loss', 'content': 0.045692045241594315, 'timestamp': '2025-10-02 00:48:22.522299', 'step': 20899, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:22.611383', 'step': 20899, 'epoch': 2}
{'type': 'loss', 'content': 0.06017225980758667, 'timestamp': '2025-10-02 00:48:22.618388', 'step': 20900, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:22.709288', 'step': 20900, 'epoch': 2}
{'type': 'loss', 'content': 0.04870440810918808, 'timestamp': '2025-10-02 00:48:22.712954', 'step': 20901, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:48:22.792810', 'step': 20901, 'epoch': 2}
{'type': 'loss', 'content': 0.00797518715262413, 'timestamp': '2025-10-02 00:48:22.803301', 'step': 20902, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:48:22.879319', 'step': 20902, 'epoch': 2}
{'type': 'loss', 'content': 0.008214696310460567, 'timestamp': '2025-10-02 00:48:22.891730', 'step': 20903, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:22.962660', 'step': 20903, 'epoch': 2}
{'type': 'loss', 'content': 0.010887467302381992, 'timestamp': '2025-10-02 00:48:22.969710', 'step': 20904, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:23.040600', 'step': 20904, 'epoch': 2}
{'type': 'loss', 'content': 0.05333152040839195, 'timestamp': '2025-10-02 00:48:23.044350', 'step': 20905, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:48:23.110363', 'step': 20905, 'epoch': 2}
{'type': 'loss', 'content': 0.08669872581958771, 'timestamp': '2025-10-02 00:48:23.113416', 'step': 20906, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:23.180615', 'step': 20906, 'epoch': 2}
{'type': 'loss', 'content': 0.14916068315505981, 'timestamp': '2025-10-02 00:48:23.184973', 'step': 20907, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:23.255021', 'step': 20907, 'epoch': 2}
{'type': 'loss', 'content': 0.07830698788166046, 'timestamp': '2025-10-02 00:48:23.262817', 'step': 20908, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:23.329656', 'step': 20908, 'epoch': 2}
{'type': 'loss', 'content': 0.022337861359119415, 'timestamp': '2025-10-02 00:48:23.334633', 'step': 20909, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:48:23.400419', 'step': 20909, 'epoch': 2}
{'type': 'loss', 'content': 0.0178518146276474, 'timestamp': '2025-10-02 00:48:23.411130', 'step': 20910, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:48:23.477456', 'step': 20910, 'epoch': 2}
{'type': 'loss', 'content': 0.05319724231958389, 'timestamp': '2025-10-02 00:48:23.487908', 'step': 20911, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:23.546842', 'step': 20911, 'epoch': 2}
{'type': 'loss', 'content': 0.029148399829864502, 'timestamp': '2025-10-02 00:48:23.553764', 'step': 20912, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:48:23.615477', 'step': 20912, 'epoch': 2}
{'type': 'loss', 'content': 0.0665757954120636, 'timestamp': '2025-10-02 00:48:23.626423', 'step': 20913, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:23.682844', 'step': 20913, 'epoch': 2}
{'type': 'loss', 'content': 0.0986572653055191, 'timestamp': '2025-10-02 00:48:23.688821', 'step': 20914, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:23.753950', 'step': 20914, 'epoch': 2}
{'type': 'loss', 'content': 0.07385608553886414, 'timestamp': '2025-10-02 00:48:23.761757', 'step': 20915, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:23.834409', 'step': 20915, 'epoch': 2}
{'type': 'loss', 'content': 0.011843129992485046, 'timestamp': '2025-10-02 00:48:23.841168', 'step': 20916, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:23.899489', 'step': 20916, 'epoch': 2}
{'type': 'loss', 'content': 0.051712144166231155, 'timestamp': '2025-10-02 00:48:23.909781', 'step': 20917, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:23.978266', 'step': 20917, 'epoch': 2}
{'type': 'loss', 'content': 0.017621858045458794, 'timestamp': '2025-10-02 00:48:23.982463', 'step': 20918, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:24.054740', 'step': 20918, 'epoch': 2}
{'type': 'loss', 'content': 0.05498190596699715, 'timestamp': '2025-10-02 00:48:24.058235', 'step': 20919, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:24.116244', 'step': 20919, 'epoch': 2}
{'type': 'loss', 'content': 0.021070901304483414, 'timestamp': '2025-10-02 00:48:24.123038', 'step': 20920, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:24.180663', 'step': 20920, 'epoch': 2}
{'type': 'loss', 'content': 0.05531930923461914, 'timestamp': '2025-10-02 00:48:24.183915', 'step': 20921, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:24.258093', 'step': 20921, 'epoch': 2}
{'type': 'loss', 'content': 0.065451979637146, 'timestamp': '2025-10-02 00:48:24.262348', 'step': 20922, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:24.319822', 'step': 20922, 'epoch': 2}
{'type': 'loss', 'content': 0.08832460641860962, 'timestamp': '2025-10-02 00:48:24.325889', 'step': 20923, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:24.393875', 'step': 20923, 'epoch': 2}
{'type': 'loss', 'content': 0.03969525173306465, 'timestamp': '2025-10-02 00:48:24.401360', 'step': 20924, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:48:24.460500', 'step': 20924, 'epoch': 2}
{'type': 'loss', 'content': 0.06969095766544342, 'timestamp': '2025-10-02 00:48:24.464614', 'step': 20925, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:24.524490', 'step': 20925, 'epoch': 2}
{'type': 'loss', 'content': 0.16036668419837952, 'timestamp': '2025-10-02 00:48:24.532153', 'step': 20926, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:24.589489', 'step': 20926, 'epoch': 2}
{'type': 'loss', 'content': 0.03756583109498024, 'timestamp': '2025-10-02 00:48:24.606374', 'step': 20927, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:24.676032', 'step': 20927, 'epoch': 2}
{'type': 'loss', 'content': 0.08195915073156357, 'timestamp': '2025-10-02 00:48:24.683908', 'step': 20928, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:48:24.740131', 'step': 20928, 'epoch': 2}
{'type': 'loss', 'content': 0.09567657113075256, 'timestamp': '2025-10-02 00:48:24.744214', 'step': 20929, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:24.803879', 'step': 20929, 'epoch': 2}
{'type': 'loss', 'content': 0.022480275481939316, 'timestamp': '2025-10-02 00:48:24.807463', 'step': 20930, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:24.868070', 'step': 20930, 'epoch': 2}
{'type': 'loss', 'content': 0.028812000527977943, 'timestamp': '2025-10-02 00:48:24.872634', 'step': 20931, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:24.941902', 'step': 20931, 'epoch': 2}
{'type': 'loss', 'content': 0.05128104239702225, 'timestamp': '2025-10-02 00:48:24.949479', 'step': 20932, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:25.010092', 'step': 20932, 'epoch': 2}
{'type': 'loss', 'content': 0.04506383836269379, 'timestamp': '2025-10-02 00:48:25.022701', 'step': 20933, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:25.101756', 'step': 20933, 'epoch': 2}
{'type': 'loss', 'content': 0.02478291094303131, 'timestamp': '2025-10-02 00:48:25.105727', 'step': 20934, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:48:25.171152', 'step': 20934, 'epoch': 2}
{'type': 'loss', 'content': 0.14354845881462097, 'timestamp': '2025-10-02 00:48:25.182325', 'step': 20935, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:25.241205', 'step': 20935, 'epoch': 2}
{'type': 'loss', 'content': 0.049551088362932205, 'timestamp': '2025-10-02 00:48:25.248183', 'step': 20936, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:25.307767', 'step': 20936, 'epoch': 2}
{'type': 'loss', 'content': 0.0010160168167203665, 'timestamp': '2025-10-02 00:48:25.318057', 'step': 20937, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:25.376226', 'step': 20937, 'epoch': 2}
{'type': 'loss', 'content': 0.051049310714006424, 'timestamp': '2025-10-02 00:48:25.380313', 'step': 20938, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:25.439185', 'step': 20938, 'epoch': 2}
{'type': 'loss', 'content': 0.10323645174503326, 'timestamp': '2025-10-02 00:48:25.444347', 'step': 20939, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:25.516477', 'step': 20939, 'epoch': 2}
{'type': 'loss', 'content': 0.05140674486756325, 'timestamp': '2025-10-02 00:48:25.523916', 'step': 20940, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:25.584080', 'step': 20940, 'epoch': 2}
{'type': 'loss', 'content': 0.08734790980815887, 'timestamp': '2025-10-02 00:48:25.588432', 'step': 20941, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:25.669274', 'step': 20941, 'epoch': 2}
{'type': 'loss', 'content': 0.030909797176718712, 'timestamp': '2025-10-02 00:48:25.682397', 'step': 20942, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:48:25.744486', 'step': 20942, 'epoch': 2}
{'type': 'loss', 'content': 0.02894129417836666, 'timestamp': '2025-10-02 00:48:25.754684', 'step': 20943, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:25.815638', 'step': 20943, 'epoch': 2}
{'type': 'loss', 'content': 0.0698535293340683, 'timestamp': '2025-10-02 00:48:25.825719', 'step': 20944, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:48:25.900246', 'step': 20944, 'epoch': 2}
{'type': 'loss', 'content': 0.019053032621741295, 'timestamp': '2025-10-02 00:48:25.911768', 'step': 20945, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:25.968477', 'step': 20945, 'epoch': 2}
{'type': 'loss', 'content': 0.057706065475940704, 'timestamp': '2025-10-02 00:48:25.972401', 'step': 20946, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:48:26.053724', 'step': 20946, 'epoch': 2}
{'type': 'loss', 'content': 0.06418510526418686, 'timestamp': '2025-10-02 00:48:26.065988', 'step': 20947, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:48:26.132232', 'step': 20947, 'epoch': 2}
{'type': 'loss', 'content': 0.057545196264982224, 'timestamp': '2025-10-02 00:48:26.139115', 'step': 20948, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:26.196110', 'step': 20948, 'epoch': 2}
{'type': 'loss', 'content': 0.022411661222577095, 'timestamp': '2025-10-02 00:48:26.207324', 'step': 20949, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:26.264401', 'step': 20949, 'epoch': 2}
{'type': 'loss', 'content': 0.08629084378480911, 'timestamp': '2025-10-02 00:48:26.270550', 'step': 20950, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:26.326547', 'step': 20950, 'epoch': 2}
{'type': 'loss', 'content': 0.20539958775043488, 'timestamp': '2025-10-02 00:48:26.329899', 'step': 20951, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:26.391698', 'step': 20951, 'epoch': 2}
{'type': 'loss', 'content': 0.13075995445251465, 'timestamp': '2025-10-02 00:48:26.398772', 'step': 20952, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:26.459681', 'step': 20952, 'epoch': 2}
{'type': 'loss', 'content': 0.12173338234424591, 'timestamp': '2025-10-02 00:48:26.463758', 'step': 20953, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:26.526206', 'step': 20953, 'epoch': 2}
{'type': 'loss', 'content': 0.04497037082910538, 'timestamp': '2025-10-02 00:48:26.529062', 'step': 20954, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:26.586057', 'step': 20954, 'epoch': 2}
{'type': 'loss', 'content': 0.03998567536473274, 'timestamp': '2025-10-02 00:48:26.589628', 'step': 20955, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:26.650382', 'step': 20955, 'epoch': 2}
{'type': 'loss', 'content': 0.016884643584489822, 'timestamp': '2025-10-02 00:48:26.660716', 'step': 20956, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:26.717479', 'step': 20956, 'epoch': 2}
{'type': 'loss', 'content': 0.1503576934337616, 'timestamp': '2025-10-02 00:48:26.721232', 'step': 20957, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:26.779289', 'step': 20957, 'epoch': 2}
{'type': 'loss', 'content': 0.06811691075563431, 'timestamp': '2025-10-02 00:48:26.786959', 'step': 20958, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:48:26.849520', 'step': 20958, 'epoch': 2}
{'type': 'loss', 'content': 0.0829460546374321, 'timestamp': '2025-10-02 00:48:26.852783', 'step': 20959, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:48:26.918326', 'step': 20959, 'epoch': 2}
{'type': 'loss', 'content': 0.04186108335852623, 'timestamp': '2025-10-02 00:48:26.929588', 'step': 20960, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:26.985876', 'step': 20960, 'epoch': 2}
{'type': 'loss', 'content': 0.05002978816628456, 'timestamp': '2025-10-02 00:48:26.988481', 'step': 20961, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:27.052668', 'step': 20961, 'epoch': 2}
{'type': 'loss', 'content': 0.0540500208735466, 'timestamp': '2025-10-02 00:48:27.060367', 'step': 20962, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:27.118215', 'step': 20962, 'epoch': 2}
{'type': 'loss', 'content': 0.0367790162563324, 'timestamp': '2025-10-02 00:48:27.127570', 'step': 20963, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:27.191067', 'step': 20963, 'epoch': 2}
{'type': 'loss', 'content': 0.09534917771816254, 'timestamp': '2025-10-02 00:48:27.197670', 'step': 20964, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:27.253927', 'step': 20964, 'epoch': 2}
{'type': 'loss', 'content': 0.03478091582655907, 'timestamp': '2025-10-02 00:48:27.259966', 'step': 20965, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:27.317032', 'step': 20965, 'epoch': 2}
{'type': 'loss', 'content': 0.03977324813604355, 'timestamp': '2025-10-02 00:48:27.324640', 'step': 20966, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:27.386122', 'step': 20966, 'epoch': 2}
{'type': 'loss', 'content': 0.04833797365427017, 'timestamp': '2025-10-02 00:48:27.389152', 'step': 20967, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:48:27.449061', 'step': 20967, 'epoch': 2}
{'type': 'loss', 'content': 0.11412354558706284, 'timestamp': '2025-10-02 00:48:27.456212', 'step': 20968, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:48:27.512589', 'step': 20968, 'epoch': 2}
{'type': 'loss', 'content': 0.08302386105060577, 'timestamp': '2025-10-02 00:48:27.516348', 'step': 20969, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:27.579046', 'step': 20969, 'epoch': 2}
{'type': 'loss', 'content': 0.028475962579250336, 'timestamp': '2025-10-02 00:48:27.588438', 'step': 20970, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:27.646035', 'step': 20970, 'epoch': 2}
{'type': 'loss', 'content': 0.05136615037918091, 'timestamp': '2025-10-02 00:48:27.651914', 'step': 20971, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:27.714071', 'step': 20971, 'epoch': 2}
{'type': 'loss', 'content': 0.0456506609916687, 'timestamp': '2025-10-02 00:48:27.720625', 'step': 20972, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:27.781186', 'step': 20972, 'epoch': 2}
{'type': 'loss', 'content': 0.07423317432403564, 'timestamp': '2025-10-02 00:48:27.784925', 'step': 20973, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:27.866026', 'step': 20973, 'epoch': 2}
{'type': 'loss', 'content': 0.12270565330982208, 'timestamp': '2025-10-02 00:48:27.869693', 'step': 20974, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:27.926508', 'step': 20974, 'epoch': 2}
{'type': 'loss', 'content': 0.04948948323726654, 'timestamp': '2025-10-02 00:48:27.935849', 'step': 20975, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:48:28.008198', 'step': 20975, 'epoch': 2}
{'type': 'loss', 'content': 0.030931740999221802, 'timestamp': '2025-10-02 00:48:28.014327', 'step': 20976, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:28.070685', 'step': 20976, 'epoch': 2}
{'type': 'loss', 'content': 0.12137540429830551, 'timestamp': '2025-10-02 00:48:28.074458', 'step': 20977, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:28.136502', 'step': 20977, 'epoch': 2}
{'type': 'loss', 'content': 0.06193425506353378, 'timestamp': '2025-10-02 00:48:28.144226', 'step': 20978, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:28.211311', 'step': 20978, 'epoch': 2}
{'type': 'loss', 'content': 0.014536119997501373, 'timestamp': '2025-10-02 00:48:28.220886', 'step': 20979, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:28.276201', 'step': 20979, 'epoch': 2}
{'type': 'loss', 'content': 0.11095017939805984, 'timestamp': '2025-10-02 00:48:28.285947', 'step': 20980, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:48:28.345735', 'step': 20980, 'epoch': 2}
{'type': 'loss', 'content': 0.05250594764947891, 'timestamp': '2025-10-02 00:48:28.356708', 'step': 20981, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:28.416760', 'step': 20981, 'epoch': 2}
{'type': 'loss', 'content': 0.059831760823726654, 'timestamp': '2025-10-02 00:48:28.420689', 'step': 20982, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:28.487944', 'step': 20982, 'epoch': 2}
{'type': 'loss', 'content': 0.020099055022001266, 'timestamp': '2025-10-02 00:48:28.495429', 'step': 20983, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:28.565160', 'step': 20983, 'epoch': 2}
{'type': 'loss', 'content': 0.04602302238345146, 'timestamp': '2025-10-02 00:48:28.574905', 'step': 20984, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:28.634229', 'step': 20984, 'epoch': 2}
{'type': 'loss', 'content': 0.15519720315933228, 'timestamp': '2025-10-02 00:48:28.637075', 'step': 20985, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:48:28.694604', 'step': 20985, 'epoch': 2}
{'type': 'loss', 'content': 0.10090451687574387, 'timestamp': '2025-10-02 00:48:28.697324', 'step': 20986, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:28.753131', 'step': 20986, 'epoch': 2}
{'type': 'loss', 'content': 0.026554923504590988, 'timestamp': '2025-10-02 00:48:28.756415', 'step': 20987, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:28.813405', 'step': 20987, 'epoch': 2}
{'type': 'loss', 'content': 0.03238604590296745, 'timestamp': '2025-10-02 00:48:28.823831', 'step': 20988, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:28.892598', 'step': 20988, 'epoch': 2}
{'type': 'loss', 'content': 0.053389765322208405, 'timestamp': '2025-10-02 00:48:28.900572', 'step': 20989, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:28.960025', 'step': 20989, 'epoch': 2}
{'type': 'loss', 'content': 0.1185227781534195, 'timestamp': '2025-10-02 00:48:28.962626', 'step': 20990, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:29.027740', 'step': 20990, 'epoch': 2}
{'type': 'loss', 'content': 0.033075962215662, 'timestamp': '2025-10-02 00:48:29.037219', 'step': 20991, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:29.094922', 'step': 20991, 'epoch': 2}
{'type': 'loss', 'content': 0.09096698462963104, 'timestamp': '2025-10-02 00:48:29.101753', 'step': 20992, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:29.173729', 'step': 20992, 'epoch': 2}
{'type': 'loss', 'content': 0.06532781571149826, 'timestamp': '2025-10-02 00:48:29.181976', 'step': 20993, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:29.263077', 'step': 20993, 'epoch': 2}
{'type': 'loss', 'content': 0.14472824335098267, 'timestamp': '2025-10-02 00:48:29.265905', 'step': 20994, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:29.335663', 'step': 20994, 'epoch': 2}
{'type': 'loss', 'content': 0.10059976577758789, 'timestamp': '2025-10-02 00:48:29.339533', 'step': 20995, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:29.408437', 'step': 20995, 'epoch': 2}
{'type': 'loss', 'content': 0.1295095682144165, 'timestamp': '2025-10-02 00:48:29.418304', 'step': 20996, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:29.482097', 'step': 20996, 'epoch': 2}
{'type': 'loss', 'content': 0.02088264189660549, 'timestamp': '2025-10-02 00:48:29.485351', 'step': 20997, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:48:29.541028', 'step': 20997, 'epoch': 2}
{'type': 'loss', 'content': 0.09397323429584503, 'timestamp': '2025-10-02 00:48:29.544304', 'step': 20998, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:29.602157', 'step': 20998, 'epoch': 2}
{'type': 'loss', 'content': 0.02448379062116146, 'timestamp': '2025-10-02 00:48:29.611492', 'step': 20999, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:48:29.690257', 'step': 20999, 'epoch': 2}
{'type': 'loss', 'content': 0.06519399583339691, 'timestamp': '2025-10-02 00:48:29.700588', 'step': 21000, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 21000', 'timestamp': '2025-10-02 00:48:30.159027', 'step': 21000, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:48:30.234072', 'step': 21000, 'epoch': 2}
{'type': 'loss', 'content': 0.02906745672225952, 'timestamp': '2025-10-02 00:48:30.247465', 'step': 21001, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:30.321306', 'step': 21001, 'epoch': 2}
{'type': 'loss', 'content': 0.02157524600625038, 'timestamp': '2025-10-02 00:48:30.329545', 'step': 21002, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:30.398657', 'step': 21002, 'epoch': 2}
{'type': 'loss', 'content': 0.02448013424873352, 'timestamp': '2025-10-02 00:48:30.401322', 'step': 21003, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:48:30.461293', 'step': 21003, 'epoch': 2}
{'type': 'loss', 'content': 0.12492392212152481, 'timestamp': '2025-10-02 00:48:30.468040', 'step': 21004, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:30.534286', 'step': 21004, 'epoch': 2}
{'type': 'loss', 'content': 0.07690320163965225, 'timestamp': '2025-10-02 00:48:30.537329', 'step': 21005, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:48:30.612711', 'step': 21005, 'epoch': 2}
{'type': 'loss', 'content': 0.010594291612505913, 'timestamp': '2025-10-02 00:48:30.623170', 'step': 21006, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:48:30.694764', 'step': 21006, 'epoch': 2}
{'type': 'loss', 'content': 0.022368617355823517, 'timestamp': '2025-10-02 00:48:30.700281', 'step': 21007, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:30.772481', 'step': 21007, 'epoch': 2}
{'type': 'loss', 'content': 0.06757999211549759, 'timestamp': '2025-10-02 00:48:30.782855', 'step': 21008, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:30.843127', 'step': 21008, 'epoch': 2}
{'type': 'loss', 'content': 0.05082535743713379, 'timestamp': '2025-10-02 00:48:30.846344', 'step': 21009, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:30.927780', 'step': 21009, 'epoch': 2}
{'type': 'loss', 'content': 0.04565545916557312, 'timestamp': '2025-10-02 00:48:30.930895', 'step': 21010, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:48:30.998012', 'step': 21010, 'epoch': 2}
{'type': 'loss', 'content': 0.08602774143218994, 'timestamp': '2025-10-02 00:48:31.011592', 'step': 21011, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:31.091214', 'step': 21011, 'epoch': 2}
{'type': 'loss', 'content': 0.037340279668569565, 'timestamp': '2025-10-02 00:48:31.098977', 'step': 21012, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:48:31.174654', 'step': 21012, 'epoch': 2}
{'type': 'loss', 'content': 0.012653352692723274, 'timestamp': '2025-10-02 00:48:31.186309', 'step': 21013, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:31.256024', 'step': 21013, 'epoch': 2}
{'type': 'loss', 'content': 0.16462185978889465, 'timestamp': '2025-10-02 00:48:31.258838', 'step': 21014, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:31.324743', 'step': 21014, 'epoch': 2}
{'type': 'loss', 'content': 0.07229771465063095, 'timestamp': '2025-10-02 00:48:31.327824', 'step': 21015, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:31.406945', 'step': 21015, 'epoch': 2}
{'type': 'loss', 'content': 0.16525663435459137, 'timestamp': '2025-10-02 00:48:31.414070', 'step': 21016, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:31.482100', 'step': 21016, 'epoch': 2}
{'type': 'loss', 'content': 0.09211038798093796, 'timestamp': '2025-10-02 00:48:31.485393', 'step': 21017, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:31.542717', 'step': 21017, 'epoch': 2}
{'type': 'loss', 'content': 0.04005714878439903, 'timestamp': '2025-10-02 00:48:31.553455', 'step': 21018, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:48:31.620858', 'step': 21018, 'epoch': 2}
{'type': 'loss', 'content': 0.10110728442668915, 'timestamp': '2025-10-02 00:48:31.625340', 'step': 21019, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:48:31.689798', 'step': 21019, 'epoch': 2}
{'type': 'loss', 'content': 0.01654389686882496, 'timestamp': '2025-10-02 00:48:31.700708', 'step': 21020, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:31.756903', 'step': 21020, 'epoch': 2}
{'type': 'loss', 'content': 0.09267349541187286, 'timestamp': '2025-10-02 00:48:31.761232', 'step': 21021, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:31.827618', 'step': 21021, 'epoch': 2}
{'type': 'loss', 'content': 0.046296872198581696, 'timestamp': '2025-10-02 00:48:31.838734', 'step': 21022, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:48:31.907121', 'step': 21022, 'epoch': 2}
{'type': 'loss', 'content': 0.04833389073610306, 'timestamp': '2025-10-02 00:48:31.917954', 'step': 21023, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:32.004721', 'step': 21023, 'epoch': 2}
{'type': 'loss', 'content': 0.0842253789305687, 'timestamp': '2025-10-02 00:48:32.011841', 'step': 21024, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:32.087854', 'step': 21024, 'epoch': 2}
{'type': 'loss', 'content': 0.120188407599926, 'timestamp': '2025-10-02 00:48:32.091467', 'step': 21025, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:32.158982', 'step': 21025, 'epoch': 2}
{'type': 'loss', 'content': 0.11333778500556946, 'timestamp': '2025-10-02 00:48:32.166684', 'step': 21026, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:32.230778', 'step': 21026, 'epoch': 2}
{'type': 'loss', 'content': 0.19478075206279755, 'timestamp': '2025-10-02 00:48:32.233933', 'step': 21027, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:32.293864', 'step': 21027, 'epoch': 2}
{'type': 'loss', 'content': 0.032460153102874756, 'timestamp': '2025-10-02 00:48:32.304082', 'step': 21028, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:32.365749', 'step': 21028, 'epoch': 2}
{'type': 'loss', 'content': 0.050478074699640274, 'timestamp': '2025-10-02 00:48:32.370373', 'step': 21029, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:32.433759', 'step': 21029, 'epoch': 2}
{'type': 'loss', 'content': 0.01942877471446991, 'timestamp': '2025-10-02 00:48:32.443280', 'step': 21030, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:48:32.511157', 'step': 21030, 'epoch': 2}
{'type': 'loss', 'content': 0.05984731391072273, 'timestamp': '2025-10-02 00:48:32.521258', 'step': 21031, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:32.583854', 'step': 21031, 'epoch': 2}
{'type': 'loss', 'content': 0.02388075180351734, 'timestamp': '2025-10-02 00:48:32.599652', 'step': 21032, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:32.668737', 'step': 21032, 'epoch': 2}
{'type': 'loss', 'content': 0.0116056427359581, 'timestamp': '2025-10-02 00:48:32.671585', 'step': 21033, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 592], 'flops': 11840071943488.0}, 'timestamp': '2025-10-02 00:48:32.773694', 'step': 21033, 'epoch': 2}
{'type': 'loss', 'content': 0.03483309969305992, 'timestamp': '2025-10-02 00:48:32.790136', 'step': 21034, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:48:32.846044', 'step': 21034, 'epoch': 2}
{'type': 'loss', 'content': 0.10736310482025146, 'timestamp': '2025-10-02 00:48:32.851549', 'step': 21035, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:32.911532', 'step': 21035, 'epoch': 2}
{'type': 'loss', 'content': 0.06591910868883133, 'timestamp': '2025-10-02 00:48:32.920764', 'step': 21036, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:32.983810', 'step': 21036, 'epoch': 2}
{'type': 'loss', 'content': 0.023473987355828285, 'timestamp': '2025-10-02 00:48:32.994132', 'step': 21037, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:48:33.055764', 'step': 21037, 'epoch': 2}
{'type': 'loss', 'content': 0.03144613280892372, 'timestamp': '2025-10-02 00:48:33.065958', 'step': 21038, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:33.133357', 'step': 21038, 'epoch': 2}
{'type': 'loss', 'content': 0.17427487671375275, 'timestamp': '2025-10-02 00:48:33.136946', 'step': 21039, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:33.204598', 'step': 21039, 'epoch': 2}
{'type': 'loss', 'content': 0.038387130945920944, 'timestamp': '2025-10-02 00:48:33.211441', 'step': 21040, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:48:33.267869', 'step': 21040, 'epoch': 2}
{'type': 'loss', 'content': 0.0695912092924118, 'timestamp': '2025-10-02 00:48:33.270118', 'step': 21041, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:33.325790', 'step': 21041, 'epoch': 2}
{'type': 'loss', 'content': 0.11918377876281738, 'timestamp': '2025-10-02 00:48:33.329794', 'step': 21042, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:33.398186', 'step': 21042, 'epoch': 2}
{'type': 'loss', 'content': 0.034726131707429886, 'timestamp': '2025-10-02 00:48:33.401089', 'step': 21043, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:33.461197', 'step': 21043, 'epoch': 2}
{'type': 'loss', 'content': 0.049179624766111374, 'timestamp': '2025-10-02 00:48:33.474629', 'step': 21044, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:33.547066', 'step': 21044, 'epoch': 2}
{'type': 'loss', 'content': 0.11278782784938812, 'timestamp': '2025-10-02 00:48:33.555939', 'step': 21045, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:33.619385', 'step': 21045, 'epoch': 2}
{'type': 'loss', 'content': 0.08589284121990204, 'timestamp': '2025-10-02 00:48:33.631558', 'step': 21046, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:33.711773', 'step': 21046, 'epoch': 2}
{'type': 'loss', 'content': 0.0736454576253891, 'timestamp': '2025-10-02 00:48:33.717641', 'step': 21047, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:33.808489', 'step': 21047, 'epoch': 2}
{'type': 'loss', 'content': 0.025016184896230698, 'timestamp': '2025-10-02 00:48:33.823899', 'step': 21048, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:33.882865', 'step': 21048, 'epoch': 2}
{'type': 'loss', 'content': 0.07095939666032791, 'timestamp': '2025-10-02 00:48:33.886143', 'step': 21049, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:33.964502', 'step': 21049, 'epoch': 2}
{'type': 'loss', 'content': 0.05381520092487335, 'timestamp': '2025-10-02 00:48:33.978326', 'step': 21050, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:34.083047', 'step': 21050, 'epoch': 2}
{'type': 'loss', 'content': 0.0681840255856514, 'timestamp': '2025-10-02 00:48:34.092717', 'step': 21051, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:34.165776', 'step': 21051, 'epoch': 2}
{'type': 'loss', 'content': 0.003209594637155533, 'timestamp': '2025-10-02 00:48:34.173365', 'step': 21052, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:48:34.237938', 'step': 21052, 'epoch': 2}
{'type': 'loss', 'content': 0.10999386012554169, 'timestamp': '2025-10-02 00:48:34.248242', 'step': 21053, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:34.320877', 'step': 21053, 'epoch': 2}
{'type': 'loss', 'content': 0.02351098693907261, 'timestamp': '2025-10-02 00:48:34.330103', 'step': 21054, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:34.402419', 'step': 21054, 'epoch': 2}
{'type': 'loss', 'content': 0.08325830101966858, 'timestamp': '2025-10-02 00:48:34.409915', 'step': 21055, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:34.481876', 'step': 21055, 'epoch': 2}
{'type': 'loss', 'content': 0.053077198565006256, 'timestamp': '2025-10-02 00:48:34.488545', 'step': 21056, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:34.562402', 'step': 21056, 'epoch': 2}
{'type': 'loss', 'content': 0.0037756520323455334, 'timestamp': '2025-10-02 00:48:34.572131', 'step': 21057, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:34.629784', 'step': 21057, 'epoch': 2}
{'type': 'loss', 'content': 0.07783559709787369, 'timestamp': '2025-10-02 00:48:34.633241', 'step': 21058, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:34.712326', 'step': 21058, 'epoch': 2}
{'type': 'loss', 'content': 0.03585987538099289, 'timestamp': '2025-10-02 00:48:34.715749', 'step': 21059, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:34.790151', 'step': 21059, 'epoch': 2}
{'type': 'loss', 'content': 0.006087609101086855, 'timestamp': '2025-10-02 00:48:34.798133', 'step': 21060, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:48:34.861844', 'step': 21060, 'epoch': 2}
{'type': 'loss', 'content': 0.043787069618701935, 'timestamp': '2025-10-02 00:48:34.875530', 'step': 21061, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:34.949030', 'step': 21061, 'epoch': 2}
{'type': 'loss', 'content': 0.010436675511300564, 'timestamp': '2025-10-02 00:48:34.961059', 'step': 21062, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:35.044232', 'step': 21062, 'epoch': 2}
{'type': 'loss', 'content': 0.02501138485968113, 'timestamp': '2025-10-02 00:48:35.056888', 'step': 21063, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:35.133640', 'step': 21063, 'epoch': 2}
{'type': 'loss', 'content': 0.04029504582285881, 'timestamp': '2025-10-02 00:48:35.140940', 'step': 21064, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:35.208454', 'step': 21064, 'epoch': 2}
{'type': 'loss', 'content': 0.10546056926250458, 'timestamp': '2025-10-02 00:48:35.211955', 'step': 21065, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:48:35.287181', 'step': 21065, 'epoch': 2}
{'type': 'loss', 'content': 0.0608840249478817, 'timestamp': '2025-10-02 00:48:35.290586', 'step': 21066, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:35.362339', 'step': 21066, 'epoch': 2}
{'type': 'loss', 'content': 0.0542951375246048, 'timestamp': '2025-10-02 00:48:35.373635', 'step': 21067, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:48:35.460278', 'step': 21067, 'epoch': 2}
{'type': 'loss', 'content': 0.05639771744608879, 'timestamp': '2025-10-02 00:48:35.468473', 'step': 21068, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:35.541977', 'step': 21068, 'epoch': 2}
{'type': 'loss', 'content': 0.029898209497332573, 'timestamp': '2025-10-02 00:48:35.549487', 'step': 21069, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:35.626772', 'step': 21069, 'epoch': 2}
{'type': 'loss', 'content': 0.035510219633579254, 'timestamp': '2025-10-02 00:48:35.631579', 'step': 21070, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:35.698340', 'step': 21070, 'epoch': 2}
{'type': 'loss', 'content': 0.08202818781137466, 'timestamp': '2025-10-02 00:48:35.701461', 'step': 21071, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:35.775268', 'step': 21071, 'epoch': 2}
{'type': 'loss', 'content': 0.041416555643081665, 'timestamp': '2025-10-02 00:48:35.785585', 'step': 21072, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:35.852253', 'step': 21072, 'epoch': 2}
{'type': 'loss', 'content': 0.0194232240319252, 'timestamp': '2025-10-02 00:48:35.862137', 'step': 21073, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:35.932859', 'step': 21073, 'epoch': 2}
{'type': 'loss', 'content': 0.006821012124419212, 'timestamp': '2025-10-02 00:48:35.940478', 'step': 21074, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:36.013708', 'step': 21074, 'epoch': 2}
{'type': 'loss', 'content': 0.09466510266065598, 'timestamp': '2025-10-02 00:48:36.017648', 'step': 21075, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:36.075060', 'step': 21075, 'epoch': 2}
{'type': 'loss', 'content': 0.08301184326410294, 'timestamp': '2025-10-02 00:48:36.087421', 'step': 21076, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:36.150720', 'step': 21076, 'epoch': 2}
{'type': 'loss', 'content': 0.08983336389064789, 'timestamp': '2025-10-02 00:48:36.156699', 'step': 21077, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:36.214752', 'step': 21077, 'epoch': 2}
{'type': 'loss', 'content': 0.05508657917380333, 'timestamp': '2025-10-02 00:48:36.224192', 'step': 21078, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:48:36.294051', 'step': 21078, 'epoch': 2}
{'type': 'loss', 'content': 0.07842443138360977, 'timestamp': '2025-10-02 00:48:36.298853', 'step': 21079, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:36.378821', 'step': 21079, 'epoch': 2}
{'type': 'loss', 'content': 0.026824794709682465, 'timestamp': '2025-10-02 00:48:36.395882', 'step': 21080, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:36.453998', 'step': 21080, 'epoch': 2}
{'type': 'loss', 'content': 0.04099227115511894, 'timestamp': '2025-10-02 00:48:36.457474', 'step': 21081, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:48:36.540966', 'step': 21081, 'epoch': 2}
{'type': 'loss', 'content': 0.15257301926612854, 'timestamp': '2025-10-02 00:48:36.544767', 'step': 21082, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:36.617915', 'step': 21082, 'epoch': 2}
{'type': 'loss', 'content': 0.0031520372722297907, 'timestamp': '2025-10-02 00:48:36.628533', 'step': 21083, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:36.686689', 'step': 21083, 'epoch': 2}
{'type': 'loss', 'content': 0.06795770674943924, 'timestamp': '2025-10-02 00:48:36.701090', 'step': 21084, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:36.764685', 'step': 21084, 'epoch': 2}
{'type': 'loss', 'content': 0.07890164852142334, 'timestamp': '2025-10-02 00:48:36.775779', 'step': 21085, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:48:36.858719', 'step': 21085, 'epoch': 2}
{'type': 'loss', 'content': 0.06341750919818878, 'timestamp': '2025-10-02 00:48:36.869292', 'step': 21086, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:36.952143', 'step': 21086, 'epoch': 2}
{'type': 'loss', 'content': 0.04523961618542671, 'timestamp': '2025-10-02 00:48:36.957170', 'step': 21087, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:37.024150', 'step': 21087, 'epoch': 2}
{'type': 'loss', 'content': 0.026347137987613678, 'timestamp': '2025-10-02 00:48:37.039269', 'step': 21088, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:37.102990', 'step': 21088, 'epoch': 2}
{'type': 'loss', 'content': 0.028211113065481186, 'timestamp': '2025-10-02 00:48:37.106008', 'step': 21089, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:37.174804', 'step': 21089, 'epoch': 2}
{'type': 'loss', 'content': 0.18688814342021942, 'timestamp': '2025-10-02 00:48:37.178959', 'step': 21090, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:48:37.235936', 'step': 21090, 'epoch': 2}
{'type': 'loss', 'content': 0.062424320727586746, 'timestamp': '2025-10-02 00:48:37.245360', 'step': 21091, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:48:37.317076', 'step': 21091, 'epoch': 2}
{'type': 'loss', 'content': 0.01817399635910988, 'timestamp': '2025-10-02 00:48:37.328464', 'step': 21092, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:37.405293', 'step': 21092, 'epoch': 2}
{'type': 'loss', 'content': 0.07474945485591888, 'timestamp': '2025-10-02 00:48:37.409609', 'step': 21093, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:48:37.497279', 'step': 21093, 'epoch': 2}
{'type': 'loss', 'content': 0.005361891817301512, 'timestamp': '2025-10-02 00:48:37.510475', 'step': 21094, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:37.572333', 'step': 21094, 'epoch': 2}
{'type': 'loss', 'content': 0.013574603013694286, 'timestamp': '2025-10-02 00:48:37.576945', 'step': 21095, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:37.635688', 'step': 21095, 'epoch': 2}
{'type': 'loss', 'content': 0.05310683697462082, 'timestamp': '2025-10-02 00:48:37.643745', 'step': 21096, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:48:37.708791', 'step': 21096, 'epoch': 2}
{'type': 'loss', 'content': 0.0560026615858078, 'timestamp': '2025-10-02 00:48:37.720305', 'step': 21097, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:37.789672', 'step': 21097, 'epoch': 2}
{'type': 'loss', 'content': 0.055391594767570496, 'timestamp': '2025-10-02 00:48:37.803651', 'step': 21098, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:48:37.899456', 'step': 21098, 'epoch': 2}
{'type': 'loss', 'content': 0.02446984313428402, 'timestamp': '2025-10-02 00:48:37.909950', 'step': 21099, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:37.995147', 'step': 21099, 'epoch': 2}
{'type': 'loss', 'content': 0.038386065512895584, 'timestamp': '2025-10-02 00:48:38.012016', 'step': 21100, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:38.083840', 'step': 21100, 'epoch': 2}
{'type': 'loss', 'content': 0.08710218966007233, 'timestamp': '2025-10-02 00:48:38.099102', 'step': 21101, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:38.181715', 'step': 21101, 'epoch': 2}
{'type': 'loss', 'content': 0.004729812499135733, 'timestamp': '2025-10-02 00:48:38.186388', 'step': 21102, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:48:38.244391', 'step': 21102, 'epoch': 2}
{'type': 'loss', 'content': 0.09069734066724777, 'timestamp': '2025-10-02 00:48:38.247805', 'step': 21103, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:48:38.319955', 'step': 21103, 'epoch': 2}
{'type': 'loss', 'content': 0.026964617893099785, 'timestamp': '2025-10-02 00:48:38.330912', 'step': 21104, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:38.389760', 'step': 21104, 'epoch': 2}
{'type': 'loss', 'content': 0.16574397683143616, 'timestamp': '2025-10-02 00:48:38.392778', 'step': 21105, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:38.450202', 'step': 21105, 'epoch': 2}
{'type': 'loss', 'content': 0.04379463940858841, 'timestamp': '2025-10-02 00:48:38.453825', 'step': 21106, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:38.525061', 'step': 21106, 'epoch': 2}
{'type': 'loss', 'content': 0.05612080544233322, 'timestamp': '2025-10-02 00:48:38.534408', 'step': 21107, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:38.592330', 'step': 21107, 'epoch': 2}
{'type': 'loss', 'content': 0.0734153538942337, 'timestamp': '2025-10-02 00:48:38.599832', 'step': 21108, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:38.657498', 'step': 21108, 'epoch': 2}
{'type': 'loss', 'content': 0.04786678031086922, 'timestamp': '2025-10-02 00:48:38.666627', 'step': 21109, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:38.732334', 'step': 21109, 'epoch': 2}
{'type': 'loss', 'content': 0.02786432020366192, 'timestamp': '2025-10-02 00:48:38.741989', 'step': 21110, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:38.814133', 'step': 21110, 'epoch': 2}
{'type': 'loss', 'content': 0.10110519826412201, 'timestamp': '2025-10-02 00:48:38.823652', 'step': 21111, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:38.886509', 'step': 21111, 'epoch': 2}
{'type': 'loss', 'content': 0.08551632612943649, 'timestamp': '2025-10-02 00:48:38.901104', 'step': 21112, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:38.958439', 'step': 21112, 'epoch': 2}
{'type': 'loss', 'content': 0.01717514917254448, 'timestamp': '2025-10-02 00:48:38.966939', 'step': 21113, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:39.047981', 'step': 21113, 'epoch': 2}
{'type': 'loss', 'content': 0.09199129045009613, 'timestamp': '2025-10-02 00:48:39.051270', 'step': 21114, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:39.116394', 'step': 21114, 'epoch': 2}
{'type': 'loss', 'content': 0.034812260419130325, 'timestamp': '2025-10-02 00:48:39.119434', 'step': 21115, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:39.194156', 'step': 21115, 'epoch': 2}
{'type': 'loss', 'content': 0.01184801198542118, 'timestamp': '2025-10-02 00:48:39.204460', 'step': 21116, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:48:39.261683', 'step': 21116, 'epoch': 2}
{'type': 'loss', 'content': 0.05697028711438179, 'timestamp': '2025-10-02 00:48:39.264694', 'step': 21117, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:39.325524', 'step': 21117, 'epoch': 2}
{'type': 'loss', 'content': 0.019032029435038567, 'timestamp': '2025-10-02 00:48:39.335083', 'step': 21118, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:39.393906', 'step': 21118, 'epoch': 2}
{'type': 'loss', 'content': 0.09049427509307861, 'timestamp': '2025-10-02 00:48:39.397851', 'step': 21119, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:48:39.470864', 'step': 21119, 'epoch': 2}
{'type': 'loss', 'content': 0.029295144602656364, 'timestamp': '2025-10-02 00:48:39.487166', 'step': 21120, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:39.543039', 'step': 21120, 'epoch': 2}
{'type': 'loss', 'content': 0.07185473293066025, 'timestamp': '2025-10-02 00:48:39.546516', 'step': 21121, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:48:39.610322', 'step': 21121, 'epoch': 2}
{'type': 'loss', 'content': 0.11341695487499237, 'timestamp': '2025-10-02 00:48:39.614390', 'step': 21122, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:39.679362', 'step': 21122, 'epoch': 2}
{'type': 'loss', 'content': 0.02349180541932583, 'timestamp': '2025-10-02 00:48:39.689668', 'step': 21123, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:39.757216', 'step': 21123, 'epoch': 2}
{'type': 'loss', 'content': 0.02400585263967514, 'timestamp': '2025-10-02 00:48:39.764418', 'step': 21124, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:39.842014', 'step': 21124, 'epoch': 2}
{'type': 'loss', 'content': 0.010941890999674797, 'timestamp': '2025-10-02 00:48:39.845447', 'step': 21125, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:39.918245', 'step': 21125, 'epoch': 2}
{'type': 'loss', 'content': 0.04505032300949097, 'timestamp': '2025-10-02 00:48:39.929489', 'step': 21126, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:39.998404', 'step': 21126, 'epoch': 2}
{'type': 'loss', 'content': 0.044473569840192795, 'timestamp': '2025-10-02 00:48:40.008042', 'step': 21127, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:40.075999', 'step': 21127, 'epoch': 2}
{'type': 'loss', 'content': 0.007568191271275282, 'timestamp': '2025-10-02 00:48:40.083102', 'step': 21128, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:40.140518', 'step': 21128, 'epoch': 2}
{'type': 'loss', 'content': 0.09227237850427628, 'timestamp': '2025-10-02 00:48:40.149538', 'step': 21129, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:40.213896', 'step': 21129, 'epoch': 2}
{'type': 'loss', 'content': 0.004108821041882038, 'timestamp': '2025-10-02 00:48:40.217852', 'step': 21130, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:40.278759', 'step': 21130, 'epoch': 2}
{'type': 'loss', 'content': 0.025354351848363876, 'timestamp': '2025-10-02 00:48:40.288123', 'step': 21131, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:40.358042', 'step': 21131, 'epoch': 2}
{'type': 'loss', 'content': 0.016756342723965645, 'timestamp': '2025-10-02 00:48:40.365380', 'step': 21132, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:40.436174', 'step': 21132, 'epoch': 2}
{'type': 'loss', 'content': 0.12205144017934799, 'timestamp': '2025-10-02 00:48:40.442248', 'step': 21133, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:40.516943', 'step': 21133, 'epoch': 2}
{'type': 'loss', 'content': 0.06582273542881012, 'timestamp': '2025-10-02 00:48:40.528736', 'step': 21134, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:40.606378', 'step': 21134, 'epoch': 2}
{'type': 'loss', 'content': 0.04166527837514877, 'timestamp': '2025-10-02 00:48:40.615154', 'step': 21135, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:40.673729', 'step': 21135, 'epoch': 2}
{'type': 'loss', 'content': 0.01747838780283928, 'timestamp': '2025-10-02 00:48:40.682171', 'step': 21136, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:40.746109', 'step': 21136, 'epoch': 2}
{'type': 'loss', 'content': 0.027890797704458237, 'timestamp': '2025-10-02 00:48:40.756373', 'step': 21137, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:40.820318', 'step': 21137, 'epoch': 2}
{'type': 'loss', 'content': 0.08367474377155304, 'timestamp': '2025-10-02 00:48:40.826322', 'step': 21138, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:48:40.886675', 'step': 21138, 'epoch': 2}
{'type': 'loss', 'content': 0.04525339603424072, 'timestamp': '2025-10-02 00:48:40.889481', 'step': 21139, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:48:40.953350', 'step': 21139, 'epoch': 2}
{'type': 'loss', 'content': 0.029987478628754616, 'timestamp': '2025-10-02 00:48:40.967048', 'step': 21140, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:41.036055', 'step': 21140, 'epoch': 2}
{'type': 'loss', 'content': 0.0006877492414787412, 'timestamp': '2025-10-02 00:48:41.044098', 'step': 21141, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:48:41.109964', 'step': 21141, 'epoch': 2}
{'type': 'loss', 'content': 0.03583373874425888, 'timestamp': '2025-10-02 00:48:41.113966', 'step': 21142, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:48:41.190914', 'step': 21142, 'epoch': 2}
{'type': 'loss', 'content': 0.09295392781496048, 'timestamp': '2025-10-02 00:48:41.194221', 'step': 21143, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:41.262038', 'step': 21143, 'epoch': 2}
{'type': 'loss', 'content': 0.10249556601047516, 'timestamp': '2025-10-02 00:48:41.272188', 'step': 21144, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:41.329402', 'step': 21144, 'epoch': 2}
{'type': 'loss', 'content': 0.08459056168794632, 'timestamp': '2025-10-02 00:48:41.340902', 'step': 21145, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:48:41.403805', 'step': 21145, 'epoch': 2}
{'type': 'loss', 'content': 0.08771990239620209, 'timestamp': '2025-10-02 00:48:41.407266', 'step': 21146, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:41.464896', 'step': 21146, 'epoch': 2}
{'type': 'loss', 'content': 0.12949907779693604, 'timestamp': '2025-10-02 00:48:41.468990', 'step': 21147, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:41.533503', 'step': 21147, 'epoch': 2}
{'type': 'loss', 'content': 0.09506769478321075, 'timestamp': '2025-10-02 00:48:41.541351', 'step': 21148, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:41.627278', 'step': 21148, 'epoch': 2}
{'type': 'loss', 'content': 0.038158949464559555, 'timestamp': '2025-10-02 00:48:41.632054', 'step': 21149, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:48:41.722037', 'step': 21149, 'epoch': 2}
{'type': 'loss', 'content': 0.04260999336838722, 'timestamp': '2025-10-02 00:48:41.734377', 'step': 21150, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:48:41.809447', 'step': 21150, 'epoch': 2}
{'type': 'loss', 'content': 0.005438516847789288, 'timestamp': '2025-10-02 00:48:41.820293', 'step': 21151, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:41.896982', 'step': 21151, 'epoch': 2}
{'type': 'loss', 'content': 0.05537669360637665, 'timestamp': '2025-10-02 00:48:41.911138', 'step': 21152, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:48:41.979089', 'step': 21152, 'epoch': 2}
{'type': 'loss', 'content': 0.06569690257310867, 'timestamp': '2025-10-02 00:48:41.990106', 'step': 21153, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:42.046731', 'step': 21153, 'epoch': 2}
{'type': 'loss', 'content': 0.10029082000255585, 'timestamp': '2025-10-02 00:48:42.049918', 'step': 21154, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:42.110234', 'step': 21154, 'epoch': 2}
{'type': 'loss', 'content': 0.06662733107805252, 'timestamp': '2025-10-02 00:48:42.115972', 'step': 21155, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:42.172174', 'step': 21155, 'epoch': 2}
{'type': 'loss', 'content': 0.044714488089084625, 'timestamp': '2025-10-02 00:48:42.181190', 'step': 21156, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:48:42.251842', 'step': 21156, 'epoch': 2}
{'type': 'loss', 'content': 0.021670209243893623, 'timestamp': '2025-10-02 00:48:42.265240', 'step': 21157, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:48:42.330354', 'step': 21157, 'epoch': 2}
{'type': 'loss', 'content': 0.05413633957505226, 'timestamp': '2025-10-02 00:48:42.340507', 'step': 21158, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:48:42.407621', 'step': 21158, 'epoch': 2}
{'type': 'loss', 'content': 0.0068806009367108345, 'timestamp': '2025-10-02 00:48:42.417792', 'step': 21159, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:48:42.484439', 'step': 21159, 'epoch': 2}
{'type': 'loss', 'content': 0.05737970396876335, 'timestamp': '2025-10-02 00:48:42.495386', 'step': 21160, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:42.557788', 'step': 21160, 'epoch': 2}
{'type': 'loss', 'content': 0.011876705102622509, 'timestamp': '2025-10-02 00:48:42.563984', 'step': 21161, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:42.629619', 'step': 21161, 'epoch': 2}
{'type': 'loss', 'content': 0.028695618733763695, 'timestamp': '2025-10-02 00:48:42.637167', 'step': 21162, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:42.694148', 'step': 21162, 'epoch': 2}
{'type': 'loss', 'content': 0.014350579120218754, 'timestamp': '2025-10-02 00:48:42.700065', 'step': 21163, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:42.756780', 'step': 21163, 'epoch': 2}
{'type': 'loss', 'content': 0.07995010167360306, 'timestamp': '2025-10-02 00:48:42.762932', 'step': 21164, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:42.818731', 'step': 21164, 'epoch': 2}
{'type': 'loss', 'content': 0.059976354241371155, 'timestamp': '2025-10-02 00:48:42.820986', 'step': 21165, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:42.876156', 'step': 21165, 'epoch': 2}
{'type': 'loss', 'content': 0.04937301576137543, 'timestamp': '2025-10-02 00:48:42.878629', 'step': 21166, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:42.934020', 'step': 21166, 'epoch': 2}
{'type': 'loss', 'content': 0.028189025819301605, 'timestamp': '2025-10-02 00:48:42.936517', 'step': 21167, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:42.992944', 'step': 21167, 'epoch': 2}
{'type': 'loss', 'content': 0.01507037878036499, 'timestamp': '2025-10-02 00:48:43.000669', 'step': 21168, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:43.063181', 'step': 21168, 'epoch': 2}
{'type': 'loss', 'content': 0.06744658201932907, 'timestamp': '2025-10-02 00:48:43.070521', 'step': 21169, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:43.133445', 'step': 21169, 'epoch': 2}
{'type': 'loss', 'content': 0.03573279827833176, 'timestamp': '2025-10-02 00:48:43.143795', 'step': 21170, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:48:43.205800', 'step': 21170, 'epoch': 2}
{'type': 'loss', 'content': 0.12130866199731827, 'timestamp': '2025-10-02 00:48:43.209120', 'step': 21171, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:43.268518', 'step': 21171, 'epoch': 2}
{'type': 'loss', 'content': 0.06028122454881668, 'timestamp': '2025-10-02 00:48:43.274913', 'step': 21172, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:48:43.338046', 'step': 21172, 'epoch': 2}
{'type': 'loss', 'content': 0.03870350122451782, 'timestamp': '2025-10-02 00:48:43.349334', 'step': 21173, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:43.404577', 'step': 21173, 'epoch': 2}
{'type': 'loss', 'content': 0.13059581816196442, 'timestamp': '2025-10-02 00:48:43.414148', 'step': 21174, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:43.474813', 'step': 21174, 'epoch': 2}
{'type': 'loss', 'content': 0.05519436299800873, 'timestamp': '2025-10-02 00:48:43.484350', 'step': 21175, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:43.552892', 'step': 21175, 'epoch': 2}
{'type': 'loss', 'content': 0.018869508057832718, 'timestamp': '2025-10-02 00:48:43.560111', 'step': 21176, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:48:43.619769', 'step': 21176, 'epoch': 2}
{'type': 'loss', 'content': 0.07447343319654465, 'timestamp': '2025-10-02 00:48:43.624662', 'step': 21177, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:43.693734', 'step': 21177, 'epoch': 2}
{'type': 'loss', 'content': 0.12141618877649307, 'timestamp': '2025-10-02 00:48:43.697248', 'step': 21178, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:43.776777', 'step': 21178, 'epoch': 2}
{'type': 'loss', 'content': 0.0516207180917263, 'timestamp': '2025-10-02 00:48:43.784117', 'step': 21179, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:43.841314', 'step': 21179, 'epoch': 2}
{'type': 'loss', 'content': 0.035868026316165924, 'timestamp': '2025-10-02 00:48:43.848528', 'step': 21180, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:43.911339', 'step': 21180, 'epoch': 2}
{'type': 'loss', 'content': 0.09966090321540833, 'timestamp': '2025-10-02 00:48:43.914227', 'step': 21181, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:43.973181', 'step': 21181, 'epoch': 2}
{'type': 'loss', 'content': 0.056162793189287186, 'timestamp': '2025-10-02 00:48:43.980709', 'step': 21182, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:44.039362', 'step': 21182, 'epoch': 2}
{'type': 'loss', 'content': 0.022850140929222107, 'timestamp': '2025-10-02 00:48:44.046749', 'step': 21183, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:48:44.107724', 'step': 21183, 'epoch': 2}
{'type': 'loss', 'content': 0.09800102561712265, 'timestamp': '2025-10-02 00:48:44.116827', 'step': 21184, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:44.187680', 'step': 21184, 'epoch': 2}
{'type': 'loss', 'content': 0.07413189113140106, 'timestamp': '2025-10-02 00:48:44.196584', 'step': 21185, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:44.262205', 'step': 21185, 'epoch': 2}
{'type': 'loss', 'content': 0.029450668022036552, 'timestamp': '2025-10-02 00:48:44.265675', 'step': 21186, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:44.330223', 'step': 21186, 'epoch': 2}
{'type': 'loss', 'content': 0.013462208211421967, 'timestamp': '2025-10-02 00:48:44.339617', 'step': 21187, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:44.398066', 'step': 21187, 'epoch': 2}
{'type': 'loss', 'content': 0.031892139464616776, 'timestamp': '2025-10-02 00:48:44.404367', 'step': 21188, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:48:44.461401', 'step': 21188, 'epoch': 2}
{'type': 'loss', 'content': 0.07650693506002426, 'timestamp': '2025-10-02 00:48:44.464539', 'step': 21189, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:44.523804', 'step': 21189, 'epoch': 2}
{'type': 'loss', 'content': 0.04529368877410889, 'timestamp': '2025-10-02 00:48:44.526598', 'step': 21190, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:44.590726', 'step': 21190, 'epoch': 2}
{'type': 'loss', 'content': 0.07506392896175385, 'timestamp': '2025-10-02 00:48:44.598184', 'step': 21191, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:44.660521', 'step': 21191, 'epoch': 2}
{'type': 'loss', 'content': 0.06929564476013184, 'timestamp': '2025-10-02 00:48:44.667821', 'step': 21192, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:48:44.724323', 'step': 21192, 'epoch': 2}
{'type': 'loss', 'content': 0.03822273761034012, 'timestamp': '2025-10-02 00:48:44.727164', 'step': 21193, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:48:44.782750', 'step': 21193, 'epoch': 2}
{'type': 'loss', 'content': 0.17437665164470673, 'timestamp': '2025-10-02 00:48:44.784917', 'step': 21194, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:48:44.842412', 'step': 21194, 'epoch': 2}
{'type': 'loss', 'content': 0.10797534137964249, 'timestamp': '2025-10-02 00:48:44.844894', 'step': 21195, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:44.899526', 'step': 21195, 'epoch': 2}
{'type': 'loss', 'content': 0.06022133305668831, 'timestamp': '2025-10-02 00:48:44.905831', 'step': 21196, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:44.960990', 'step': 21196, 'epoch': 2}
{'type': 'loss', 'content': 0.030458815395832062, 'timestamp': '2025-10-02 00:48:44.968684', 'step': 21197, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:45.037084', 'step': 21197, 'epoch': 2}
{'type': 'loss', 'content': 0.03414992615580559, 'timestamp': '2025-10-02 00:48:45.042864', 'step': 21198, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:45.101934', 'step': 21198, 'epoch': 2}
{'type': 'loss', 'content': 0.08779283612966537, 'timestamp': '2025-10-02 00:48:45.108540', 'step': 21199, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:45.172492', 'step': 21199, 'epoch': 2}
{'type': 'loss', 'content': 0.009717206470668316, 'timestamp': '2025-10-02 00:48:45.180375', 'step': 21200, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:45.237101', 'step': 21200, 'epoch': 2}
{'type': 'loss', 'content': 0.06821686774492264, 'timestamp': '2025-10-02 00:48:45.244050', 'step': 21201, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:45.301665', 'step': 21201, 'epoch': 2}
{'type': 'loss', 'content': 0.022207286208868027, 'timestamp': '2025-10-02 00:48:45.307676', 'step': 21202, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:48:45.380961', 'step': 21202, 'epoch': 2}
{'type': 'loss', 'content': 0.04353372007608414, 'timestamp': '2025-10-02 00:48:45.391436', 'step': 21203, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:45.453735', 'step': 21203, 'epoch': 2}
{'type': 'loss', 'content': 0.060301344841718674, 'timestamp': '2025-10-02 00:48:45.460240', 'step': 21204, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:48:45.526236', 'step': 21204, 'epoch': 2}
{'type': 'loss', 'content': 0.01354686077684164, 'timestamp': '2025-10-02 00:48:45.537206', 'step': 21205, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:45.600876', 'step': 21205, 'epoch': 2}
{'type': 'loss', 'content': 0.03550504893064499, 'timestamp': '2025-10-02 00:48:45.610444', 'step': 21206, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:45.673948', 'step': 21206, 'epoch': 2}
{'type': 'loss', 'content': 0.007788101676851511, 'timestamp': '2025-10-02 00:48:45.677386', 'step': 21207, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:45.738860', 'step': 21207, 'epoch': 2}
{'type': 'loss', 'content': 0.030900340527296066, 'timestamp': '2025-10-02 00:48:45.746500', 'step': 21208, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:45.802601', 'step': 21208, 'epoch': 2}
{'type': 'loss', 'content': 0.06269422173500061, 'timestamp': '2025-10-02 00:48:45.806236', 'step': 21209, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:48:45.863616', 'step': 21209, 'epoch': 2}
{'type': 'loss', 'content': 0.03556952625513077, 'timestamp': '2025-10-02 00:48:45.866802', 'step': 21210, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:45.928194', 'step': 21210, 'epoch': 2}
{'type': 'loss', 'content': 0.2534756064414978, 'timestamp': '2025-10-02 00:48:45.936562', 'step': 21211, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:46.008421', 'step': 21211, 'epoch': 2}
{'type': 'loss', 'content': 0.11294777691364288, 'timestamp': '2025-10-02 00:48:46.015055', 'step': 21212, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:46.076635', 'step': 21212, 'epoch': 2}
{'type': 'loss', 'content': 0.09110818058252335, 'timestamp': '2025-10-02 00:48:46.085978', 'step': 21213, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:46.151669', 'step': 21213, 'epoch': 2}
{'type': 'loss', 'content': 0.24796774983406067, 'timestamp': '2025-10-02 00:48:46.164471', 'step': 21214, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:46.233087', 'step': 21214, 'epoch': 2}
{'type': 'loss', 'content': 0.024815311655402184, 'timestamp': '2025-10-02 00:48:46.238901', 'step': 21215, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:46.317909', 'step': 21215, 'epoch': 2}
{'type': 'loss', 'content': 0.08628129959106445, 'timestamp': '2025-10-02 00:48:46.331085', 'step': 21216, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:46.406651', 'step': 21216, 'epoch': 2}
{'type': 'loss', 'content': 0.05269700288772583, 'timestamp': '2025-10-02 00:48:46.416085', 'step': 21217, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:46.474758', 'step': 21217, 'epoch': 2}
{'type': 'loss', 'content': 0.009031306952238083, 'timestamp': '2025-10-02 00:48:46.477558', 'step': 21218, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:48:46.543177', 'step': 21218, 'epoch': 2}
{'type': 'loss', 'content': 0.020401151850819588, 'timestamp': '2025-10-02 00:48:46.545932', 'step': 21219, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:48:46.610643', 'step': 21219, 'epoch': 2}
{'type': 'loss', 'content': 0.0411934033036232, 'timestamp': '2025-10-02 00:48:46.617053', 'step': 21220, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:46.680485', 'step': 21220, 'epoch': 2}
{'type': 'loss', 'content': 0.014584919437766075, 'timestamp': '2025-10-02 00:48:46.690701', 'step': 21221, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:46.758681', 'step': 21221, 'epoch': 2}
{'type': 'loss', 'content': 0.029073180630803108, 'timestamp': '2025-10-02 00:48:46.768387', 'step': 21222, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:46.830716', 'step': 21222, 'epoch': 2}
{'type': 'loss', 'content': 0.033015601336956024, 'timestamp': '2025-10-02 00:48:46.834199', 'step': 21223, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:48:46.912836', 'step': 21223, 'epoch': 2}
{'type': 'loss', 'content': 0.01787813939154148, 'timestamp': '2025-10-02 00:48:46.923800', 'step': 21224, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:48:46.998712', 'step': 21224, 'epoch': 2}
{'type': 'loss', 'content': 0.07410785555839539, 'timestamp': '2025-10-02 00:48:47.009719', 'step': 21225, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:47.081260', 'step': 21225, 'epoch': 2}
{'type': 'loss', 'content': 0.01095319539308548, 'timestamp': '2025-10-02 00:48:47.090781', 'step': 21226, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:47.153654', 'step': 21226, 'epoch': 2}
{'type': 'loss', 'content': 0.04142427444458008, 'timestamp': '2025-10-02 00:48:47.161350', 'step': 21227, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:48:47.232186', 'step': 21227, 'epoch': 2}
{'type': 'loss', 'content': 0.07086540013551712, 'timestamp': '2025-10-02 00:48:47.243113', 'step': 21228, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:47.298234', 'step': 21228, 'epoch': 2}
{'type': 'loss', 'content': 0.05844514071941376, 'timestamp': '2025-10-02 00:48:47.307660', 'step': 21229, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:47.371138', 'step': 21229, 'epoch': 2}
{'type': 'loss', 'content': 0.01732923649251461, 'timestamp': '2025-10-02 00:48:47.380496', 'step': 21230, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:47.438199', 'step': 21230, 'epoch': 2}
{'type': 'loss', 'content': 0.023603180423378944, 'timestamp': '2025-10-02 00:48:47.443928', 'step': 21231, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:47.504363', 'step': 21231, 'epoch': 2}
{'type': 'loss', 'content': 0.061759840697050095, 'timestamp': '2025-10-02 00:48:47.514695', 'step': 21232, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:48:47.578494', 'step': 21232, 'epoch': 2}
{'type': 'loss', 'content': 0.001992872916162014, 'timestamp': '2025-10-02 00:48:47.589496', 'step': 21233, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:47.663760', 'step': 21233, 'epoch': 2}
{'type': 'loss', 'content': 0.049858734011650085, 'timestamp': '2025-10-02 00:48:47.672672', 'step': 21234, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:47.745799', 'step': 21234, 'epoch': 2}
{'type': 'loss', 'content': 0.01433300320059061, 'timestamp': '2025-10-02 00:48:47.751596', 'step': 21235, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:47.821975', 'step': 21235, 'epoch': 2}
{'type': 'loss', 'content': 0.027213530614972115, 'timestamp': '2025-10-02 00:48:47.830303', 'step': 21236, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:48:47.898546', 'step': 21236, 'epoch': 2}
{'type': 'loss', 'content': 0.07081066071987152, 'timestamp': '2025-10-02 00:48:47.903675', 'step': 21237, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:47.961799', 'step': 21237, 'epoch': 2}
{'type': 'loss', 'content': 0.05911741778254509, 'timestamp': '2025-10-02 00:48:47.965344', 'step': 21238, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:48.023864', 'step': 21238, 'epoch': 2}
{'type': 'loss', 'content': 0.0717773586511612, 'timestamp': '2025-10-02 00:48:48.028840', 'step': 21239, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:48.086891', 'step': 21239, 'epoch': 2}
{'type': 'loss', 'content': 0.022695738822221756, 'timestamp': '2025-10-02 00:48:48.095104', 'step': 21240, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:48.166640', 'step': 21240, 'epoch': 2}
{'type': 'loss', 'content': 0.0754314735531807, 'timestamp': '2025-10-02 00:48:48.169434', 'step': 21241, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:48:48.231748', 'step': 21241, 'epoch': 2}
{'type': 'loss', 'content': 0.04351232573390007, 'timestamp': '2025-10-02 00:48:48.235753', 'step': 21242, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:48.320515', 'step': 21242, 'epoch': 2}
{'type': 'loss', 'content': 0.04245660454034805, 'timestamp': '2025-10-02 00:48:48.323847', 'step': 21243, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:48.392207', 'step': 21243, 'epoch': 2}
{'type': 'loss', 'content': 0.09649450331926346, 'timestamp': '2025-10-02 00:48:48.399629', 'step': 21244, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:48.469380', 'step': 21244, 'epoch': 2}
{'type': 'loss', 'content': 0.018375640735030174, 'timestamp': '2025-10-02 00:48:48.476864', 'step': 21245, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:48.547512', 'step': 21245, 'epoch': 2}
{'type': 'loss', 'content': 0.02614402025938034, 'timestamp': '2025-10-02 00:48:48.550329', 'step': 21246, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:48.616817', 'step': 21246, 'epoch': 2}
{'type': 'loss', 'content': 0.08944001793861389, 'timestamp': '2025-10-02 00:48:48.619527', 'step': 21247, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:48.676141', 'step': 21247, 'epoch': 2}
{'type': 'loss', 'content': 0.03822721540927887, 'timestamp': '2025-10-02 00:48:48.686499', 'step': 21248, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:48:48.750053', 'step': 21248, 'epoch': 2}
{'type': 'loss', 'content': 0.16542647778987885, 'timestamp': '2025-10-02 00:48:48.753300', 'step': 21249, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:48.820667', 'step': 21249, 'epoch': 2}
{'type': 'loss', 'content': 0.1352706104516983, 'timestamp': '2025-10-02 00:48:48.830717', 'step': 21250, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:48.895611', 'step': 21250, 'epoch': 2}
{'type': 'loss', 'content': 0.01427482720464468, 'timestamp': '2025-10-02 00:48:48.899520', 'step': 21251, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:48.969622', 'step': 21251, 'epoch': 2}
{'type': 'loss', 'content': 0.06316889822483063, 'timestamp': '2025-10-02 00:48:48.983763', 'step': 21252, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:49.048738', 'step': 21252, 'epoch': 2}
{'type': 'loss', 'content': 0.10921766608953476, 'timestamp': '2025-10-02 00:48:49.058100', 'step': 21253, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:49.122354', 'step': 21253, 'epoch': 2}
{'type': 'loss', 'content': 0.1348947286605835, 'timestamp': '2025-10-02 00:48:49.125736', 'step': 21254, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:49.186747', 'step': 21254, 'epoch': 2}
{'type': 'loss', 'content': 0.10722371935844421, 'timestamp': '2025-10-02 00:48:49.194200', 'step': 21255, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:49.259667', 'step': 21255, 'epoch': 2}
{'type': 'loss', 'content': 0.010738593526184559, 'timestamp': '2025-10-02 00:48:49.268295', 'step': 21256, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:49.330912', 'step': 21256, 'epoch': 2}
{'type': 'loss', 'content': 0.14633814990520477, 'timestamp': '2025-10-02 00:48:49.338385', 'step': 21257, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:49.407247', 'step': 21257, 'epoch': 2}
{'type': 'loss', 'content': 0.059854816645383835, 'timestamp': '2025-10-02 00:48:49.411360', 'step': 21258, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:49.467292', 'step': 21258, 'epoch': 2}
{'type': 'loss', 'content': 0.017282234504818916, 'timestamp': '2025-10-02 00:48:49.476628', 'step': 21259, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:49.541237', 'step': 21259, 'epoch': 2}
{'type': 'loss', 'content': 0.042724113911390305, 'timestamp': '2025-10-02 00:48:49.551575', 'step': 21260, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:49.612609', 'step': 21260, 'epoch': 2}
{'type': 'loss', 'content': 0.04896416515111923, 'timestamp': '2025-10-02 00:48:49.615548', 'step': 21261, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:48:49.676139', 'step': 21261, 'epoch': 2}
{'type': 'loss', 'content': 0.16055753827095032, 'timestamp': '2025-10-02 00:48:49.680544', 'step': 21262, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:48:49.755526', 'step': 21262, 'epoch': 2}
{'type': 'loss', 'content': 0.06632789224386215, 'timestamp': '2025-10-02 00:48:49.765943', 'step': 21263, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:49.826148', 'step': 21263, 'epoch': 2}
{'type': 'loss', 'content': 0.07596912980079651, 'timestamp': '2025-10-02 00:48:49.834003', 'step': 21264, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:49.897865', 'step': 21264, 'epoch': 2}
{'type': 'loss', 'content': 0.08588171750307083, 'timestamp': '2025-10-02 00:48:49.903542', 'step': 21265, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:49.962004', 'step': 21265, 'epoch': 2}
{'type': 'loss', 'content': 0.009185167960822582, 'timestamp': '2025-10-02 00:48:49.967946', 'step': 21266, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:50.024649', 'step': 21266, 'epoch': 2}
{'type': 'loss', 'content': 0.056474123150110245, 'timestamp': '2025-10-02 00:48:50.033979', 'step': 21267, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:48:50.108098', 'step': 21267, 'epoch': 2}
{'type': 'loss', 'content': 0.020042702555656433, 'timestamp': '2025-10-02 00:48:50.121323', 'step': 21268, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:48:50.184477', 'step': 21268, 'epoch': 2}
{'type': 'loss', 'content': 0.061162520200014114, 'timestamp': '2025-10-02 00:48:50.187803', 'step': 21269, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:48:50.245495', 'step': 21269, 'epoch': 2}
{'type': 'loss', 'content': 0.05646543949842453, 'timestamp': '2025-10-02 00:48:50.253423', 'step': 21270, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:50.310267', 'step': 21270, 'epoch': 2}
{'type': 'loss', 'content': 0.018344223499298096, 'timestamp': '2025-10-02 00:48:50.319534', 'step': 21271, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:48:50.392745', 'step': 21271, 'epoch': 2}
{'type': 'loss', 'content': 0.06670583784580231, 'timestamp': '2025-10-02 00:48:50.403238', 'step': 21272, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:50.464467', 'step': 21272, 'epoch': 2}
{'type': 'loss', 'content': 0.034926433116197586, 'timestamp': '2025-10-02 00:48:50.472186', 'step': 21273, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:50.533309', 'step': 21273, 'epoch': 2}
{'type': 'loss', 'content': 0.13341359794139862, 'timestamp': '2025-10-02 00:48:50.536468', 'step': 21274, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:48:50.608353', 'step': 21274, 'epoch': 2}
{'type': 'loss', 'content': 0.07269839197397232, 'timestamp': '2025-10-02 00:48:50.612717', 'step': 21275, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:50.675332', 'step': 21275, 'epoch': 2}
{'type': 'loss', 'content': 0.0069233267568051815, 'timestamp': '2025-10-02 00:48:50.681724', 'step': 21276, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:50.750501', 'step': 21276, 'epoch': 2}
{'type': 'loss', 'content': 0.2409687638282776, 'timestamp': '2025-10-02 00:48:50.760513', 'step': 21277, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:50.832849', 'step': 21277, 'epoch': 2}
{'type': 'loss', 'content': 0.018368924036622047, 'timestamp': '2025-10-02 00:48:50.840438', 'step': 21278, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:48:50.932954', 'step': 21278, 'epoch': 2}
{'type': 'loss', 'content': 0.04392855986952782, 'timestamp': '2025-10-02 00:48:50.946414', 'step': 21279, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:51.013382', 'step': 21279, 'epoch': 2}
{'type': 'loss', 'content': 0.09148256480693817, 'timestamp': '2025-10-02 00:48:51.020091', 'step': 21280, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:48:51.079852', 'step': 21280, 'epoch': 2}
{'type': 'loss', 'content': 0.11217253655195236, 'timestamp': '2025-10-02 00:48:51.084965', 'step': 21281, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:51.142658', 'step': 21281, 'epoch': 2}
{'type': 'loss', 'content': 0.0323776938021183, 'timestamp': '2025-10-02 00:48:51.148365', 'step': 21282, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:51.213268', 'step': 21282, 'epoch': 2}
{'type': 'loss', 'content': 0.0189800001680851, 'timestamp': '2025-10-02 00:48:51.219571', 'step': 21283, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:48:51.282387', 'step': 21283, 'epoch': 2}
{'type': 'loss', 'content': 0.0662718191742897, 'timestamp': '2025-10-02 00:48:51.290057', 'step': 21284, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:51.359135', 'step': 21284, 'epoch': 2}
{'type': 'loss', 'content': 0.11642370373010635, 'timestamp': '2025-10-02 00:48:51.362356', 'step': 21285, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:48:51.430901', 'step': 21285, 'epoch': 2}
{'type': 'loss', 'content': 0.154946431517601, 'timestamp': '2025-10-02 00:48:51.437746', 'step': 21286, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:48:51.493600', 'step': 21286, 'epoch': 2}
{'type': 'loss', 'content': 0.027794547379016876, 'timestamp': '2025-10-02 00:48:51.496830', 'step': 21287, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:48:51.573770', 'step': 21287, 'epoch': 2}
{'type': 'loss', 'content': 0.03714093565940857, 'timestamp': '2025-10-02 00:48:51.586517', 'step': 21288, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:48:51.651935', 'step': 21288, 'epoch': 2}
{'type': 'loss', 'content': 0.08772451430559158, 'timestamp': '2025-10-02 00:48:51.655047', 'step': 21289, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:51.728071', 'step': 21289, 'epoch': 2}
{'type': 'loss', 'content': 0.008551709353923798, 'timestamp': '2025-10-02 00:48:51.735512', 'step': 21290, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:51.811264', 'step': 21290, 'epoch': 2}
{'type': 'loss', 'content': 0.10597972571849823, 'timestamp': '2025-10-02 00:48:51.818541', 'step': 21291, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:48:51.892892', 'step': 21291, 'epoch': 2}
{'type': 'loss', 'content': 0.017613785341382027, 'timestamp': '2025-10-02 00:48:51.903866', 'step': 21292, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:51.974803', 'step': 21292, 'epoch': 2}
{'type': 'loss', 'content': 0.01665354333817959, 'timestamp': '2025-10-02 00:48:51.982325', 'step': 21293, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:48:52.046723', 'step': 21293, 'epoch': 2}
{'type': 'loss', 'content': 0.03843863308429718, 'timestamp': '2025-10-02 00:48:52.051758', 'step': 21294, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:52.111742', 'step': 21294, 'epoch': 2}
{'type': 'loss', 'content': 0.046301886439323425, 'timestamp': '2025-10-02 00:48:52.117546', 'step': 21295, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:52.175042', 'step': 21295, 'epoch': 2}
{'type': 'loss', 'content': 0.07845679670572281, 'timestamp': '2025-10-02 00:48:52.183337', 'step': 21296, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:52.240867', 'step': 21296, 'epoch': 2}
{'type': 'loss', 'content': 0.08177998661994934, 'timestamp': '2025-10-02 00:48:52.244901', 'step': 21297, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:52.304280', 'step': 21297, 'epoch': 2}
{'type': 'loss', 'content': 0.027194326743483543, 'timestamp': '2025-10-02 00:48:52.313602', 'step': 21298, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:52.370413', 'step': 21298, 'epoch': 2}
{'type': 'loss', 'content': 0.12813504040241241, 'timestamp': '2025-10-02 00:48:52.376113', 'step': 21299, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:52.432646', 'step': 21299, 'epoch': 2}
{'type': 'loss', 'content': 0.11737807095050812, 'timestamp': '2025-10-02 00:48:52.441759', 'step': 21300, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:52.503624', 'step': 21300, 'epoch': 2}
{'type': 'loss', 'content': 0.04543416202068329, 'timestamp': '2025-10-02 00:48:52.509510', 'step': 21301, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:48:52.576365', 'step': 21301, 'epoch': 2}
{'type': 'loss', 'content': 0.022132789716124535, 'timestamp': '2025-10-02 00:48:52.586836', 'step': 21302, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:52.646672', 'step': 21302, 'epoch': 2}
{'type': 'loss', 'content': 0.03491225093603134, 'timestamp': '2025-10-02 00:48:52.656165', 'step': 21303, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:48:52.719511', 'step': 21303, 'epoch': 2}
{'type': 'loss', 'content': 0.036484986543655396, 'timestamp': '2025-10-02 00:48:52.731000', 'step': 21304, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:48:52.793323', 'step': 21304, 'epoch': 2}
{'type': 'loss', 'content': 0.11699056625366211, 'timestamp': '2025-10-02 00:48:52.796951', 'step': 21305, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:52.873361', 'step': 21305, 'epoch': 2}
{'type': 'loss', 'content': 0.037625402212142944, 'timestamp': '2025-10-02 00:48:52.877037', 'step': 21306, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:52.936201', 'step': 21306, 'epoch': 2}
{'type': 'loss', 'content': 0.04064945504069328, 'timestamp': '2025-10-02 00:48:52.939974', 'step': 21307, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:52.998303', 'step': 21307, 'epoch': 2}
{'type': 'loss', 'content': 0.06704451143741608, 'timestamp': '2025-10-02 00:48:53.004487', 'step': 21308, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:53.060829', 'step': 21308, 'epoch': 2}
{'type': 'loss', 'content': 0.09504333883523941, 'timestamp': '2025-10-02 00:48:53.064276', 'step': 21309, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:53.123134', 'step': 21309, 'epoch': 2}
{'type': 'loss', 'content': 0.03437906131148338, 'timestamp': '2025-10-02 00:48:53.132474', 'step': 21310, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:48:53.190609', 'step': 21310, 'epoch': 2}
{'type': 'loss', 'content': 0.1355452537536621, 'timestamp': '2025-10-02 00:48:53.193566', 'step': 21311, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:53.249415', 'step': 21311, 'epoch': 2}
{'type': 'loss', 'content': 0.00397375738248229, 'timestamp': '2025-10-02 00:48:53.255623', 'step': 21312, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:53.313245', 'step': 21312, 'epoch': 2}
{'type': 'loss', 'content': 0.03034825809299946, 'timestamp': '2025-10-02 00:48:53.322077', 'step': 21313, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:53.378308', 'step': 21313, 'epoch': 2}
{'type': 'loss', 'content': 0.04242020100355148, 'timestamp': '2025-10-02 00:48:53.384700', 'step': 21314, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:53.456234', 'step': 21314, 'epoch': 2}
{'type': 'loss', 'content': 0.05733971297740936, 'timestamp': '2025-10-02 00:48:53.462170', 'step': 21315, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:53.525986', 'step': 21315, 'epoch': 2}
{'type': 'loss', 'content': 0.07856123894453049, 'timestamp': '2025-10-02 00:48:53.532885', 'step': 21316, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:53.599425', 'step': 21316, 'epoch': 2}
{'type': 'loss', 'content': 0.034036390483379364, 'timestamp': '2025-10-02 00:48:53.604947', 'step': 21317, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:53.677008', 'step': 21317, 'epoch': 2}
{'type': 'loss', 'content': 0.0062740701250731945, 'timestamp': '2025-10-02 00:48:53.684450', 'step': 21318, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:53.745051', 'step': 21318, 'epoch': 2}
{'type': 'loss', 'content': 0.019730787724256516, 'timestamp': '2025-10-02 00:48:53.748270', 'step': 21319, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:48:53.811678', 'step': 21319, 'epoch': 2}
{'type': 'loss', 'content': 0.03772993013262749, 'timestamp': '2025-10-02 00:48:53.822881', 'step': 21320, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:53.880253', 'step': 21320, 'epoch': 2}
{'type': 'loss', 'content': 0.0587460994720459, 'timestamp': '2025-10-02 00:48:53.888744', 'step': 21321, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-10-02 00:48:53.977710', 'step': 21321, 'epoch': 2}
{'type': 'loss', 'content': 0.03084752894937992, 'timestamp': '2025-10-02 00:48:53.992568', 'step': 21322, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:48:54.055054', 'step': 21322, 'epoch': 2}
{'type': 'loss', 'content': 0.11649972945451736, 'timestamp': '2025-10-02 00:48:54.057696', 'step': 21323, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:48:54.118677', 'step': 21323, 'epoch': 2}
{'type': 'loss', 'content': 0.0699753388762474, 'timestamp': '2025-10-02 00:48:54.125030', 'step': 21324, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:54.179753', 'step': 21324, 'epoch': 2}
{'type': 'loss', 'content': 0.09419462829828262, 'timestamp': '2025-10-02 00:48:54.182977', 'step': 21325, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:54.238518', 'step': 21325, 'epoch': 2}
{'type': 'loss', 'content': 0.07838165014982224, 'timestamp': '2025-10-02 00:48:54.242433', 'step': 21326, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:54.303137', 'step': 21326, 'epoch': 2}
{'type': 'loss', 'content': 0.08321405947208405, 'timestamp': '2025-10-02 00:48:54.307042', 'step': 21327, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:54.364430', 'step': 21327, 'epoch': 2}
{'type': 'loss', 'content': 0.07280265539884567, 'timestamp': '2025-10-02 00:48:54.375384', 'step': 21328, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:54.436240', 'step': 21328, 'epoch': 2}
{'type': 'loss', 'content': 0.042032115161418915, 'timestamp': '2025-10-02 00:48:54.439486', 'step': 21329, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:54.509174', 'step': 21329, 'epoch': 2}
{'type': 'loss', 'content': 0.010601403191685677, 'timestamp': '2025-10-02 00:48:54.515250', 'step': 21330, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:48:54.572666', 'step': 21330, 'epoch': 2}
{'type': 'loss', 'content': 0.09770657867193222, 'timestamp': '2025-10-02 00:48:54.575388', 'step': 21331, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:54.641097', 'step': 21331, 'epoch': 2}
{'type': 'loss', 'content': 0.04472691938281059, 'timestamp': '2025-10-02 00:48:54.648386', 'step': 21332, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:54.710489', 'step': 21332, 'epoch': 2}
{'type': 'loss', 'content': 0.009767943061888218, 'timestamp': '2025-10-02 00:48:54.717838', 'step': 21333, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:54.781306', 'step': 21333, 'epoch': 2}
{'type': 'loss', 'content': 0.01778273843228817, 'timestamp': '2025-10-02 00:48:54.784563', 'step': 21334, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:54.851384', 'step': 21334, 'epoch': 2}
{'type': 'loss', 'content': 0.05625404790043831, 'timestamp': '2025-10-02 00:48:54.860539', 'step': 21335, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:54.922845', 'step': 21335, 'epoch': 2}
{'type': 'loss', 'content': 0.04264044016599655, 'timestamp': '2025-10-02 00:48:54.929491', 'step': 21336, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:55.005153', 'step': 21336, 'epoch': 2}
{'type': 'loss', 'content': 0.021570591256022453, 'timestamp': '2025-10-02 00:48:55.013513', 'step': 21337, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:55.094232', 'step': 21337, 'epoch': 2}
{'type': 'loss', 'content': 0.08276087045669556, 'timestamp': '2025-10-02 00:48:55.097554', 'step': 21338, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:55.158342', 'step': 21338, 'epoch': 2}
{'type': 'loss', 'content': 0.034691061824560165, 'timestamp': '2025-10-02 00:48:55.166444', 'step': 21339, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:55.232040', 'step': 21339, 'epoch': 2}
{'type': 'loss', 'content': 0.03722763806581497, 'timestamp': '2025-10-02 00:48:55.238734', 'step': 21340, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:55.300603', 'step': 21340, 'epoch': 2}
{'type': 'loss', 'content': 0.07717525213956833, 'timestamp': '2025-10-02 00:48:55.306937', 'step': 21341, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:55.374281', 'step': 21341, 'epoch': 2}
{'type': 'loss', 'content': 0.008228987455368042, 'timestamp': '2025-10-02 00:48:55.380247', 'step': 21342, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:55.441663', 'step': 21342, 'epoch': 2}
{'type': 'loss', 'content': 0.036700159311294556, 'timestamp': '2025-10-02 00:48:55.447595', 'step': 21343, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:48:55.507529', 'step': 21343, 'epoch': 2}
{'type': 'loss', 'content': 0.1356191337108612, 'timestamp': '2025-10-02 00:48:55.513755', 'step': 21344, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:55.568861', 'step': 21344, 'epoch': 2}
{'type': 'loss', 'content': 0.09830951690673828, 'timestamp': '2025-10-02 00:48:55.572354', 'step': 21345, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:48:55.628756', 'step': 21345, 'epoch': 2}
{'type': 'loss', 'content': 0.10561684519052505, 'timestamp': '2025-10-02 00:48:55.633921', 'step': 21346, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:48:55.706024', 'step': 21346, 'epoch': 2}
{'type': 'loss', 'content': 0.0374477244913578, 'timestamp': '2025-10-02 00:48:55.718341', 'step': 21347, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:55.777814', 'step': 21347, 'epoch': 2}
{'type': 'loss', 'content': 0.053736839443445206, 'timestamp': '2025-10-02 00:48:55.784114', 'step': 21348, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:55.839049', 'step': 21348, 'epoch': 2}
{'type': 'loss', 'content': 0.03639400750398636, 'timestamp': '2025-10-02 00:48:55.841531', 'step': 21349, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:55.897550', 'step': 21349, 'epoch': 2}
{'type': 'loss', 'content': 0.05564318597316742, 'timestamp': '2025-10-02 00:48:55.903504', 'step': 21350, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:55.968370', 'step': 21350, 'epoch': 2}
{'type': 'loss', 'content': 0.03538377583026886, 'timestamp': '2025-10-02 00:48:55.971381', 'step': 21351, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:48:56.045142', 'step': 21351, 'epoch': 2}
{'type': 'loss', 'content': 0.0768137127161026, 'timestamp': '2025-10-02 00:48:56.056380', 'step': 21352, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:48:56.123301', 'step': 21352, 'epoch': 2}
{'type': 'loss', 'content': 0.17227718234062195, 'timestamp': '2025-10-02 00:48:56.127899', 'step': 21353, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:56.190909', 'step': 21353, 'epoch': 2}
{'type': 'loss', 'content': 0.0746132954955101, 'timestamp': '2025-10-02 00:48:56.194342', 'step': 21354, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:56.252999', 'step': 21354, 'epoch': 2}
{'type': 'loss', 'content': 0.10354261100292206, 'timestamp': '2025-10-02 00:48:56.259634', 'step': 21355, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:56.322749', 'step': 21355, 'epoch': 2}
{'type': 'loss', 'content': 0.061491575092077255, 'timestamp': '2025-10-02 00:48:56.330780', 'step': 21356, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:56.391173', 'step': 21356, 'epoch': 2}
{'type': 'loss', 'content': 0.0070363725535571575, 'timestamp': '2025-10-02 00:48:56.397193', 'step': 21357, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:48:56.461676', 'step': 21357, 'epoch': 2}
{'type': 'loss', 'content': 0.07605846971273422, 'timestamp': '2025-10-02 00:48:56.471841', 'step': 21358, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:56.529516', 'step': 21358, 'epoch': 2}
{'type': 'loss', 'content': 0.011339999735355377, 'timestamp': '2025-10-02 00:48:56.532958', 'step': 21359, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:56.587598', 'step': 21359, 'epoch': 2}
{'type': 'loss', 'content': 0.03315415978431702, 'timestamp': '2025-10-02 00:48:56.595503', 'step': 21360, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:48:56.651555', 'step': 21360, 'epoch': 2}
{'type': 'loss', 'content': 0.14701707661151886, 'timestamp': '2025-10-02 00:48:56.657465', 'step': 21361, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:56.716228', 'step': 21361, 'epoch': 2}
{'type': 'loss', 'content': 0.0716780498623848, 'timestamp': '2025-10-02 00:48:56.719026', 'step': 21362, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:56.784654', 'step': 21362, 'epoch': 2}
{'type': 'loss', 'content': 0.044453248381614685, 'timestamp': '2025-10-02 00:48:56.794160', 'step': 21363, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:56.852612', 'step': 21363, 'epoch': 2}
{'type': 'loss', 'content': 0.05547939985990524, 'timestamp': '2025-10-02 00:48:56.859942', 'step': 21364, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:56.918742', 'step': 21364, 'epoch': 2}
{'type': 'loss', 'content': 0.014543820172548294, 'timestamp': '2025-10-02 00:48:56.921297', 'step': 21365, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:56.976014', 'step': 21365, 'epoch': 2}
{'type': 'loss', 'content': 0.08800885826349258, 'timestamp': '2025-10-02 00:48:56.981929', 'step': 21366, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:57.038786', 'step': 21366, 'epoch': 2}
{'type': 'loss', 'content': 0.02476460300385952, 'timestamp': '2025-10-02 00:48:57.048585', 'step': 21367, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:57.119959', 'step': 21367, 'epoch': 2}
{'type': 'loss', 'content': 0.051593247801065445, 'timestamp': '2025-10-02 00:48:57.130067', 'step': 21368, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:48:57.191608', 'step': 21368, 'epoch': 2}
{'type': 'loss', 'content': 0.02452007494866848, 'timestamp': '2025-10-02 00:48:57.202551', 'step': 21369, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:48:57.268434', 'step': 21369, 'epoch': 2}
{'type': 'loss', 'content': 0.030583251267671585, 'timestamp': '2025-10-02 00:48:57.279277', 'step': 21370, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:48:57.342257', 'step': 21370, 'epoch': 2}
{'type': 'loss', 'content': 0.06745339184999466, 'timestamp': '2025-10-02 00:48:57.350678', 'step': 21371, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:48:57.413110', 'step': 21371, 'epoch': 2}
{'type': 'loss', 'content': 0.04071175307035446, 'timestamp': '2025-10-02 00:48:57.419800', 'step': 21372, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:57.475506', 'step': 21372, 'epoch': 2}
{'type': 'loss', 'content': 0.11605308204889297, 'timestamp': '2025-10-02 00:48:57.478197', 'step': 21373, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:57.540057', 'step': 21373, 'epoch': 2}
{'type': 'loss', 'content': 0.07359324395656586, 'timestamp': '2025-10-02 00:48:57.549396', 'step': 21374, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:48:57.605713', 'step': 21374, 'epoch': 2}
{'type': 'loss', 'content': 0.1966037154197693, 'timestamp': '2025-10-02 00:48:57.612872', 'step': 21375, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:48:57.672119', 'step': 21375, 'epoch': 2}
{'type': 'loss', 'content': 0.11184988170862198, 'timestamp': '2025-10-02 00:48:57.679539', 'step': 21376, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:57.738307', 'step': 21376, 'epoch': 2}
{'type': 'loss', 'content': 0.019601896405220032, 'timestamp': '2025-10-02 00:48:57.744215', 'step': 21377, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:57.803337', 'step': 21377, 'epoch': 2}
{'type': 'loss', 'content': 0.047277405858039856, 'timestamp': '2025-10-02 00:48:57.812721', 'step': 21378, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:57.870741', 'step': 21378, 'epoch': 2}
{'type': 'loss', 'content': 0.11467292904853821, 'timestamp': '2025-10-02 00:48:57.874610', 'step': 21379, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:57.932895', 'step': 21379, 'epoch': 2}
{'type': 'loss', 'content': 0.03236709535121918, 'timestamp': '2025-10-02 00:48:57.939508', 'step': 21380, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:48:57.999157', 'step': 21380, 'epoch': 2}
{'type': 'loss', 'content': 0.043483491986989975, 'timestamp': '2025-10-02 00:48:58.002450', 'step': 21381, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:58.060309', 'step': 21381, 'epoch': 2}
{'type': 'loss', 'content': 0.0799136683344841, 'timestamp': '2025-10-02 00:48:58.063517', 'step': 21382, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:58.119032', 'step': 21382, 'epoch': 2}
{'type': 'loss', 'content': 0.03767174482345581, 'timestamp': '2025-10-02 00:48:58.126602', 'step': 21383, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:58.181661', 'step': 21383, 'epoch': 2}
{'type': 'loss', 'content': 0.07234495133161545, 'timestamp': '2025-10-02 00:48:58.188358', 'step': 21384, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:48:58.242666', 'step': 21384, 'epoch': 2}
{'type': 'loss', 'content': 0.1564599722623825, 'timestamp': '2025-10-02 00:48:58.244820', 'step': 21385, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:58.299169', 'step': 21385, 'epoch': 2}
{'type': 'loss', 'content': 0.07455012202262878, 'timestamp': '2025-10-02 00:48:58.301559', 'step': 21386, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:58.356610', 'step': 21386, 'epoch': 2}
{'type': 'loss', 'content': 0.007571241352707148, 'timestamp': '2025-10-02 00:48:58.365922', 'step': 21387, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:48:58.431410', 'step': 21387, 'epoch': 2}
{'type': 'loss', 'content': 0.07814835011959076, 'timestamp': '2025-10-02 00:48:58.438034', 'step': 21388, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:58.502514', 'step': 21388, 'epoch': 2}
{'type': 'loss', 'content': 0.03754914924502373, 'timestamp': '2025-10-02 00:48:58.512761', 'step': 21389, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:48:58.580259', 'step': 21389, 'epoch': 2}
{'type': 'loss', 'content': 0.040773142129182816, 'timestamp': '2025-10-02 00:48:58.586296', 'step': 21390, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:58.652922', 'step': 21390, 'epoch': 2}
{'type': 'loss', 'content': 0.021759547293186188, 'timestamp': '2025-10-02 00:48:58.658827', 'step': 21391, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:48:58.736053', 'step': 21391, 'epoch': 2}
{'type': 'loss', 'content': 0.007026772480458021, 'timestamp': '2025-10-02 00:48:58.749174', 'step': 21392, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:58.804718', 'step': 21392, 'epoch': 2}
{'type': 'loss', 'content': 0.045908983796834946, 'timestamp': '2025-10-02 00:48:58.812310', 'step': 21393, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:48:58.877931', 'step': 21393, 'epoch': 2}
{'type': 'loss', 'content': 0.022346127778291702, 'timestamp': '2025-10-02 00:48:58.888570', 'step': 21394, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:58.945426', 'step': 21394, 'epoch': 2}
{'type': 'loss', 'content': 0.08889462053775787, 'timestamp': '2025-10-02 00:48:58.952779', 'step': 21395, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:59.009912', 'step': 21395, 'epoch': 2}
{'type': 'loss', 'content': 0.05116778612136841, 'timestamp': '2025-10-02 00:48:59.015920', 'step': 21396, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:59.073710', 'step': 21396, 'epoch': 2}
{'type': 'loss', 'content': 0.03798189386725426, 'timestamp': '2025-10-02 00:48:59.083996', 'step': 21397, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:59.140807', 'step': 21397, 'epoch': 2}
{'type': 'loss', 'content': 0.0912594348192215, 'timestamp': '2025-10-02 00:48:59.143478', 'step': 21398, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:48:59.201229', 'step': 21398, 'epoch': 2}
{'type': 'loss', 'content': 0.015025125816464424, 'timestamp': '2025-10-02 00:48:59.210532', 'step': 21399, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:48:59.266414', 'step': 21399, 'epoch': 2}
{'type': 'loss', 'content': 0.042733095586299896, 'timestamp': '2025-10-02 00:48:59.272509', 'step': 21400, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:59.327415', 'step': 21400, 'epoch': 2}
{'type': 'loss', 'content': 0.05811513587832451, 'timestamp': '2025-10-02 00:48:59.330558', 'step': 21401, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:59.385310', 'step': 21401, 'epoch': 2}
{'type': 'loss', 'content': 0.06204494088888168, 'timestamp': '2025-10-02 00:48:59.388381', 'step': 21402, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:48:59.449870', 'step': 21402, 'epoch': 2}
{'type': 'loss', 'content': 0.08214381337165833, 'timestamp': '2025-10-02 00:48:59.454443', 'step': 21403, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:48:59.529704', 'step': 21403, 'epoch': 2}
{'type': 'loss', 'content': 0.16847676038742065, 'timestamp': '2025-10-02 00:48:59.543420', 'step': 21404, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:48:59.619054', 'step': 21404, 'epoch': 2}
{'type': 'loss', 'content': 0.029885612428188324, 'timestamp': '2025-10-02 00:48:59.625919', 'step': 21405, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:48:59.698475', 'step': 21405, 'epoch': 2}
{'type': 'loss', 'content': 0.030228784307837486, 'timestamp': '2025-10-02 00:48:59.707998', 'step': 21406, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:48:59.770381', 'step': 21406, 'epoch': 2}
{'type': 'loss', 'content': 0.06459393352270126, 'timestamp': '2025-10-02 00:48:59.774214', 'step': 21407, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:48:59.831931', 'step': 21407, 'epoch': 2}
{'type': 'loss', 'content': 0.029642678797245026, 'timestamp': '2025-10-02 00:48:59.839463', 'step': 21408, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:48:59.897159', 'step': 21408, 'epoch': 2}
{'type': 'loss', 'content': 0.04438796639442444, 'timestamp': '2025-10-02 00:48:59.904769', 'step': 21409, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:48:59.965725', 'step': 21409, 'epoch': 2}
{'type': 'loss', 'content': 0.03628121316432953, 'timestamp': '2025-10-02 00:48:59.968848', 'step': 21410, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:00.028099', 'step': 21410, 'epoch': 2}
{'type': 'loss', 'content': 0.03789825364947319, 'timestamp': '2025-10-02 00:49:00.035665', 'step': 21411, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:00.095265', 'step': 21411, 'epoch': 2}
{'type': 'loss', 'content': 0.02263442985713482, 'timestamp': '2025-10-02 00:49:00.104154', 'step': 21412, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:00.161335', 'step': 21412, 'epoch': 2}
{'type': 'loss', 'content': 0.049875326454639435, 'timestamp': '2025-10-02 00:49:00.167322', 'step': 21413, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:00.226590', 'step': 21413, 'epoch': 2}
{'type': 'loss', 'content': 0.06366968154907227, 'timestamp': '2025-10-02 00:49:00.230052', 'step': 21414, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:00.287707', 'step': 21414, 'epoch': 2}
{'type': 'loss', 'content': 0.07770419865846634, 'timestamp': '2025-10-02 00:49:00.291853', 'step': 21415, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:00.353496', 'step': 21415, 'epoch': 2}
{'type': 'loss', 'content': 0.07536745071411133, 'timestamp': '2025-10-02 00:49:00.359851', 'step': 21416, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:00.416411', 'step': 21416, 'epoch': 2}
{'type': 'loss', 'content': 0.05410802364349365, 'timestamp': '2025-10-02 00:49:00.424051', 'step': 21417, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:00.481206', 'step': 21417, 'epoch': 2}
{'type': 'loss', 'content': 0.09942080825567245, 'timestamp': '2025-10-02 00:49:00.485160', 'step': 21418, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:49:00.551910', 'step': 21418, 'epoch': 2}
{'type': 'loss', 'content': 0.022602323442697525, 'timestamp': '2025-10-02 00:49:00.562314', 'step': 21419, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:49:00.627257', 'step': 21419, 'epoch': 2}
{'type': 'loss', 'content': 0.011443745344877243, 'timestamp': '2025-10-02 00:49:00.638489', 'step': 21420, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:00.699210', 'step': 21420, 'epoch': 2}
{'type': 'loss', 'content': 0.1217556744813919, 'timestamp': '2025-10-02 00:49:00.701828', 'step': 21421, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:00.767225', 'step': 21421, 'epoch': 2}
{'type': 'loss', 'content': 0.03525136783719063, 'timestamp': '2025-10-02 00:49:00.773174', 'step': 21422, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:00.837919', 'step': 21422, 'epoch': 2}
{'type': 'loss', 'content': 0.023118238896131516, 'timestamp': '2025-10-02 00:49:00.845424', 'step': 21423, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:00.921625', 'step': 21423, 'epoch': 2}
{'type': 'loss', 'content': 0.010532365180552006, 'timestamp': '2025-10-02 00:49:00.931563', 'step': 21424, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:00.998614', 'step': 21424, 'epoch': 2}
{'type': 'loss', 'content': 0.082892045378685, 'timestamp': '2025-10-02 00:49:01.005044', 'step': 21425, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:01.073898', 'step': 21425, 'epoch': 2}
{'type': 'loss', 'content': 0.046693313866853714, 'timestamp': '2025-10-02 00:49:01.081540', 'step': 21426, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:01.140812', 'step': 21426, 'epoch': 2}
{'type': 'loss', 'content': 0.09087791293859482, 'timestamp': '2025-10-02 00:49:01.148472', 'step': 21427, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:01.213277', 'step': 21427, 'epoch': 2}
{'type': 'loss', 'content': 0.0019164595287293196, 'timestamp': '2025-10-02 00:49:01.223431', 'step': 21428, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:01.284150', 'step': 21428, 'epoch': 2}
{'type': 'loss', 'content': 0.04776798561215401, 'timestamp': '2025-10-02 00:49:01.291874', 'step': 21429, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:01.351694', 'step': 21429, 'epoch': 2}
{'type': 'loss', 'content': 0.04033441096544266, 'timestamp': '2025-10-02 00:49:01.354453', 'step': 21430, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:01.415220', 'step': 21430, 'epoch': 2}
{'type': 'loss', 'content': 0.03581511974334717, 'timestamp': '2025-10-02 00:49:01.419256', 'step': 21431, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:01.480750', 'step': 21431, 'epoch': 2}
{'type': 'loss', 'content': 0.014061416499316692, 'timestamp': '2025-10-02 00:49:01.489197', 'step': 21432, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:01.550254', 'step': 21432, 'epoch': 2}
{'type': 'loss', 'content': 0.05988007411360741, 'timestamp': '2025-10-02 00:49:01.560445', 'step': 21433, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:01.621402', 'step': 21433, 'epoch': 2}
{'type': 'loss', 'content': 0.061011120676994324, 'timestamp': '2025-10-02 00:49:01.624719', 'step': 21434, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:01.682671', 'step': 21434, 'epoch': 2}
{'type': 'loss', 'content': 0.05134764313697815, 'timestamp': '2025-10-02 00:49:01.689227', 'step': 21435, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:01.752037', 'step': 21435, 'epoch': 2}
{'type': 'loss', 'content': 0.025960244238376617, 'timestamp': '2025-10-02 00:49:01.758297', 'step': 21436, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:01.816521', 'step': 21436, 'epoch': 2}
{'type': 'loss', 'content': 0.05320345610380173, 'timestamp': '2025-10-02 00:49:01.819025', 'step': 21437, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:01.878062', 'step': 21437, 'epoch': 2}
{'type': 'loss', 'content': 0.08933103829622269, 'timestamp': '2025-10-02 00:49:01.882708', 'step': 21438, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:01.947522', 'step': 21438, 'epoch': 2}
{'type': 'loss', 'content': 0.030584512278437614, 'timestamp': '2025-10-02 00:49:01.957072', 'step': 21439, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:02.013640', 'step': 21439, 'epoch': 2}
{'type': 'loss', 'content': 0.06419415026903152, 'timestamp': '2025-10-02 00:49:02.019867', 'step': 21440, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:02.078894', 'step': 21440, 'epoch': 2}
{'type': 'loss', 'content': 0.06508415937423706, 'timestamp': '2025-10-02 00:49:02.089163', 'step': 21441, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:02.145853', 'step': 21441, 'epoch': 2}
{'type': 'loss', 'content': 0.02370690554380417, 'timestamp': '2025-10-02 00:49:02.149674', 'step': 21442, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:49:02.215005', 'step': 21442, 'epoch': 2}
{'type': 'loss', 'content': 0.06947243213653564, 'timestamp': '2025-10-02 00:49:02.225479', 'step': 21443, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:02.282956', 'step': 21443, 'epoch': 2}
{'type': 'loss', 'content': 0.06024133786559105, 'timestamp': '2025-10-02 00:49:02.290065', 'step': 21444, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:02.350135', 'step': 21444, 'epoch': 2}
{'type': 'loss', 'content': 0.05672740563750267, 'timestamp': '2025-10-02 00:49:02.359845', 'step': 21445, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:02.417309', 'step': 21445, 'epoch': 2}
{'type': 'loss', 'content': 0.027367308735847473, 'timestamp': '2025-10-02 00:49:02.423427', 'step': 21446, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:02.489335', 'step': 21446, 'epoch': 2}
{'type': 'loss', 'content': 0.04926779493689537, 'timestamp': '2025-10-02 00:49:02.495922', 'step': 21447, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:02.564627', 'step': 21447, 'epoch': 2}
{'type': 'loss', 'content': 0.08984751254320145, 'timestamp': '2025-10-02 00:49:02.573521', 'step': 21448, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:02.638018', 'step': 21448, 'epoch': 2}
{'type': 'loss', 'content': 0.08188929408788681, 'timestamp': '2025-10-02 00:49:02.641980', 'step': 21449, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:02.701025', 'step': 21449, 'epoch': 2}
{'type': 'loss', 'content': 0.0032951089087873697, 'timestamp': '2025-10-02 00:49:02.708726', 'step': 21450, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:49:02.779574', 'step': 21450, 'epoch': 2}
{'type': 'loss', 'content': 0.028592750430107117, 'timestamp': '2025-10-02 00:49:02.791930', 'step': 21451, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:02.862359', 'step': 21451, 'epoch': 2}
{'type': 'loss', 'content': 0.052470847964286804, 'timestamp': '2025-10-02 00:49:02.870937', 'step': 21452, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:02.929753', 'step': 21452, 'epoch': 2}
{'type': 'loss', 'content': 0.06194488704204559, 'timestamp': '2025-10-02 00:49:02.932775', 'step': 21453, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:02.992740', 'step': 21453, 'epoch': 2}
{'type': 'loss', 'content': 0.008769532665610313, 'timestamp': '2025-10-02 00:49:03.001001', 'step': 21454, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:03.065951', 'step': 21454, 'epoch': 2}
{'type': 'loss', 'content': 0.04575943946838379, 'timestamp': '2025-10-02 00:49:03.070507', 'step': 21455, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:03.130589', 'step': 21455, 'epoch': 2}
{'type': 'loss', 'content': 0.07013072818517685, 'timestamp': '2025-10-02 00:49:03.138908', 'step': 21456, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:49:03.199178', 'step': 21456, 'epoch': 2}
{'type': 'loss', 'content': 0.03283339738845825, 'timestamp': '2025-10-02 00:49:03.210166', 'step': 21457, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:03.265692', 'step': 21457, 'epoch': 2}
{'type': 'loss', 'content': 0.06657425314188004, 'timestamp': '2025-10-02 00:49:03.268150', 'step': 21458, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:03.331247', 'step': 21458, 'epoch': 2}
{'type': 'loss', 'content': 0.01718846894800663, 'timestamp': '2025-10-02 00:49:03.337668', 'step': 21459, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:03.396394', 'step': 21459, 'epoch': 2}
{'type': 'loss', 'content': 0.07847050577402115, 'timestamp': '2025-10-02 00:49:03.409873', 'step': 21460, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:03.470255', 'step': 21460, 'epoch': 2}
{'type': 'loss', 'content': 0.06316962838172913, 'timestamp': '2025-10-02 00:49:03.472795', 'step': 21461, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:03.528570', 'step': 21461, 'epoch': 2}
{'type': 'loss', 'content': 0.012343904003500938, 'timestamp': '2025-10-02 00:49:03.534121', 'step': 21462, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:03.597841', 'step': 21462, 'epoch': 2}
{'type': 'loss', 'content': 0.05201652646064758, 'timestamp': '2025-10-02 00:49:03.601471', 'step': 21463, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:03.659951', 'step': 21463, 'epoch': 2}
{'type': 'loss', 'content': 0.027442313730716705, 'timestamp': '2025-10-02 00:49:03.666690', 'step': 21464, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:03.722049', 'step': 21464, 'epoch': 2}
{'type': 'loss', 'content': 0.1016334816813469, 'timestamp': '2025-10-02 00:49:03.731507', 'step': 21465, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:49:03.793683', 'step': 21465, 'epoch': 2}
{'type': 'loss', 'content': 0.07620058208703995, 'timestamp': '2025-10-02 00:49:03.803868', 'step': 21466, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:03.862076', 'step': 21466, 'epoch': 2}
{'type': 'loss', 'content': 0.03791281580924988, 'timestamp': '2025-10-02 00:49:03.871397', 'step': 21467, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:03.932196', 'step': 21467, 'epoch': 2}
{'type': 'loss', 'content': 0.10053356736898422, 'timestamp': '2025-10-02 00:49:03.940935', 'step': 21468, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:03.998644', 'step': 21468, 'epoch': 2}
{'type': 'loss', 'content': 0.08451462537050247, 'timestamp': '2025-10-02 00:49:04.004665', 'step': 21469, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:04.062869', 'step': 21469, 'epoch': 2}
{'type': 'loss', 'content': 0.07952665537595749, 'timestamp': '2025-10-02 00:49:04.065580', 'step': 21470, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:04.120676', 'step': 21470, 'epoch': 2}
{'type': 'loss', 'content': 0.027585210278630257, 'timestamp': '2025-10-02 00:49:04.123105', 'step': 21471, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:04.177683', 'step': 21471, 'epoch': 2}
{'type': 'loss', 'content': 0.0644613653421402, 'timestamp': '2025-10-02 00:49:04.183866', 'step': 21472, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:49:04.256250', 'step': 21472, 'epoch': 2}
{'type': 'loss', 'content': 0.005926687270402908, 'timestamp': '2025-10-02 00:49:04.270623', 'step': 21473, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:04.327069', 'step': 21473, 'epoch': 2}
{'type': 'loss', 'content': 0.11469382047653198, 'timestamp': '2025-10-02 00:49:04.330335', 'step': 21474, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:04.390384', 'step': 21474, 'epoch': 2}
{'type': 'loss', 'content': 0.06184691563248634, 'timestamp': '2025-10-02 00:49:04.397898', 'step': 21475, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:04.455869', 'step': 21475, 'epoch': 2}
{'type': 'loss', 'content': 0.045415185391902924, 'timestamp': '2025-10-02 00:49:04.465330', 'step': 21476, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:04.527484', 'step': 21476, 'epoch': 2}
{'type': 'loss', 'content': 0.048226453363895416, 'timestamp': '2025-10-02 00:49:04.537783', 'step': 21477, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:49:04.601598', 'step': 21477, 'epoch': 2}
{'type': 'loss', 'content': 0.0128850182518363, 'timestamp': '2025-10-02 00:49:04.611733', 'step': 21478, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:04.667246', 'step': 21478, 'epoch': 2}
{'type': 'loss', 'content': 0.05373813584446907, 'timestamp': '2025-10-02 00:49:04.670301', 'step': 21479, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:04.730137', 'step': 21479, 'epoch': 2}
{'type': 'loss', 'content': 0.047585371881723404, 'timestamp': '2025-10-02 00:49:04.736743', 'step': 21480, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:04.792628', 'step': 21480, 'epoch': 2}
{'type': 'loss', 'content': 0.16188134253025055, 'timestamp': '2025-10-02 00:49:04.795996', 'step': 21481, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:04.854631', 'step': 21481, 'epoch': 2}
{'type': 'loss', 'content': 0.08470549434423447, 'timestamp': '2025-10-02 00:49:04.858141', 'step': 21482, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:04.913300', 'step': 21482, 'epoch': 2}
{'type': 'loss', 'content': 0.09108585119247437, 'timestamp': '2025-10-02 00:49:04.918419', 'step': 21483, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:04.977854', 'step': 21483, 'epoch': 2}
{'type': 'loss', 'content': 0.02046399749815464, 'timestamp': '2025-10-02 00:49:04.989729', 'step': 21484, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:49:05.066965', 'step': 21484, 'epoch': 2}
{'type': 'loss', 'content': 0.024038473144173622, 'timestamp': '2025-10-02 00:49:05.077957', 'step': 21485, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:05.133789', 'step': 21485, 'epoch': 2}
{'type': 'loss', 'content': 0.004069184418767691, 'timestamp': '2025-10-02 00:49:05.136357', 'step': 21486, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:05.191300', 'step': 21486, 'epoch': 2}
{'type': 'loss', 'content': 0.006525860168039799, 'timestamp': '2025-10-02 00:49:05.198585', 'step': 21487, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:05.254123', 'step': 21487, 'epoch': 2}
{'type': 'loss', 'content': 0.05074186995625496, 'timestamp': '2025-10-02 00:49:05.259857', 'step': 21488, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:05.314010', 'step': 21488, 'epoch': 2}
{'type': 'loss', 'content': 0.05991840362548828, 'timestamp': '2025-10-02 00:49:05.318915', 'step': 21489, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:05.382469', 'step': 21489, 'epoch': 2}
{'type': 'loss', 'content': 0.046112313866615295, 'timestamp': '2025-10-02 00:49:05.388552', 'step': 21490, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:49:05.461859', 'step': 21490, 'epoch': 2}
{'type': 'loss', 'content': 0.022829318419098854, 'timestamp': '2025-10-02 00:49:05.472230', 'step': 21491, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:49:05.532487', 'step': 21491, 'epoch': 2}
{'type': 'loss', 'content': 0.13095328211784363, 'timestamp': '2025-10-02 00:49:05.541720', 'step': 21492, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:05.604105', 'step': 21492, 'epoch': 2}
{'type': 'loss', 'content': 0.0814690887928009, 'timestamp': '2025-10-02 00:49:05.606746', 'step': 21493, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:05.664293', 'step': 21493, 'epoch': 2}
{'type': 'loss', 'content': 0.006509941536933184, 'timestamp': '2025-10-02 00:49:05.671856', 'step': 21494, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:05.727833', 'step': 21494, 'epoch': 2}
{'type': 'loss', 'content': 0.0765940248966217, 'timestamp': '2025-10-02 00:49:05.730584', 'step': 21495, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:05.793376', 'step': 21495, 'epoch': 2}
{'type': 'loss', 'content': 0.07170262187719345, 'timestamp': '2025-10-02 00:49:05.801572', 'step': 21496, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:05.858805', 'step': 21496, 'epoch': 2}
{'type': 'loss', 'content': 0.09136228263378143, 'timestamp': '2025-10-02 00:49:05.863241', 'step': 21497, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:05.923165', 'step': 21497, 'epoch': 2}
{'type': 'loss', 'content': 0.034491196274757385, 'timestamp': '2025-10-02 00:49:05.930417', 'step': 21498, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:05.995964', 'step': 21498, 'epoch': 2}
{'type': 'loss', 'content': 0.0520748570561409, 'timestamp': '2025-10-02 00:49:05.999488', 'step': 21499, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:49:06.056395', 'step': 21499, 'epoch': 2}
{'type': 'loss', 'content': 0.06663426756858826, 'timestamp': '2025-10-02 00:49:06.062632', 'step': 21500, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 21500', 'timestamp': '2025-10-02 00:49:06.526827', 'step': 21500, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:49:06.584570', 'step': 21500, 'epoch': 2}
{'type': 'loss', 'content': 0.04611324518918991, 'timestamp': '2025-10-02 00:49:06.587011', 'step': 21501, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:06.642500', 'step': 21501, 'epoch': 2}
{'type': 'loss', 'content': 0.029757535085082054, 'timestamp': '2025-10-02 00:49:06.645114', 'step': 21502, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:06.702240', 'step': 21502, 'epoch': 2}
{'type': 'loss', 'content': 0.02179979719221592, 'timestamp': '2025-10-02 00:49:06.704821', 'step': 21503, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:06.760598', 'step': 21503, 'epoch': 2}
{'type': 'loss', 'content': 0.011526759713888168, 'timestamp': '2025-10-02 00:49:06.768306', 'step': 21504, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:06.823490', 'step': 21504, 'epoch': 2}
{'type': 'loss', 'content': 0.01058060023933649, 'timestamp': '2025-10-02 00:49:06.829035', 'step': 21505, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:06.885507', 'step': 21505, 'epoch': 2}
{'type': 'loss', 'content': 0.0751139372587204, 'timestamp': '2025-10-02 00:49:06.888374', 'step': 21506, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:06.943777', 'step': 21506, 'epoch': 2}
{'type': 'loss', 'content': 0.04003819823265076, 'timestamp': '2025-10-02 00:49:06.947731', 'step': 21507, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:07.003346', 'step': 21507, 'epoch': 2}
{'type': 'loss', 'content': 0.059159815311431885, 'timestamp': '2025-10-02 00:49:07.011665', 'step': 21508, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:07.066643', 'step': 21508, 'epoch': 2}
{'type': 'loss', 'content': 0.12809932231903076, 'timestamp': '2025-10-02 00:49:07.069226', 'step': 21509, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:07.128564', 'step': 21509, 'epoch': 2}
{'type': 'loss', 'content': 0.0067858085967600346, 'timestamp': '2025-10-02 00:49:07.136154', 'step': 21510, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:07.191521', 'step': 21510, 'epoch': 2}
{'type': 'loss', 'content': 0.10423500090837479, 'timestamp': '2025-10-02 00:49:07.195743', 'step': 21511, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:07.250334', 'step': 21511, 'epoch': 2}
{'type': 'loss', 'content': 0.08042639493942261, 'timestamp': '2025-10-02 00:49:07.257054', 'step': 21512, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:07.310609', 'step': 21512, 'epoch': 2}
{'type': 'loss', 'content': 0.055957719683647156, 'timestamp': '2025-10-02 00:49:07.316598', 'step': 21513, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:07.371332', 'step': 21513, 'epoch': 2}
{'type': 'loss', 'content': 0.04439838230609894, 'timestamp': '2025-10-02 00:49:07.378744', 'step': 21514, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:49:07.441432', 'step': 21514, 'epoch': 2}
{'type': 'loss', 'content': 0.024107594043016434, 'timestamp': '2025-10-02 00:49:07.452291', 'step': 21515, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:49:07.511843', 'step': 21515, 'epoch': 2}
{'type': 'loss', 'content': 0.023381749168038368, 'timestamp': '2025-10-02 00:49:07.522803', 'step': 21516, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:07.580096', 'step': 21516, 'epoch': 2}
{'type': 'loss', 'content': 0.09703311324119568, 'timestamp': '2025-10-02 00:49:07.582894', 'step': 21517, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:07.638228', 'step': 21517, 'epoch': 2}
{'type': 'loss', 'content': 0.03558487072587013, 'timestamp': '2025-10-02 00:49:07.641656', 'step': 21518, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:07.698215', 'step': 21518, 'epoch': 2}
{'type': 'loss', 'content': 0.06483394652605057, 'timestamp': '2025-10-02 00:49:07.707718', 'step': 21519, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:07.763402', 'step': 21519, 'epoch': 2}
{'type': 'loss', 'content': 0.12962597608566284, 'timestamp': '2025-10-02 00:49:07.770158', 'step': 21520, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:49:07.829853', 'step': 21520, 'epoch': 2}
{'type': 'loss', 'content': 0.06801726669073105, 'timestamp': '2025-10-02 00:49:07.840823', 'step': 21521, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:07.896896', 'step': 21521, 'epoch': 2}
{'type': 'loss', 'content': 0.06060120090842247, 'timestamp': '2025-10-02 00:49:07.904351', 'step': 21522, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:49:07.967639', 'step': 21522, 'epoch': 2}
{'type': 'loss', 'content': 0.06117185205221176, 'timestamp': '2025-10-02 00:49:07.978256', 'step': 21523, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:08.033735', 'step': 21523, 'epoch': 2}
{'type': 'loss', 'content': 0.018650854006409645, 'timestamp': '2025-10-02 00:49:08.041931', 'step': 21524, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:08.102895', 'step': 21524, 'epoch': 2}
{'type': 'loss', 'content': 0.05292865261435509, 'timestamp': '2025-10-02 00:49:08.106462', 'step': 21525, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:08.163112', 'step': 21525, 'epoch': 2}
{'type': 'loss', 'content': 0.10868509858846664, 'timestamp': '2025-10-02 00:49:08.169062', 'step': 21526, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:08.232446', 'step': 21526, 'epoch': 2}
{'type': 'loss', 'content': 0.02097136527299881, 'timestamp': '2025-10-02 00:49:08.235041', 'step': 21527, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:08.291370', 'step': 21527, 'epoch': 2}
{'type': 'loss', 'content': 0.010873693972826004, 'timestamp': '2025-10-02 00:49:08.299746', 'step': 21528, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:08.355492', 'step': 21528, 'epoch': 2}
{'type': 'loss', 'content': 0.04379177838563919, 'timestamp': '2025-10-02 00:49:08.358047', 'step': 21529, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:08.412766', 'step': 21529, 'epoch': 2}
{'type': 'loss', 'content': 0.0930815264582634, 'timestamp': '2025-10-02 00:49:08.416127', 'step': 21530, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:08.473352', 'step': 21530, 'epoch': 2}
{'type': 'loss', 'content': 0.02667045220732689, 'timestamp': '2025-10-02 00:49:08.482711', 'step': 21531, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:08.536686', 'step': 21531, 'epoch': 2}
{'type': 'loss', 'content': 0.1215517520904541, 'timestamp': '2025-10-02 00:49:08.542712', 'step': 21532, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:08.596374', 'step': 21532, 'epoch': 2}
{'type': 'loss', 'content': 0.1041412428021431, 'timestamp': '2025-10-02 00:49:08.598752', 'step': 21533, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:08.653841', 'step': 21533, 'epoch': 2}
{'type': 'loss', 'content': 0.04024814814329147, 'timestamp': '2025-10-02 00:49:08.656397', 'step': 21534, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:08.712357', 'step': 21534, 'epoch': 2}
{'type': 'loss', 'content': 0.0032922434620559216, 'timestamp': '2025-10-02 00:49:08.721888', 'step': 21535, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:08.776500', 'step': 21535, 'epoch': 2}
{'type': 'loss', 'content': 0.02291223406791687, 'timestamp': '2025-10-02 00:49:08.782399', 'step': 21536, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:08.837158', 'step': 21536, 'epoch': 2}
{'type': 'loss', 'content': 0.07380957156419754, 'timestamp': '2025-10-02 00:49:08.839684', 'step': 21537, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:08.894808', 'step': 21537, 'epoch': 2}
{'type': 'loss', 'content': 0.07887006551027298, 'timestamp': '2025-10-02 00:49:08.897217', 'step': 21538, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:08.951904', 'step': 21538, 'epoch': 2}
{'type': 'loss', 'content': 0.025629382580518723, 'timestamp': '2025-10-02 00:49:08.957857', 'step': 21539, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:09.013018', 'step': 21539, 'epoch': 2}
{'type': 'loss', 'content': 0.06067310646176338, 'timestamp': '2025-10-02 00:49:09.019177', 'step': 21540, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:09.074531', 'step': 21540, 'epoch': 2}
{'type': 'loss', 'content': 0.013072039932012558, 'timestamp': '2025-10-02 00:49:09.076921', 'step': 21541, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:09.131204', 'step': 21541, 'epoch': 2}
{'type': 'loss', 'content': 0.13863371312618256, 'timestamp': '2025-10-02 00:49:09.134888', 'step': 21542, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:09.195260', 'step': 21542, 'epoch': 2}
{'type': 'loss', 'content': 0.020991530269384384, 'timestamp': '2025-10-02 00:49:09.197803', 'step': 21543, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:09.254808', 'step': 21543, 'epoch': 2}
{'type': 'loss', 'content': 0.018510790541768074, 'timestamp': '2025-10-02 00:49:09.260856', 'step': 21544, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:09.315064', 'step': 21544, 'epoch': 2}
{'type': 'loss', 'content': 0.052378736436367035, 'timestamp': '2025-10-02 00:49:09.317876', 'step': 21545, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:09.373337', 'step': 21545, 'epoch': 2}
{'type': 'loss', 'content': 0.08012790977954865, 'timestamp': '2025-10-02 00:49:09.382685', 'step': 21546, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:09.438331', 'step': 21546, 'epoch': 2}
{'type': 'loss', 'content': 0.06987409293651581, 'timestamp': '2025-10-02 00:49:09.445838', 'step': 21547, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:09.501916', 'step': 21547, 'epoch': 2}
{'type': 'loss', 'content': 0.09939653426408768, 'timestamp': '2025-10-02 00:49:09.508244', 'step': 21548, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:09.561656', 'step': 21548, 'epoch': 2}
{'type': 'loss', 'content': 0.15175378322601318, 'timestamp': '2025-10-02 00:49:09.564680', 'step': 21549, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:09.622529', 'step': 21549, 'epoch': 2}
{'type': 'loss', 'content': 0.09258811175823212, 'timestamp': '2025-10-02 00:49:09.625734', 'step': 21550, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:49:09.690886', 'step': 21550, 'epoch': 2}
{'type': 'loss', 'content': 0.05217105895280838, 'timestamp': '2025-10-02 00:49:09.701574', 'step': 21551, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:09.758519', 'step': 21551, 'epoch': 2}
{'type': 'loss', 'content': 0.02576487697660923, 'timestamp': '2025-10-02 00:49:09.768644', 'step': 21552, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:09.824634', 'step': 21552, 'epoch': 2}
{'type': 'loss', 'content': 0.03954285755753517, 'timestamp': '2025-10-02 00:49:09.827720', 'step': 21553, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:09.886026', 'step': 21553, 'epoch': 2}
{'type': 'loss', 'content': 0.050896305590867996, 'timestamp': '2025-10-02 00:49:09.889433', 'step': 21554, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:09.946632', 'step': 21554, 'epoch': 2}
{'type': 'loss', 'content': 0.028463972732424736, 'timestamp': '2025-10-02 00:49:09.950631', 'step': 21555, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:10.007705', 'step': 21555, 'epoch': 2}
{'type': 'loss', 'content': 0.08710308372974396, 'timestamp': '2025-10-02 00:49:10.013658', 'step': 21556, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:10.069808', 'step': 21556, 'epoch': 2}
{'type': 'loss', 'content': 0.06357447057962418, 'timestamp': '2025-10-02 00:49:10.079325', 'step': 21557, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:10.137793', 'step': 21557, 'epoch': 2}
{'type': 'loss', 'content': 0.12427401542663574, 'timestamp': '2025-10-02 00:49:10.140918', 'step': 21558, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:10.196569', 'step': 21558, 'epoch': 2}
{'type': 'loss', 'content': 0.09889226406812668, 'timestamp': '2025-10-02 00:49:10.200406', 'step': 21559, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:10.260352', 'step': 21559, 'epoch': 2}
{'type': 'loss', 'content': 0.01611456647515297, 'timestamp': '2025-10-02 00:49:10.266184', 'step': 21560, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:10.322557', 'step': 21560, 'epoch': 2}
{'type': 'loss', 'content': 0.08645376563072205, 'timestamp': '2025-10-02 00:49:10.325885', 'step': 21561, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:10.382520', 'step': 21561, 'epoch': 2}
{'type': 'loss', 'content': 0.03745388239622116, 'timestamp': '2025-10-02 00:49:10.385395', 'step': 21562, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:10.443816', 'step': 21562, 'epoch': 2}
{'type': 'loss', 'content': 0.013186694122850895, 'timestamp': '2025-10-02 00:49:10.453344', 'step': 21563, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:49:10.515607', 'step': 21563, 'epoch': 2}
{'type': 'loss', 'content': 0.030021147802472115, 'timestamp': '2025-10-02 00:49:10.526818', 'step': 21564, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:10.583174', 'step': 21564, 'epoch': 2}
{'type': 'loss', 'content': 0.06488267332315445, 'timestamp': '2025-10-02 00:49:10.590688', 'step': 21565, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:10.647449', 'step': 21565, 'epoch': 2}
{'type': 'loss', 'content': 0.03945733234286308, 'timestamp': '2025-10-02 00:49:10.649849', 'step': 21566, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:10.706428', 'step': 21566, 'epoch': 2}
{'type': 'loss', 'content': 0.06817443668842316, 'timestamp': '2025-10-02 00:49:10.709638', 'step': 21567, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:10.766238', 'step': 21567, 'epoch': 2}
{'type': 'loss', 'content': 0.06686572730541229, 'timestamp': '2025-10-02 00:49:10.772908', 'step': 21568, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:10.828838', 'step': 21568, 'epoch': 2}
{'type': 'loss', 'content': 0.06860995292663574, 'timestamp': '2025-10-02 00:49:10.834878', 'step': 21569, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:10.892614', 'step': 21569, 'epoch': 2}
{'type': 'loss', 'content': 0.09143759310245514, 'timestamp': '2025-10-02 00:49:10.895716', 'step': 21570, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:10.952612', 'step': 21570, 'epoch': 2}
{'type': 'loss', 'content': 0.011273912154138088, 'timestamp': '2025-10-02 00:49:10.956056', 'step': 21571, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:11.013656', 'step': 21571, 'epoch': 2}
{'type': 'loss', 'content': 0.006155931390821934, 'timestamp': '2025-10-02 00:49:11.020451', 'step': 21572, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:11.076176', 'step': 21572, 'epoch': 2}
{'type': 'loss', 'content': 0.049853209406137466, 'timestamp': '2025-10-02 00:49:11.079579', 'step': 21573, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:11.140897', 'step': 21573, 'epoch': 2}
{'type': 'loss', 'content': 0.009177299216389656, 'timestamp': '2025-10-02 00:49:11.150434', 'step': 21574, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:11.205393', 'step': 21574, 'epoch': 2}
{'type': 'loss', 'content': 0.05871852487325668, 'timestamp': '2025-10-02 00:49:11.210458', 'step': 21575, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:49:11.271508', 'step': 21575, 'epoch': 2}
{'type': 'loss', 'content': 0.04518120735883713, 'timestamp': '2025-10-02 00:49:11.282441', 'step': 21576, 'epoch': 2}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:49:38.000371', 'step': 21576, 'epoch': 2}
{'type': 'pplx', 'content': 95.55549696932692, 'timestamp': '2025-10-02 00:49:38.004775', 'step': 21576, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:38.061982', 'step': 21576, 'epoch': 2}
{'type': 'loss', 'content': 0.045225728303194046, 'timestamp': '2025-10-02 00:49:38.064815', 'step': 21577, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:38.120843', 'step': 21577, 'epoch': 2}
{'type': 'loss', 'content': 0.0509612001478672, 'timestamp': '2025-10-02 00:49:38.123394', 'step': 21578, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:38.181738', 'step': 21578, 'epoch': 2}
{'type': 'loss', 'content': 0.015896664932370186, 'timestamp': '2025-10-02 00:49:38.188755', 'step': 21579, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:38.246639', 'step': 21579, 'epoch': 2}
{'type': 'loss', 'content': 0.13761569559574127, 'timestamp': '2025-10-02 00:49:38.253190', 'step': 21580, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:49:38.316950', 'step': 21580, 'epoch': 2}
{'type': 'loss', 'content': 0.06130196899175644, 'timestamp': '2025-10-02 00:49:38.328509', 'step': 21581, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:38.385817', 'step': 21581, 'epoch': 2}
{'type': 'loss', 'content': 0.033327337354421616, 'timestamp': '2025-10-02 00:49:38.388910', 'step': 21582, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:38.446345', 'step': 21582, 'epoch': 2}
{'type': 'loss', 'content': 0.0855853334069252, 'timestamp': '2025-10-02 00:49:38.448805', 'step': 21583, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:38.504702', 'step': 21583, 'epoch': 2}
{'type': 'loss', 'content': 0.025656849145889282, 'timestamp': '2025-10-02 00:49:38.511376', 'step': 21584, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:38.567649', 'step': 21584, 'epoch': 2}
{'type': 'loss', 'content': 0.1057707890868187, 'timestamp': '2025-10-02 00:49:38.569792', 'step': 21585, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:38.626947', 'step': 21585, 'epoch': 2}
{'type': 'loss', 'content': 0.03798795863986015, 'timestamp': '2025-10-02 00:49:38.630454', 'step': 21586, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:38.687683', 'step': 21586, 'epoch': 2}
{'type': 'loss', 'content': 0.042838435620069504, 'timestamp': '2025-10-02 00:49:38.689476', 'step': 21587, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:38.745166', 'step': 21587, 'epoch': 2}
{'type': 'loss', 'content': 0.05632626265287399, 'timestamp': '2025-10-02 00:49:38.750801', 'step': 21588, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:38.808003', 'step': 21588, 'epoch': 2}
{'type': 'loss', 'content': 0.04153056815266609, 'timestamp': '2025-10-02 00:49:38.815459', 'step': 21589, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:38.874272', 'step': 21589, 'epoch': 2}
{'type': 'loss', 'content': 0.02964731864631176, 'timestamp': '2025-10-02 00:49:38.877222', 'step': 21590, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:49:38.932678', 'step': 21590, 'epoch': 2}
{'type': 'loss', 'content': 0.021992769092321396, 'timestamp': '2025-10-02 00:49:38.935668', 'step': 21591, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:38.992159', 'step': 21591, 'epoch': 2}
{'type': 'loss', 'content': 0.07391106337308884, 'timestamp': '2025-10-02 00:49:38.998659', 'step': 21592, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:49:39.053348', 'step': 21592, 'epoch': 2}
{'type': 'loss', 'content': 0.1834547072649002, 'timestamp': '2025-10-02 00:49:39.055634', 'step': 21593, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:39.112391', 'step': 21593, 'epoch': 2}
{'type': 'loss', 'content': 0.08547595143318176, 'timestamp': '2025-10-02 00:49:39.114903', 'step': 21594, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:49:39.177028', 'step': 21594, 'epoch': 2}
{'type': 'loss', 'content': 0.02850279211997986, 'timestamp': '2025-10-02 00:49:39.187311', 'step': 21595, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:49:39.244752', 'step': 21595, 'epoch': 2}
{'type': 'loss', 'content': 0.12678448855876923, 'timestamp': '2025-10-02 00:49:39.250952', 'step': 21596, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:49:39.313136', 'step': 21596, 'epoch': 2}
{'type': 'loss', 'content': 0.047619547694921494, 'timestamp': '2025-10-02 00:49:39.324451', 'step': 21597, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:39.381393', 'step': 21597, 'epoch': 2}
{'type': 'loss', 'content': 0.09990514069795609, 'timestamp': '2025-10-02 00:49:39.384323', 'step': 21598, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:39.440227', 'step': 21598, 'epoch': 2}
{'type': 'loss', 'content': 0.038508493453264236, 'timestamp': '2025-10-02 00:49:39.445870', 'step': 21599, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:39.503145', 'step': 21599, 'epoch': 2}
{'type': 'loss', 'content': 0.0373586006462574, 'timestamp': '2025-10-02 00:49:39.509994', 'step': 21600, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:39.567669', 'step': 21600, 'epoch': 2}
{'type': 'loss', 'content': 0.09007441252470016, 'timestamp': '2025-10-02 00:49:39.570505', 'step': 21601, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:39.626789', 'step': 21601, 'epoch': 2}
{'type': 'loss', 'content': 0.02707548253238201, 'timestamp': '2025-10-02 00:49:39.634349', 'step': 21602, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:39.690638', 'step': 21602, 'epoch': 2}
{'type': 'loss', 'content': 0.045703187584877014, 'timestamp': '2025-10-02 00:49:39.693362', 'step': 21603, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:39.750780', 'step': 21603, 'epoch': 2}
{'type': 'loss', 'content': 0.0008977727266028523, 'timestamp': '2025-10-02 00:49:39.760926', 'step': 21604, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:39.817192', 'step': 21604, 'epoch': 2}
{'type': 'loss', 'content': 0.04662076383829117, 'timestamp': '2025-10-02 00:49:39.826775', 'step': 21605, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:39.881774', 'step': 21605, 'epoch': 2}
{'type': 'loss', 'content': 0.050253547728061676, 'timestamp': '2025-10-02 00:49:39.887666', 'step': 21606, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:39.947424', 'step': 21606, 'epoch': 2}
{'type': 'loss', 'content': 0.06021694839000702, 'timestamp': '2025-10-02 00:49:39.956812', 'step': 21607, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:40.013040', 'step': 21607, 'epoch': 2}
{'type': 'loss', 'content': 0.03548034280538559, 'timestamp': '2025-10-02 00:49:40.021346', 'step': 21608, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:40.076198', 'step': 21608, 'epoch': 2}
{'type': 'loss', 'content': 0.07663361728191376, 'timestamp': '2025-10-02 00:49:40.078413', 'step': 21609, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:40.132392', 'step': 21609, 'epoch': 2}
{'type': 'loss', 'content': 0.09972762316465378, 'timestamp': '2025-10-02 00:49:40.134886', 'step': 21610, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:40.189669', 'step': 21610, 'epoch': 2}
{'type': 'loss', 'content': 0.11235237121582031, 'timestamp': '2025-10-02 00:49:40.192013', 'step': 21611, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:40.246527', 'step': 21611, 'epoch': 2}
{'type': 'loss', 'content': 0.047491855919361115, 'timestamp': '2025-10-02 00:49:40.253199', 'step': 21612, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:49:40.315908', 'step': 21612, 'epoch': 2}
{'type': 'loss', 'content': 0.07829853892326355, 'timestamp': '2025-10-02 00:49:40.318102', 'step': 21613, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:49:40.380332', 'step': 21613, 'epoch': 2}
{'type': 'loss', 'content': 0.047670572996139526, 'timestamp': '2025-10-02 00:49:40.390792', 'step': 21614, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:40.447511', 'step': 21614, 'epoch': 2}
{'type': 'loss', 'content': 0.030059250071644783, 'timestamp': '2025-10-02 00:49:40.456988', 'step': 21615, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:40.511883', 'step': 21615, 'epoch': 2}
{'type': 'loss', 'content': 0.07394939661026001, 'timestamp': '2025-10-02 00:49:40.518180', 'step': 21616, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:40.572576', 'step': 21616, 'epoch': 2}
{'type': 'loss', 'content': 0.047886576503515244, 'timestamp': '2025-10-02 00:49:40.575178', 'step': 21617, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:40.629997', 'step': 21617, 'epoch': 2}
{'type': 'loss', 'content': 0.05643593892455101, 'timestamp': '2025-10-02 00:49:40.632444', 'step': 21618, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:49:40.686981', 'step': 21618, 'epoch': 2}
{'type': 'loss', 'content': 0.08179233223199844, 'timestamp': '2025-10-02 00:49:40.689342', 'step': 21619, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:40.744304', 'step': 21619, 'epoch': 2}
{'type': 'loss', 'content': 0.023727845400571823, 'timestamp': '2025-10-02 00:49:40.750275', 'step': 21620, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:40.803779', 'step': 21620, 'epoch': 2}
{'type': 'loss', 'content': 0.20472648739814758, 'timestamp': '2025-10-02 00:49:40.805923', 'step': 21621, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:40.864434', 'step': 21621, 'epoch': 2}
{'type': 'loss', 'content': 0.05279233306646347, 'timestamp': '2025-10-02 00:49:40.866595', 'step': 21622, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:40.921032', 'step': 21622, 'epoch': 2}
{'type': 'loss', 'content': 0.03943143039941788, 'timestamp': '2025-10-02 00:49:40.923398', 'step': 21623, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:40.978377', 'step': 21623, 'epoch': 2}
{'type': 'loss', 'content': 0.06953675299882889, 'timestamp': '2025-10-02 00:49:40.985336', 'step': 21624, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:41.040624', 'step': 21624, 'epoch': 2}
{'type': 'loss', 'content': 0.04199819639325142, 'timestamp': '2025-10-02 00:49:41.050852', 'step': 21625, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:41.105795', 'step': 21625, 'epoch': 2}
{'type': 'loss', 'content': 0.12472446262836456, 'timestamp': '2025-10-02 00:49:41.108431', 'step': 21626, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:41.162890', 'step': 21626, 'epoch': 2}
{'type': 'loss', 'content': 0.15073490142822266, 'timestamp': '2025-10-02 00:49:41.165056', 'step': 21627, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:41.219005', 'step': 21627, 'epoch': 2}
{'type': 'loss', 'content': 0.040604714304208755, 'timestamp': '2025-10-02 00:49:41.224977', 'step': 21628, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:49:41.286722', 'step': 21628, 'epoch': 2}
{'type': 'loss', 'content': 0.01620975323021412, 'timestamp': '2025-10-02 00:49:41.298482', 'step': 21629, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:41.353354', 'step': 21629, 'epoch': 2}
{'type': 'loss', 'content': 0.03237592428922653, 'timestamp': '2025-10-02 00:49:41.355873', 'step': 21630, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:41.411505', 'step': 21630, 'epoch': 2}
{'type': 'loss', 'content': 0.05076843500137329, 'timestamp': '2025-10-02 00:49:41.418964', 'step': 21631, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:49:41.475037', 'step': 21631, 'epoch': 2}
{'type': 'loss', 'content': 0.09076382219791412, 'timestamp': '2025-10-02 00:49:41.481195', 'step': 21632, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:41.535784', 'step': 21632, 'epoch': 2}
{'type': 'loss', 'content': 0.011177240870893002, 'timestamp': '2025-10-02 00:49:41.543359', 'step': 21633, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:41.597832', 'step': 21633, 'epoch': 2}
{'type': 'loss', 'content': 0.10545104742050171, 'timestamp': '2025-10-02 00:49:41.600248', 'step': 21634, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:41.655225', 'step': 21634, 'epoch': 2}
{'type': 'loss', 'content': 0.020290708169341087, 'timestamp': '2025-10-02 00:49:41.662866', 'step': 21635, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:41.718371', 'step': 21635, 'epoch': 2}
{'type': 'loss', 'content': 0.10311779379844666, 'timestamp': '2025-10-02 00:49:41.724337', 'step': 21636, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:41.779755', 'step': 21636, 'epoch': 2}
{'type': 'loss', 'content': 0.02795303426682949, 'timestamp': '2025-10-02 00:49:41.782403', 'step': 21637, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:41.837000', 'step': 21637, 'epoch': 2}
{'type': 'loss', 'content': 0.022997647523880005, 'timestamp': '2025-10-02 00:49:41.839606', 'step': 21638, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:41.896108', 'step': 21638, 'epoch': 2}
{'type': 'loss', 'content': 0.046871479600667953, 'timestamp': '2025-10-02 00:49:41.905622', 'step': 21639, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:49:41.966242', 'step': 21639, 'epoch': 2}
{'type': 'loss', 'content': 0.006402903702110052, 'timestamp': '2025-10-02 00:49:41.977187', 'step': 21640, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:49:42.032083', 'step': 21640, 'epoch': 2}
{'type': 'loss', 'content': 0.14135412871837616, 'timestamp': '2025-10-02 00:49:42.034796', 'step': 21641, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:49:42.089865', 'step': 21641, 'epoch': 2}
{'type': 'loss', 'content': 0.1706935465335846, 'timestamp': '2025-10-02 00:49:42.091899', 'step': 21642, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:42.145938', 'step': 21642, 'epoch': 2}
{'type': 'loss', 'content': 0.2940676212310791, 'timestamp': '2025-10-02 00:49:42.148115', 'step': 21643, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:42.203613', 'step': 21643, 'epoch': 2}
{'type': 'loss', 'content': 0.03211633116006851, 'timestamp': '2025-10-02 00:49:42.209998', 'step': 21644, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:42.264321', 'step': 21644, 'epoch': 2}
{'type': 'loss', 'content': 0.01696554385125637, 'timestamp': '2025-10-02 00:49:42.274009', 'step': 21645, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:42.328301', 'step': 21645, 'epoch': 2}
{'type': 'loss', 'content': 0.07642301172018051, 'timestamp': '2025-10-02 00:49:42.330399', 'step': 21646, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:49:42.385223', 'step': 21646, 'epoch': 2}
{'type': 'loss', 'content': 0.03477790206670761, 'timestamp': '2025-10-02 00:49:42.389397', 'step': 21647, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:49:42.452189', 'step': 21647, 'epoch': 2}
{'type': 'loss', 'content': 0.02661898173391819, 'timestamp': '2025-10-02 00:49:42.463431', 'step': 21648, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:42.517551', 'step': 21648, 'epoch': 2}
{'type': 'loss', 'content': 0.07930190116167068, 'timestamp': '2025-10-02 00:49:42.525224', 'step': 21649, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:42.580015', 'step': 21649, 'epoch': 2}
{'type': 'loss', 'content': 0.009157126769423485, 'timestamp': '2025-10-02 00:49:42.582517', 'step': 21650, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:42.639109', 'step': 21650, 'epoch': 2}
{'type': 'loss', 'content': 0.018328173086047173, 'timestamp': '2025-10-02 00:49:42.641761', 'step': 21651, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:42.696720', 'step': 21651, 'epoch': 2}
{'type': 'loss', 'content': 0.019476527348160744, 'timestamp': '2025-10-02 00:49:42.702578', 'step': 21652, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:42.756615', 'step': 21652, 'epoch': 2}
{'type': 'loss', 'content': 0.13772717118263245, 'timestamp': '2025-10-02 00:49:42.760373', 'step': 21653, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:42.817172', 'step': 21653, 'epoch': 2}
{'type': 'loss', 'content': 0.007566317915916443, 'timestamp': '2025-10-02 00:49:42.826706', 'step': 21654, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:42.882249', 'step': 21654, 'epoch': 2}
{'type': 'loss', 'content': 0.0204825010150671, 'timestamp': '2025-10-02 00:49:42.888172', 'step': 21655, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:42.943127', 'step': 21655, 'epoch': 2}
{'type': 'loss', 'content': 0.0010202128905802965, 'timestamp': '2025-10-02 00:49:42.953273', 'step': 21656, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:43.007310', 'step': 21656, 'epoch': 2}
{'type': 'loss', 'content': 0.09224997460842133, 'timestamp': '2025-10-02 00:49:43.009460', 'step': 21657, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:43.064131', 'step': 21657, 'epoch': 2}
{'type': 'loss', 'content': 0.04318296164274216, 'timestamp': '2025-10-02 00:49:43.073490', 'step': 21658, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:49:43.127939', 'step': 21658, 'epoch': 2}
{'type': 'loss', 'content': 0.036751944571733475, 'timestamp': '2025-10-02 00:49:43.130118', 'step': 21659, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:49:43.189421', 'step': 21659, 'epoch': 2}
{'type': 'loss', 'content': 0.005049382336437702, 'timestamp': '2025-10-02 00:49:43.200390', 'step': 21660, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:43.255632', 'step': 21660, 'epoch': 2}
{'type': 'loss', 'content': 0.08671434223651886, 'timestamp': '2025-10-02 00:49:43.258051', 'step': 21661, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:43.313146', 'step': 21661, 'epoch': 2}
{'type': 'loss', 'content': 0.07857555150985718, 'timestamp': '2025-10-02 00:49:43.315528', 'step': 21662, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:43.373303', 'step': 21662, 'epoch': 2}
{'type': 'loss', 'content': 0.04397669434547424, 'timestamp': '2025-10-02 00:49:43.376288', 'step': 21663, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:43.431672', 'step': 21663, 'epoch': 2}
{'type': 'loss', 'content': 0.13367925584316254, 'timestamp': '2025-10-02 00:49:43.438068', 'step': 21664, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:43.492430', 'step': 21664, 'epoch': 2}
{'type': 'loss', 'content': 0.0753369927406311, 'timestamp': '2025-10-02 00:49:43.502117', 'step': 21665, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:43.556421', 'step': 21665, 'epoch': 2}
{'type': 'loss', 'content': 0.010941969230771065, 'timestamp': '2025-10-02 00:49:43.558688', 'step': 21666, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:43.613479', 'step': 21666, 'epoch': 2}
{'type': 'loss', 'content': 0.05146632343530655, 'timestamp': '2025-10-02 00:49:43.621025', 'step': 21667, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:43.675058', 'step': 21667, 'epoch': 2}
{'type': 'loss', 'content': 0.0835423395037651, 'timestamp': '2025-10-02 00:49:43.681286', 'step': 21668, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:43.734723', 'step': 21668, 'epoch': 2}
{'type': 'loss', 'content': 0.10289741307497025, 'timestamp': '2025-10-02 00:49:43.737364', 'step': 21669, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:43.792365', 'step': 21669, 'epoch': 2}
{'type': 'loss', 'content': 0.03881034627556801, 'timestamp': '2025-10-02 00:49:43.794995', 'step': 21670, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:43.850175', 'step': 21670, 'epoch': 2}
{'type': 'loss', 'content': 0.03464169055223465, 'timestamp': '2025-10-02 00:49:43.857711', 'step': 21671, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:43.913245', 'step': 21671, 'epoch': 2}
{'type': 'loss', 'content': 0.07067745923995972, 'timestamp': '2025-10-02 00:49:43.919134', 'step': 21672, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:43.973715', 'step': 21672, 'epoch': 2}
{'type': 'loss', 'content': 0.026616685092449188, 'timestamp': '2025-10-02 00:49:43.979728', 'step': 21673, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:44.035271', 'step': 21673, 'epoch': 2}
{'type': 'loss', 'content': 0.045840371400117874, 'timestamp': '2025-10-02 00:49:44.042823', 'step': 21674, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:44.097702', 'step': 21674, 'epoch': 2}
{'type': 'loss', 'content': 0.08643737435340881, 'timestamp': '2025-10-02 00:49:44.103589', 'step': 21675, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:44.158423', 'step': 21675, 'epoch': 2}
{'type': 'loss', 'content': 0.10316227376461029, 'timestamp': '2025-10-02 00:49:44.164311', 'step': 21676, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:44.217778', 'step': 21676, 'epoch': 2}
{'type': 'loss', 'content': 0.05615227669477463, 'timestamp': '2025-10-02 00:49:44.223811', 'step': 21677, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:44.278527', 'step': 21677, 'epoch': 2}
{'type': 'loss', 'content': 0.08301674574613571, 'timestamp': '2025-10-02 00:49:44.283229', 'step': 21678, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:44.339674', 'step': 21678, 'epoch': 2}
{'type': 'loss', 'content': 0.08123105764389038, 'timestamp': '2025-10-02 00:49:44.342989', 'step': 21679, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:44.399463', 'step': 21679, 'epoch': 2}
{'type': 'loss', 'content': 0.0063454522751271725, 'timestamp': '2025-10-02 00:49:44.407973', 'step': 21680, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:44.463821', 'step': 21680, 'epoch': 2}
{'type': 'loss', 'content': 0.021243732422590256, 'timestamp': '2025-10-02 00:49:44.466656', 'step': 21681, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:44.522196', 'step': 21681, 'epoch': 2}
{'type': 'loss', 'content': 0.11963825672864914, 'timestamp': '2025-10-02 00:49:44.524566', 'step': 21682, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:44.579434', 'step': 21682, 'epoch': 2}
{'type': 'loss', 'content': 0.03653064742684364, 'timestamp': '2025-10-02 00:49:44.581630', 'step': 21683, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:44.636675', 'step': 21683, 'epoch': 2}
{'type': 'loss', 'content': 0.09300137311220169, 'timestamp': '2025-10-02 00:49:44.642806', 'step': 21684, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:44.697000', 'step': 21684, 'epoch': 2}
{'type': 'loss', 'content': 0.08352719247341156, 'timestamp': '2025-10-02 00:49:44.699369', 'step': 21685, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:44.753277', 'step': 21685, 'epoch': 2}
{'type': 'loss', 'content': 0.11634969711303711, 'timestamp': '2025-10-02 00:49:44.755807', 'step': 21686, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:44.811160', 'step': 21686, 'epoch': 2}
{'type': 'loss', 'content': 0.040436454117298126, 'timestamp': '2025-10-02 00:49:44.813767', 'step': 21687, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:49:44.887046', 'step': 21687, 'epoch': 2}
{'type': 'loss', 'content': 0.04807262867689133, 'timestamp': '2025-10-02 00:49:44.898475', 'step': 21688, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:44.952514', 'step': 21688, 'epoch': 2}
{'type': 'loss', 'content': 0.040200650691986084, 'timestamp': '2025-10-02 00:49:44.954281', 'step': 21689, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:45.008769', 'step': 21689, 'epoch': 2}
{'type': 'loss', 'content': 0.06716778874397278, 'timestamp': '2025-10-02 00:49:45.016517', 'step': 21690, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:45.071966', 'step': 21690, 'epoch': 2}
{'type': 'loss', 'content': 0.002443983219563961, 'timestamp': '2025-10-02 00:49:45.081323', 'step': 21691, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:49:45.145714', 'step': 21691, 'epoch': 2}
{'type': 'loss', 'content': 0.017316468060016632, 'timestamp': '2025-10-02 00:49:45.157399', 'step': 21692, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:45.210594', 'step': 21692, 'epoch': 2}
{'type': 'loss', 'content': 0.19259287416934967, 'timestamp': '2025-10-02 00:49:45.212821', 'step': 21693, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:45.267233', 'step': 21693, 'epoch': 2}
{'type': 'loss', 'content': 0.056627023965120316, 'timestamp': '2025-10-02 00:49:45.274921', 'step': 21694, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:49:45.330849', 'step': 21694, 'epoch': 2}
{'type': 'loss', 'content': 0.13306711614131927, 'timestamp': '2025-10-02 00:49:45.333408', 'step': 21695, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:49:45.387561', 'step': 21695, 'epoch': 2}
{'type': 'loss', 'content': 0.039109885692596436, 'timestamp': '2025-10-02 00:49:45.393500', 'step': 21696, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:45.447642', 'step': 21696, 'epoch': 2}
{'type': 'loss', 'content': 0.09050754457712173, 'timestamp': '2025-10-02 00:49:45.449955', 'step': 21697, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:45.505359', 'step': 21697, 'epoch': 2}
{'type': 'loss', 'content': 0.08011407405138016, 'timestamp': '2025-10-02 00:49:45.507282', 'step': 21698, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:45.561119', 'step': 21698, 'epoch': 2}
{'type': 'loss', 'content': 0.0582146942615509, 'timestamp': '2025-10-02 00:49:45.563532', 'step': 21699, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:49:45.623117', 'step': 21699, 'epoch': 2}
{'type': 'loss', 'content': 0.034111782908439636, 'timestamp': '2025-10-02 00:49:45.634108', 'step': 21700, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:49:45.693776', 'step': 21700, 'epoch': 2}
{'type': 'loss', 'content': 0.02023552544414997, 'timestamp': '2025-10-02 00:49:45.704797', 'step': 21701, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:45.759314', 'step': 21701, 'epoch': 2}
{'type': 'loss', 'content': 0.14659368991851807, 'timestamp': '2025-10-02 00:49:45.761510', 'step': 21702, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:49:45.823631', 'step': 21702, 'epoch': 2}
{'type': 'loss', 'content': 0.025634313002228737, 'timestamp': '2025-10-02 00:49:45.834142', 'step': 21703, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:49:45.901226', 'step': 21703, 'epoch': 2}
{'type': 'loss', 'content': 0.03604884073138237, 'timestamp': '2025-10-02 00:49:45.913993', 'step': 21704, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:45.968465', 'step': 21704, 'epoch': 2}
{'type': 'loss', 'content': 0.029542388394474983, 'timestamp': '2025-10-02 00:49:45.970651', 'step': 21705, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:46.024645', 'step': 21705, 'epoch': 2}
{'type': 'loss', 'content': 0.033595792949199677, 'timestamp': '2025-10-02 00:49:46.027840', 'step': 21706, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:46.082646', 'step': 21706, 'epoch': 2}
{'type': 'loss', 'content': 0.08694193512201309, 'timestamp': '2025-10-02 00:49:46.088635', 'step': 21707, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:49:46.150362', 'step': 21707, 'epoch': 2}
{'type': 'loss', 'content': 0.01725396327674389, 'timestamp': '2025-10-02 00:49:46.161601', 'step': 21708, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:46.215965', 'step': 21708, 'epoch': 2}
{'type': 'loss', 'content': 0.10083428025245667, 'timestamp': '2025-10-02 00:49:46.218021', 'step': 21709, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:46.273082', 'step': 21709, 'epoch': 2}
{'type': 'loss', 'content': 0.14765168726444244, 'timestamp': '2025-10-02 00:49:46.275665', 'step': 21710, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:46.330032', 'step': 21710, 'epoch': 2}
{'type': 'loss', 'content': 0.026301661506295204, 'timestamp': '2025-10-02 00:49:46.335975', 'step': 21711, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:46.391339', 'step': 21711, 'epoch': 2}
{'type': 'loss', 'content': 0.056303322315216064, 'timestamp': '2025-10-02 00:49:46.397562', 'step': 21712, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:46.451486', 'step': 21712, 'epoch': 2}
{'type': 'loss', 'content': 0.012687522917985916, 'timestamp': '2025-10-02 00:49:46.459093', 'step': 21713, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:46.513941', 'step': 21713, 'epoch': 2}
{'type': 'loss', 'content': 0.07560154795646667, 'timestamp': '2025-10-02 00:49:46.516135', 'step': 21714, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:46.570204', 'step': 21714, 'epoch': 2}
{'type': 'loss', 'content': 0.0528317354619503, 'timestamp': '2025-10-02 00:49:46.572681', 'step': 21715, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:49:46.627297', 'step': 21715, 'epoch': 2}
{'type': 'loss', 'content': 0.04385481774806976, 'timestamp': '2025-10-02 00:49:46.632932', 'step': 21716, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:46.686603', 'step': 21716, 'epoch': 2}
{'type': 'loss', 'content': 0.026640284806489944, 'timestamp': '2025-10-02 00:49:46.688951', 'step': 21717, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:46.744090', 'step': 21717, 'epoch': 2}
{'type': 'loss', 'content': 0.1332445591688156, 'timestamp': '2025-10-02 00:49:46.746460', 'step': 21718, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:46.800888', 'step': 21718, 'epoch': 2}
{'type': 'loss', 'content': 0.1203194409608841, 'timestamp': '2025-10-02 00:49:46.803700', 'step': 21719, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:46.858831', 'step': 21719, 'epoch': 2}
{'type': 'loss', 'content': 0.07071756571531296, 'timestamp': '2025-10-02 00:49:46.864474', 'step': 21720, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:49:46.925146', 'step': 21720, 'epoch': 2}
{'type': 'loss', 'content': 0.06353338062763214, 'timestamp': '2025-10-02 00:49:46.936459', 'step': 21721, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:46.991210', 'step': 21721, 'epoch': 2}
{'type': 'loss', 'content': 0.049790024757385254, 'timestamp': '2025-10-02 00:49:46.998770', 'step': 21722, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:47.054344', 'step': 21722, 'epoch': 2}
{'type': 'loss', 'content': 0.08823172003030777, 'timestamp': '2025-10-02 00:49:47.056717', 'step': 21723, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:47.112372', 'step': 21723, 'epoch': 2}
{'type': 'loss', 'content': 0.037574660032987595, 'timestamp': '2025-10-02 00:49:47.122682', 'step': 21724, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:49:47.176473', 'step': 21724, 'epoch': 2}
{'type': 'loss', 'content': 0.07284499704837799, 'timestamp': '2025-10-02 00:49:47.179054', 'step': 21725, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:47.233348', 'step': 21725, 'epoch': 2}
{'type': 'loss', 'content': 0.07195699214935303, 'timestamp': '2025-10-02 00:49:47.235472', 'step': 21726, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:47.290679', 'step': 21726, 'epoch': 2}
{'type': 'loss', 'content': 0.040455568581819534, 'timestamp': '2025-10-02 00:49:47.292904', 'step': 21727, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:49:47.347083', 'step': 21727, 'epoch': 2}
{'type': 'loss', 'content': 0.1217789426445961, 'timestamp': '2025-10-02 00:49:47.352847', 'step': 21728, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:47.407122', 'step': 21728, 'epoch': 2}
{'type': 'loss', 'content': 0.06482129544019699, 'timestamp': '2025-10-02 00:49:47.409178', 'step': 21729, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:47.463086', 'step': 21729, 'epoch': 2}
{'type': 'loss', 'content': 0.1520417034626007, 'timestamp': '2025-10-02 00:49:47.465637', 'step': 21730, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:47.521844', 'step': 21730, 'epoch': 2}
{'type': 'loss', 'content': 0.05934276431798935, 'timestamp': '2025-10-02 00:49:47.527753', 'step': 21731, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:47.583909', 'step': 21731, 'epoch': 2}
{'type': 'loss', 'content': 0.15906791388988495, 'timestamp': '2025-10-02 00:49:47.590543', 'step': 21732, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:47.648178', 'step': 21732, 'epoch': 2}
{'type': 'loss', 'content': 0.016710082069039345, 'timestamp': '2025-10-02 00:49:47.650455', 'step': 21733, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:49:47.717295', 'step': 21733, 'epoch': 2}
{'type': 'loss', 'content': 0.023559007793664932, 'timestamp': '2025-10-02 00:49:47.728138', 'step': 21734, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:47.784190', 'step': 21734, 'epoch': 2}
{'type': 'loss', 'content': 0.04156406596302986, 'timestamp': '2025-10-02 00:49:47.787363', 'step': 21735, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:47.845390', 'step': 21735, 'epoch': 2}
{'type': 'loss', 'content': 0.10368771851062775, 'timestamp': '2025-10-02 00:49:47.851876', 'step': 21736, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:47.907368', 'step': 21736, 'epoch': 2}
{'type': 'loss', 'content': 0.04408189654350281, 'timestamp': '2025-10-02 00:49:47.914997', 'step': 21737, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:47.972210', 'step': 21737, 'epoch': 2}
{'type': 'loss', 'content': 0.08456610888242722, 'timestamp': '2025-10-02 00:49:47.979734', 'step': 21738, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:48.036723', 'step': 21738, 'epoch': 2}
{'type': 'loss', 'content': 0.048582326620817184, 'timestamp': '2025-10-02 00:49:48.044288', 'step': 21739, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:48.100168', 'step': 21739, 'epoch': 2}
{'type': 'loss', 'content': 0.03332698717713356, 'timestamp': '2025-10-02 00:49:48.107002', 'step': 21740, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:48.164488', 'step': 21740, 'epoch': 2}
{'type': 'loss', 'content': 0.07339078933000565, 'timestamp': '2025-10-02 00:49:48.167658', 'step': 21741, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:48.224151', 'step': 21741, 'epoch': 2}
{'type': 'loss', 'content': 0.05169553682208061, 'timestamp': '2025-10-02 00:49:48.228245', 'step': 21742, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:48.286814', 'step': 21742, 'epoch': 2}
{'type': 'loss', 'content': 0.091941237449646, 'timestamp': '2025-10-02 00:49:48.289411', 'step': 21743, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:48.345097', 'step': 21743, 'epoch': 2}
{'type': 'loss', 'content': 0.15100587904453278, 'timestamp': '2025-10-02 00:49:48.351597', 'step': 21744, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:48.406688', 'step': 21744, 'epoch': 2}
{'type': 'loss', 'content': 0.03519483655691147, 'timestamp': '2025-10-02 00:49:48.409519', 'step': 21745, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:48.466266', 'step': 21745, 'epoch': 2}
{'type': 'loss', 'content': 0.013642203994095325, 'timestamp': '2025-10-02 00:49:48.468621', 'step': 21746, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:48.524508', 'step': 21746, 'epoch': 2}
{'type': 'loss', 'content': 0.009289195761084557, 'timestamp': '2025-10-02 00:49:48.527233', 'step': 21747, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:48.582300', 'step': 21747, 'epoch': 2}
{'type': 'loss', 'content': 0.06819228827953339, 'timestamp': '2025-10-02 00:49:48.589209', 'step': 21748, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:48.644196', 'step': 21748, 'epoch': 2}
{'type': 'loss', 'content': 0.058044590055942535, 'timestamp': '2025-10-02 00:49:48.648017', 'step': 21749, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:49:48.718856', 'step': 21749, 'epoch': 2}
{'type': 'loss', 'content': 0.030345482751727104, 'timestamp': '2025-10-02 00:49:48.731123', 'step': 21750, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:48.788135', 'step': 21750, 'epoch': 2}
{'type': 'loss', 'content': 0.045114122331142426, 'timestamp': '2025-10-02 00:49:48.797665', 'step': 21751, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:48.854183', 'step': 21751, 'epoch': 2}
{'type': 'loss', 'content': 0.03289613872766495, 'timestamp': '2025-10-02 00:49:48.860657', 'step': 21752, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:49:48.921512', 'step': 21752, 'epoch': 2}
{'type': 'loss', 'content': 0.02989276498556137, 'timestamp': '2025-10-02 00:49:48.932875', 'step': 21753, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:48.990161', 'step': 21753, 'epoch': 2}
{'type': 'loss', 'content': 0.054207805544137955, 'timestamp': '2025-10-02 00:49:48.993144', 'step': 21754, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:49.050129', 'step': 21754, 'epoch': 2}
{'type': 'loss', 'content': 0.01118906307965517, 'timestamp': '2025-10-02 00:49:49.059652', 'step': 21755, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:49:49.134693', 'step': 21755, 'epoch': 2}
{'type': 'loss', 'content': 0.027682622894644737, 'timestamp': '2025-10-02 00:49:49.148949', 'step': 21756, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:49.203803', 'step': 21756, 'epoch': 2}
{'type': 'loss', 'content': 0.07520433515310287, 'timestamp': '2025-10-02 00:49:49.211409', 'step': 21757, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:49.264748', 'step': 21757, 'epoch': 2}
{'type': 'loss', 'content': 0.06702214479446411, 'timestamp': '2025-10-02 00:49:49.266887', 'step': 21758, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:49.321319', 'step': 21758, 'epoch': 2}
{'type': 'loss', 'content': 0.021114766597747803, 'timestamp': '2025-10-02 00:49:49.328758', 'step': 21759, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:49:49.402118', 'step': 21759, 'epoch': 2}
{'type': 'loss', 'content': 0.04478151723742485, 'timestamp': '2025-10-02 00:49:49.416084', 'step': 21760, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:49.470594', 'step': 21760, 'epoch': 2}
{'type': 'loss', 'content': 0.02987237088382244, 'timestamp': '2025-10-02 00:49:49.472879', 'step': 21761, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:49:49.526977', 'step': 21761, 'epoch': 2}
{'type': 'loss', 'content': 0.08203326910734177, 'timestamp': '2025-10-02 00:49:49.529108', 'step': 21762, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:49.584235', 'step': 21762, 'epoch': 2}
{'type': 'loss', 'content': 0.11837603896856308, 'timestamp': '2025-10-02 00:49:49.587152', 'step': 21763, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:49.641851', 'step': 21763, 'epoch': 2}
{'type': 'loss', 'content': 0.033793237060308456, 'timestamp': '2025-10-02 00:49:49.650268', 'step': 21764, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:49.704488', 'step': 21764, 'epoch': 2}
{'type': 'loss', 'content': 0.12959152460098267, 'timestamp': '2025-10-02 00:49:49.707109', 'step': 21765, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:49.760410', 'step': 21765, 'epoch': 2}
{'type': 'loss', 'content': 0.12079251557588577, 'timestamp': '2025-10-02 00:49:49.762728', 'step': 21766, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:49.818270', 'step': 21766, 'epoch': 2}
{'type': 'loss', 'content': 0.06748752295970917, 'timestamp': '2025-10-02 00:49:49.820501', 'step': 21767, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:49.874743', 'step': 21767, 'epoch': 2}
{'type': 'loss', 'content': 0.09749295562505722, 'timestamp': '2025-10-02 00:49:49.880636', 'step': 21768, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:49.933905', 'step': 21768, 'epoch': 2}
{'type': 'loss', 'content': 0.09284579753875732, 'timestamp': '2025-10-02 00:49:49.936089', 'step': 21769, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:49.990141', 'step': 21769, 'epoch': 2}
{'type': 'loss', 'content': 0.07362425327301025, 'timestamp': '2025-10-02 00:49:49.992536', 'step': 21770, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:50.046835', 'step': 21770, 'epoch': 2}
{'type': 'loss', 'content': 0.15361250936985016, 'timestamp': '2025-10-02 00:49:50.052410', 'step': 21771, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:50.109965', 'step': 21771, 'epoch': 2}
{'type': 'loss', 'content': 0.13809546828269958, 'timestamp': '2025-10-02 00:49:50.118316', 'step': 21772, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:50.172488', 'step': 21772, 'epoch': 2}
{'type': 'loss', 'content': 0.038303129374980927, 'timestamp': '2025-10-02 00:49:50.177955', 'step': 21773, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:50.232673', 'step': 21773, 'epoch': 2}
{'type': 'loss', 'content': 0.01957029476761818, 'timestamp': '2025-10-02 00:49:50.240266', 'step': 21774, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:50.297316', 'step': 21774, 'epoch': 2}
{'type': 'loss', 'content': 0.0035046429838985205, 'timestamp': '2025-10-02 00:49:50.306861', 'step': 21775, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:50.362666', 'step': 21775, 'epoch': 2}
{'type': 'loss', 'content': 0.01544495765119791, 'timestamp': '2025-10-02 00:49:50.373001', 'step': 21776, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:50.427252', 'step': 21776, 'epoch': 2}
{'type': 'loss', 'content': 0.09814730286598206, 'timestamp': '2025-10-02 00:49:50.429563', 'step': 21777, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:50.484315', 'step': 21777, 'epoch': 2}
{'type': 'loss', 'content': 0.053521547466516495, 'timestamp': '2025-10-02 00:49:50.486562', 'step': 21778, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:50.541101', 'step': 21778, 'epoch': 2}
{'type': 'loss', 'content': 0.15014714002609253, 'timestamp': '2025-10-02 00:49:50.543283', 'step': 21779, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:50.604114', 'step': 21779, 'epoch': 2}
{'type': 'loss', 'content': 0.00574877206236124, 'timestamp': '2025-10-02 00:49:50.609925', 'step': 21780, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:50.663887', 'step': 21780, 'epoch': 2}
{'type': 'loss', 'content': 0.051578424870967865, 'timestamp': '2025-10-02 00:49:50.666434', 'step': 21781, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:49:50.721866', 'step': 21781, 'epoch': 2}
{'type': 'loss', 'content': 0.1761038601398468, 'timestamp': '2025-10-02 00:49:50.724566', 'step': 21782, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:50.779050', 'step': 21782, 'epoch': 2}
{'type': 'loss', 'content': 0.07598839700222015, 'timestamp': '2025-10-02 00:49:50.781687', 'step': 21783, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:50.836381', 'step': 21783, 'epoch': 2}
{'type': 'loss', 'content': 0.13223819434642792, 'timestamp': '2025-10-02 00:49:50.842464', 'step': 21784, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:50.897020', 'step': 21784, 'epoch': 2}
{'type': 'loss', 'content': 0.009057261049747467, 'timestamp': '2025-10-02 00:49:50.899304', 'step': 21785, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:50.952776', 'step': 21785, 'epoch': 2}
{'type': 'loss', 'content': 0.0712977945804596, 'timestamp': '2025-10-02 00:49:50.954684', 'step': 21786, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:51.009395', 'step': 21786, 'epoch': 2}
{'type': 'loss', 'content': 0.0194945577532053, 'timestamp': '2025-10-02 00:49:51.015479', 'step': 21787, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:51.069494', 'step': 21787, 'epoch': 2}
{'type': 'loss', 'content': 0.0056391516700387, 'timestamp': '2025-10-02 00:49:51.076430', 'step': 21788, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:51.129863', 'step': 21788, 'epoch': 2}
{'type': 'loss', 'content': 0.1305035501718521, 'timestamp': '2025-10-02 00:49:51.132287', 'step': 21789, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:51.186273', 'step': 21789, 'epoch': 2}
{'type': 'loss', 'content': 0.0793113261461258, 'timestamp': '2025-10-02 00:49:51.188943', 'step': 21790, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:51.243810', 'step': 21790, 'epoch': 2}
{'type': 'loss', 'content': 0.03318262845277786, 'timestamp': '2025-10-02 00:49:51.249644', 'step': 21791, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:51.303419', 'step': 21791, 'epoch': 2}
{'type': 'loss', 'content': 0.16641350090503693, 'timestamp': '2025-10-02 00:49:51.309147', 'step': 21792, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:51.364377', 'step': 21792, 'epoch': 2}
{'type': 'loss', 'content': 0.039459191262722015, 'timestamp': '2025-10-02 00:49:51.366915', 'step': 21793, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:49:51.421087', 'step': 21793, 'epoch': 2}
{'type': 'loss', 'content': 0.12223736196756363, 'timestamp': '2025-10-02 00:49:51.423466', 'step': 21794, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:51.478890', 'step': 21794, 'epoch': 2}
{'type': 'loss', 'content': 0.16170133650302887, 'timestamp': '2025-10-02 00:49:51.481185', 'step': 21795, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:51.536330', 'step': 21795, 'epoch': 2}
{'type': 'loss', 'content': 0.012227457016706467, 'timestamp': '2025-10-02 00:49:51.543117', 'step': 21796, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:49:51.602708', 'step': 21796, 'epoch': 2}
{'type': 'loss', 'content': 0.08718982338905334, 'timestamp': '2025-10-02 00:49:51.614061', 'step': 21797, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:51.672119', 'step': 21797, 'epoch': 2}
{'type': 'loss', 'content': 0.017366409301757812, 'timestamp': '2025-10-02 00:49:51.681629', 'step': 21798, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:51.735915', 'step': 21798, 'epoch': 2}
{'type': 'loss', 'content': 0.012923686765134335, 'timestamp': '2025-10-02 00:49:51.738075', 'step': 21799, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:51.792732', 'step': 21799, 'epoch': 2}
{'type': 'loss', 'content': 0.07403811067342758, 'timestamp': '2025-10-02 00:49:51.798452', 'step': 21800, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:51.851882', 'step': 21800, 'epoch': 2}
{'type': 'loss', 'content': 0.06606695801019669, 'timestamp': '2025-10-02 00:49:51.857668', 'step': 21801, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:51.911919', 'step': 21801, 'epoch': 2}
{'type': 'loss', 'content': 0.05711278319358826, 'timestamp': '2025-10-02 00:49:51.914473', 'step': 21802, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:51.980320', 'step': 21802, 'epoch': 2}
{'type': 'loss', 'content': 0.0642678365111351, 'timestamp': '2025-10-02 00:49:51.982759', 'step': 21803, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:52.036680', 'step': 21803, 'epoch': 2}
{'type': 'loss', 'content': 0.16302448511123657, 'timestamp': '2025-10-02 00:49:52.043087', 'step': 21804, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:52.097039', 'step': 21804, 'epoch': 2}
{'type': 'loss', 'content': 0.057126305997371674, 'timestamp': '2025-10-02 00:49:52.099300', 'step': 21805, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:52.152948', 'step': 21805, 'epoch': 2}
{'type': 'loss', 'content': 0.044573117047548294, 'timestamp': '2025-10-02 00:49:52.155308', 'step': 21806, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:49:52.216514', 'step': 21806, 'epoch': 2}
{'type': 'loss', 'content': 0.0242883563041687, 'timestamp': '2025-10-02 00:49:52.226684', 'step': 21807, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:49:52.289263', 'step': 21807, 'epoch': 2}
{'type': 'loss', 'content': 0.06477950513362885, 'timestamp': '2025-10-02 00:49:52.300683', 'step': 21808, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:52.355137', 'step': 21808, 'epoch': 2}
{'type': 'loss', 'content': 0.04329207167029381, 'timestamp': '2025-10-02 00:49:52.357394', 'step': 21809, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:52.412224', 'step': 21809, 'epoch': 2}
{'type': 'loss', 'content': 0.042850296944379807, 'timestamp': '2025-10-02 00:49:52.421789', 'step': 21810, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:52.477680', 'step': 21810, 'epoch': 2}
{'type': 'loss', 'content': 0.006703260354697704, 'timestamp': '2025-10-02 00:49:52.486961', 'step': 21811, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:52.541608', 'step': 21811, 'epoch': 2}
{'type': 'loss', 'content': 0.023869602009654045, 'timestamp': '2025-10-02 00:49:52.550027', 'step': 21812, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:52.603700', 'step': 21812, 'epoch': 2}
{'type': 'loss', 'content': 0.046795789152383804, 'timestamp': '2025-10-02 00:49:52.606360', 'step': 21813, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:52.663311', 'step': 21813, 'epoch': 2}
{'type': 'loss', 'content': 0.03213421627879143, 'timestamp': '2025-10-02 00:49:52.672944', 'step': 21814, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:52.728193', 'step': 21814, 'epoch': 2}
{'type': 'loss', 'content': 0.07350459694862366, 'timestamp': '2025-10-02 00:49:52.737743', 'step': 21815, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:52.793663', 'step': 21815, 'epoch': 2}
{'type': 'loss', 'content': 0.005767454393208027, 'timestamp': '2025-10-02 00:49:52.803791', 'step': 21816, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:52.857054', 'step': 21816, 'epoch': 2}
{'type': 'loss', 'content': 0.042998675256967545, 'timestamp': '2025-10-02 00:49:52.859735', 'step': 21817, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:49:52.926502', 'step': 21817, 'epoch': 2}
{'type': 'loss', 'content': 0.1053999736905098, 'timestamp': '2025-10-02 00:49:52.928805', 'step': 21818, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:52.985780', 'step': 21818, 'epoch': 2}
{'type': 'loss', 'content': 0.02318326197564602, 'timestamp': '2025-10-02 00:49:52.993141', 'step': 21819, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:53.047403', 'step': 21819, 'epoch': 2}
{'type': 'loss', 'content': 0.05460633710026741, 'timestamp': '2025-10-02 00:49:53.053175', 'step': 21820, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:53.107323', 'step': 21820, 'epoch': 2}
{'type': 'loss', 'content': 0.032279230654239655, 'timestamp': '2025-10-02 00:49:53.110171', 'step': 21821, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:53.164439', 'step': 21821, 'epoch': 2}
{'type': 'loss', 'content': 0.07894719392061234, 'timestamp': '2025-10-02 00:49:53.172113', 'step': 21822, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:53.227124', 'step': 21822, 'epoch': 2}
{'type': 'loss', 'content': 0.08058124780654907, 'timestamp': '2025-10-02 00:49:53.229243', 'step': 21823, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:53.283740', 'step': 21823, 'epoch': 2}
{'type': 'loss', 'content': 0.056237198412418365, 'timestamp': '2025-10-02 00:49:53.289387', 'step': 21824, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:53.344752', 'step': 21824, 'epoch': 2}
{'type': 'loss', 'content': 0.019373299553990364, 'timestamp': '2025-10-02 00:49:53.347364', 'step': 21825, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:53.402759', 'step': 21825, 'epoch': 2}
{'type': 'loss', 'content': 0.027622124180197716, 'timestamp': '2025-10-02 00:49:53.405233', 'step': 21826, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:53.459245', 'step': 21826, 'epoch': 2}
{'type': 'loss', 'content': 0.07228697091341019, 'timestamp': '2025-10-02 00:49:53.466695', 'step': 21827, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:53.520674', 'step': 21827, 'epoch': 2}
{'type': 'loss', 'content': 0.034368857741355896, 'timestamp': '2025-10-02 00:49:53.527458', 'step': 21828, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:49:53.588321', 'step': 21828, 'epoch': 2}
{'type': 'loss', 'content': 0.013651862740516663, 'timestamp': '2025-10-02 00:49:53.599843', 'step': 21829, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:49:53.655260', 'step': 21829, 'epoch': 2}
{'type': 'loss', 'content': 0.07327655702829361, 'timestamp': '2025-10-02 00:49:53.657633', 'step': 21830, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:53.711414', 'step': 21830, 'epoch': 2}
{'type': 'loss', 'content': 0.12542212009429932, 'timestamp': '2025-10-02 00:49:53.713792', 'step': 21831, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:53.768914', 'step': 21831, 'epoch': 2}
{'type': 'loss', 'content': 0.006434205919504166, 'timestamp': '2025-10-02 00:49:53.775821', 'step': 21832, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:53.829433', 'step': 21832, 'epoch': 2}
{'type': 'loss', 'content': 0.06251632422208786, 'timestamp': '2025-10-02 00:49:53.831563', 'step': 21833, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:53.885666', 'step': 21833, 'epoch': 2}
{'type': 'loss', 'content': 0.04873456805944443, 'timestamp': '2025-10-02 00:49:53.888089', 'step': 21834, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:53.942513', 'step': 21834, 'epoch': 2}
{'type': 'loss', 'content': 0.034948673099279404, 'timestamp': '2025-10-02 00:49:53.944847', 'step': 21835, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:49:53.999351', 'step': 21835, 'epoch': 2}
{'type': 'loss', 'content': 0.07545185089111328, 'timestamp': '2025-10-02 00:49:54.006495', 'step': 21836, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:54.072673', 'step': 21836, 'epoch': 2}
{'type': 'loss', 'content': 0.0860133245587349, 'timestamp': '2025-10-02 00:49:54.074863', 'step': 21837, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:54.130235', 'step': 21837, 'epoch': 2}
{'type': 'loss', 'content': 0.03423710912466049, 'timestamp': '2025-10-02 00:49:54.132473', 'step': 21838, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:54.186545', 'step': 21838, 'epoch': 2}
{'type': 'loss', 'content': 0.04730745404958725, 'timestamp': '2025-10-02 00:49:54.192040', 'step': 21839, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:54.246621', 'step': 21839, 'epoch': 2}
{'type': 'loss', 'content': 0.12194732576608658, 'timestamp': '2025-10-02 00:49:54.252881', 'step': 21840, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:49:54.310890', 'step': 21840, 'epoch': 2}
{'type': 'loss', 'content': 0.03227875381708145, 'timestamp': '2025-10-02 00:49:54.321874', 'step': 21841, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:54.377612', 'step': 21841, 'epoch': 2}
{'type': 'loss', 'content': 0.05593869462609291, 'timestamp': '2025-10-02 00:49:54.387142', 'step': 21842, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:54.442294', 'step': 21842, 'epoch': 2}
{'type': 'loss', 'content': 0.04650428518652916, 'timestamp': '2025-10-02 00:49:54.444751', 'step': 21843, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:54.499187', 'step': 21843, 'epoch': 2}
{'type': 'loss', 'content': 0.05437486618757248, 'timestamp': '2025-10-02 00:49:54.507478', 'step': 21844, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:54.560804', 'step': 21844, 'epoch': 2}
{'type': 'loss', 'content': 0.1236286535859108, 'timestamp': '2025-10-02 00:49:54.562547', 'step': 21845, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:49:54.617509', 'step': 21845, 'epoch': 2}
{'type': 'loss', 'content': 0.08938577771186829, 'timestamp': '2025-10-02 00:49:54.619502', 'step': 21846, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:49:54.681031', 'step': 21846, 'epoch': 2}
{'type': 'loss', 'content': 0.15070129930973053, 'timestamp': '2025-10-02 00:49:54.691655', 'step': 21847, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:54.747268', 'step': 21847, 'epoch': 2}
{'type': 'loss', 'content': 0.03755974397063255, 'timestamp': '2025-10-02 00:49:54.753852', 'step': 21848, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:54.807704', 'step': 21848, 'epoch': 2}
{'type': 'loss', 'content': 0.11287388205528259, 'timestamp': '2025-10-02 00:49:54.810024', 'step': 21849, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:54.865675', 'step': 21849, 'epoch': 2}
{'type': 'loss', 'content': 0.10896259546279907, 'timestamp': '2025-10-02 00:49:54.867726', 'step': 21850, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:49:54.922245', 'step': 21850, 'epoch': 2}
{'type': 'loss', 'content': 0.0944395437836647, 'timestamp': '2025-10-02 00:49:54.924757', 'step': 21851, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:54.979748', 'step': 21851, 'epoch': 2}
{'type': 'loss', 'content': 0.06464239954948425, 'timestamp': '2025-10-02 00:49:54.985706', 'step': 21852, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:55.040474', 'step': 21852, 'epoch': 2}
{'type': 'loss', 'content': 0.07481475174427032, 'timestamp': '2025-10-02 00:49:55.042823', 'step': 21853, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:55.097317', 'step': 21853, 'epoch': 2}
{'type': 'loss', 'content': 0.04014340043067932, 'timestamp': '2025-10-02 00:49:55.099598', 'step': 21854, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:49:55.166854', 'step': 21854, 'epoch': 2}
{'type': 'loss', 'content': 0.08164682239294052, 'timestamp': '2025-10-02 00:49:55.178843', 'step': 21855, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:55.237115', 'step': 21855, 'epoch': 2}
{'type': 'loss', 'content': 0.03125845268368721, 'timestamp': '2025-10-02 00:49:55.242902', 'step': 21856, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:49:55.303841', 'step': 21856, 'epoch': 2}
{'type': 'loss', 'content': 0.05877506732940674, 'timestamp': '2025-10-02 00:49:55.315379', 'step': 21857, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:55.370446', 'step': 21857, 'epoch': 2}
{'type': 'loss', 'content': 0.06749150156974792, 'timestamp': '2025-10-02 00:49:55.373108', 'step': 21858, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:55.427443', 'step': 21858, 'epoch': 2}
{'type': 'loss', 'content': 0.10355153679847717, 'timestamp': '2025-10-02 00:49:55.429566', 'step': 21859, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:55.484120', 'step': 21859, 'epoch': 2}
{'type': 'loss', 'content': 0.036763086915016174, 'timestamp': '2025-10-02 00:49:55.494092', 'step': 21860, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:55.548054', 'step': 21860, 'epoch': 2}
{'type': 'loss', 'content': 0.02335025928914547, 'timestamp': '2025-10-02 00:49:55.550286', 'step': 21861, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:49:55.617869', 'step': 21861, 'epoch': 2}
{'type': 'loss', 'content': 0.02444540150463581, 'timestamp': '2025-10-02 00:49:55.629895', 'step': 21862, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:55.684303', 'step': 21862, 'epoch': 2}
{'type': 'loss', 'content': 0.09887878596782684, 'timestamp': '2025-10-02 00:49:55.686913', 'step': 21863, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:55.741730', 'step': 21863, 'epoch': 2}
{'type': 'loss', 'content': 0.04761364683508873, 'timestamp': '2025-10-02 00:49:55.750038', 'step': 21864, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:55.804556', 'step': 21864, 'epoch': 2}
{'type': 'loss', 'content': 0.029326390475034714, 'timestamp': '2025-10-02 00:49:55.814117', 'step': 21865, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:55.870062', 'step': 21865, 'epoch': 2}
{'type': 'loss', 'content': 0.1000453308224678, 'timestamp': '2025-10-02 00:49:55.879609', 'step': 21866, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:49:55.941571', 'step': 21866, 'epoch': 2}
{'type': 'loss', 'content': 0.036685384809970856, 'timestamp': '2025-10-02 00:49:55.952210', 'step': 21867, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:56.006685', 'step': 21867, 'epoch': 2}
{'type': 'loss', 'content': 0.02426241710782051, 'timestamp': '2025-10-02 00:49:56.012890', 'step': 21868, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:56.067249', 'step': 21868, 'epoch': 2}
{'type': 'loss', 'content': 0.10548055917024612, 'timestamp': '2025-10-02 00:49:56.074911', 'step': 21869, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:49:56.136955', 'step': 21869, 'epoch': 2}
{'type': 'loss', 'content': 0.06931547075510025, 'timestamp': '2025-10-02 00:49:56.147447', 'step': 21870, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:56.202669', 'step': 21870, 'epoch': 2}
{'type': 'loss', 'content': 0.018247252330183983, 'timestamp': '2025-10-02 00:49:56.205235', 'step': 21871, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:56.260285', 'step': 21871, 'epoch': 2}
{'type': 'loss', 'content': 0.055862974375486374, 'timestamp': '2025-10-02 00:49:56.266175', 'step': 21872, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:56.321160', 'step': 21872, 'epoch': 2}
{'type': 'loss', 'content': 0.14280816912651062, 'timestamp': '2025-10-02 00:49:56.323636', 'step': 21873, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:56.377914', 'step': 21873, 'epoch': 2}
{'type': 'loss', 'content': 0.09048768132925034, 'timestamp': '2025-10-02 00:49:56.380069', 'step': 21874, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:56.435352', 'step': 21874, 'epoch': 2}
{'type': 'loss', 'content': 0.07332921028137207, 'timestamp': '2025-10-02 00:49:56.437638', 'step': 21875, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:56.492163', 'step': 21875, 'epoch': 2}
{'type': 'loss', 'content': 0.06205752491950989, 'timestamp': '2025-10-02 00:49:56.498197', 'step': 21876, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:56.551928', 'step': 21876, 'epoch': 2}
{'type': 'loss', 'content': 0.021924274042248726, 'timestamp': '2025-10-02 00:49:56.555411', 'step': 21877, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:56.610180', 'step': 21877, 'epoch': 2}
{'type': 'loss', 'content': 0.1028563380241394, 'timestamp': '2025-10-02 00:49:56.612747', 'step': 21878, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:56.667358', 'step': 21878, 'epoch': 2}
{'type': 'loss', 'content': 0.06311983615159988, 'timestamp': '2025-10-02 00:49:56.670601', 'step': 21879, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:49:56.727622', 'step': 21879, 'epoch': 2}
{'type': 'loss', 'content': 0.1587778627872467, 'timestamp': '2025-10-02 00:49:56.733670', 'step': 21880, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:49:56.790466', 'step': 21880, 'epoch': 2}
{'type': 'loss', 'content': 0.015850817784667015, 'timestamp': '2025-10-02 00:49:56.800730', 'step': 21881, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:56.856248', 'step': 21881, 'epoch': 2}
{'type': 'loss', 'content': 0.012612846679985523, 'timestamp': '2025-10-02 00:49:56.864198', 'step': 21882, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:56.919829', 'step': 21882, 'epoch': 2}
{'type': 'loss', 'content': 0.06615644693374634, 'timestamp': '2025-10-02 00:49:56.922639', 'step': 21883, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:49:56.998330', 'step': 21883, 'epoch': 2}
{'type': 'loss', 'content': 0.04312857985496521, 'timestamp': '2025-10-02 00:49:57.012326', 'step': 21884, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:49:57.074582', 'step': 21884, 'epoch': 2}
{'type': 'loss', 'content': 0.04541713744401932, 'timestamp': '2025-10-02 00:49:57.085942', 'step': 21885, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:49:57.149796', 'step': 21885, 'epoch': 2}
{'type': 'loss', 'content': 0.014069651253521442, 'timestamp': '2025-10-02 00:49:57.160461', 'step': 21886, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:49:57.234296', 'step': 21886, 'epoch': 2}
{'type': 'loss', 'content': 0.056736886501312256, 'timestamp': '2025-10-02 00:49:57.245129', 'step': 21887, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:57.302920', 'step': 21887, 'epoch': 2}
{'type': 'loss', 'content': 0.019731473177671432, 'timestamp': '2025-10-02 00:49:57.309642', 'step': 21888, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:57.364137', 'step': 21888, 'epoch': 2}
{'type': 'loss', 'content': 0.02538326010107994, 'timestamp': '2025-10-02 00:49:57.370282', 'step': 21889, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:57.427061', 'step': 21889, 'epoch': 2}
{'type': 'loss', 'content': 0.03120899200439453, 'timestamp': '2025-10-02 00:49:57.429667', 'step': 21890, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:49:57.493193', 'step': 21890, 'epoch': 2}
{'type': 'loss', 'content': 0.029690269380807877, 'timestamp': '2025-10-02 00:49:57.504033', 'step': 21891, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:49:57.564760', 'step': 21891, 'epoch': 2}
{'type': 'loss', 'content': 0.050895463675260544, 'timestamp': '2025-10-02 00:49:57.575828', 'step': 21892, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:49:57.652369', 'step': 21892, 'epoch': 2}
{'type': 'loss', 'content': 0.07171899080276489, 'timestamp': '2025-10-02 00:49:57.667504', 'step': 21893, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:49:57.722964', 'step': 21893, 'epoch': 2}
{'type': 'loss', 'content': 0.13792507350444794, 'timestamp': '2025-10-02 00:49:57.725619', 'step': 21894, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:57.782403', 'step': 21894, 'epoch': 2}
{'type': 'loss', 'content': 0.02127247117459774, 'timestamp': '2025-10-02 00:49:57.788159', 'step': 21895, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:57.843996', 'step': 21895, 'epoch': 2}
{'type': 'loss', 'content': 0.016929609701037407, 'timestamp': '2025-10-02 00:49:57.851144', 'step': 21896, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:57.908353', 'step': 21896, 'epoch': 2}
{'type': 'loss', 'content': 0.06744831800460815, 'timestamp': '2025-10-02 00:49:57.911304', 'step': 21897, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:57.967203', 'step': 21897, 'epoch': 2}
{'type': 'loss', 'content': 0.051881417632102966, 'timestamp': '2025-10-02 00:49:57.969908', 'step': 21898, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:58.025323', 'step': 21898, 'epoch': 2}
{'type': 'loss', 'content': 0.02497452311217785, 'timestamp': '2025-10-02 00:49:58.027416', 'step': 21899, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:58.084332', 'step': 21899, 'epoch': 2}
{'type': 'loss', 'content': 0.09969253838062286, 'timestamp': '2025-10-02 00:49:58.090478', 'step': 21900, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:58.145179', 'step': 21900, 'epoch': 2}
{'type': 'loss', 'content': 0.06625814735889435, 'timestamp': '2025-10-02 00:49:58.148864', 'step': 21901, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:49:58.213010', 'step': 21901, 'epoch': 2}
{'type': 'loss', 'content': 0.0188909824937582, 'timestamp': '2025-10-02 00:49:58.223843', 'step': 21902, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:58.281293', 'step': 21902, 'epoch': 2}
{'type': 'loss', 'content': 0.03696637600660324, 'timestamp': '2025-10-02 00:49:58.287259', 'step': 21903, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:58.342212', 'step': 21903, 'epoch': 2}
{'type': 'loss', 'content': 0.12062384188175201, 'timestamp': '2025-10-02 00:49:58.348557', 'step': 21904, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:58.404282', 'step': 21904, 'epoch': 2}
{'type': 'loss', 'content': 0.02187671698629856, 'timestamp': '2025-10-02 00:49:58.406932', 'step': 21905, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:58.462678', 'step': 21905, 'epoch': 2}
{'type': 'loss', 'content': 0.008574430830776691, 'timestamp': '2025-10-02 00:49:58.466335', 'step': 21906, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:58.523830', 'step': 21906, 'epoch': 2}
{'type': 'loss', 'content': 0.17213787138462067, 'timestamp': '2025-10-02 00:49:58.526837', 'step': 21907, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:49:58.584933', 'step': 21907, 'epoch': 2}
{'type': 'loss', 'content': 0.21999821066856384, 'timestamp': '2025-10-02 00:49:58.591580', 'step': 21908, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:58.647489', 'step': 21908, 'epoch': 2}
{'type': 'loss', 'content': 0.08164060860872269, 'timestamp': '2025-10-02 00:49:58.649916', 'step': 21909, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:58.705440', 'step': 21909, 'epoch': 2}
{'type': 'loss', 'content': 0.09610924869775772, 'timestamp': '2025-10-02 00:49:58.714763', 'step': 21910, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:58.774194', 'step': 21910, 'epoch': 2}
{'type': 'loss', 'content': 0.03113076277077198, 'timestamp': '2025-10-02 00:49:58.777121', 'step': 21911, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:49:58.831927', 'step': 21911, 'epoch': 2}
{'type': 'loss', 'content': 0.027361463755369186, 'timestamp': '2025-10-02 00:49:58.837601', 'step': 21912, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:58.890937', 'step': 21912, 'epoch': 2}
{'type': 'loss', 'content': 0.03449968621134758, 'timestamp': '2025-10-02 00:49:58.898141', 'step': 21913, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:58.952438', 'step': 21913, 'epoch': 2}
{'type': 'loss', 'content': 0.051090437918901443, 'timestamp': '2025-10-02 00:49:58.961753', 'step': 21914, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:59.016214', 'step': 21914, 'epoch': 2}
{'type': 'loss', 'content': 0.026012877002358437, 'timestamp': '2025-10-02 00:49:59.021641', 'step': 21915, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:59.076172', 'step': 21915, 'epoch': 2}
{'type': 'loss', 'content': 0.08097776770591736, 'timestamp': '2025-10-02 00:49:59.082047', 'step': 21916, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:59.136046', 'step': 21916, 'epoch': 2}
{'type': 'loss', 'content': 0.046032678335905075, 'timestamp': '2025-10-02 00:49:59.138789', 'step': 21917, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:49:59.193038', 'step': 21917, 'epoch': 2}
{'type': 'loss', 'content': 0.02494638040661812, 'timestamp': '2025-10-02 00:49:59.195640', 'step': 21918, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:59.249392', 'step': 21918, 'epoch': 2}
{'type': 'loss', 'content': 0.029994023963809013, 'timestamp': '2025-10-02 00:49:59.256994', 'step': 21919, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:49:59.310990', 'step': 21919, 'epoch': 2}
{'type': 'loss', 'content': 0.04054364934563637, 'timestamp': '2025-10-02 00:49:59.317947', 'step': 21920, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:49:59.371357', 'step': 21920, 'epoch': 2}
{'type': 'loss', 'content': 0.06499988585710526, 'timestamp': '2025-10-02 00:49:59.373977', 'step': 21921, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:49:59.429517', 'step': 21921, 'epoch': 2}
{'type': 'loss', 'content': 0.04477017745375633, 'timestamp': '2025-10-02 00:49:59.438922', 'step': 21922, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:49:59.494177', 'step': 21922, 'epoch': 2}
{'type': 'loss', 'content': 0.05295417830348015, 'timestamp': '2025-10-02 00:49:59.496585', 'step': 21923, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:59.550533', 'step': 21923, 'epoch': 2}
{'type': 'loss', 'content': 0.015016932971775532, 'timestamp': '2025-10-02 00:49:59.558944', 'step': 21924, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:49:59.618511', 'step': 21924, 'epoch': 2}
{'type': 'loss', 'content': 0.037632960826158524, 'timestamp': '2025-10-02 00:49:59.629855', 'step': 21925, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:49:59.684318', 'step': 21925, 'epoch': 2}
{'type': 'loss', 'content': 0.09016601741313934, 'timestamp': '2025-10-02 00:49:59.686532', 'step': 21926, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:49:59.741158', 'step': 21926, 'epoch': 2}
{'type': 'loss', 'content': 0.02011941373348236, 'timestamp': '2025-10-02 00:49:59.748624', 'step': 21927, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:49:59.803312', 'step': 21927, 'epoch': 2}
{'type': 'loss', 'content': 0.06781952828168869, 'timestamp': '2025-10-02 00:49:59.809107', 'step': 21928, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:49:59.869144', 'step': 21928, 'epoch': 2}
{'type': 'loss', 'content': 0.005221334286034107, 'timestamp': '2025-10-02 00:49:59.880565', 'step': 21929, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:49:59.952240', 'step': 21929, 'epoch': 2}
{'type': 'loss', 'content': 0.047226566821336746, 'timestamp': '2025-10-02 00:49:59.964877', 'step': 21930, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:00.019517', 'step': 21930, 'epoch': 2}
{'type': 'loss', 'content': 0.011459294706583023, 'timestamp': '2025-10-02 00:50:00.027240', 'step': 21931, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:50:00.081434', 'step': 21931, 'epoch': 2}
{'type': 'loss', 'content': 0.04251978546380997, 'timestamp': '2025-10-02 00:50:00.087215', 'step': 21932, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:00.141606', 'step': 21932, 'epoch': 2}
{'type': 'loss', 'content': 0.019493140280246735, 'timestamp': '2025-10-02 00:50:00.143838', 'step': 21933, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:00.198531', 'step': 21933, 'epoch': 2}
{'type': 'loss', 'content': 0.012203086167573929, 'timestamp': '2025-10-02 00:50:00.207886', 'step': 21934, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:50:00.263460', 'step': 21934, 'epoch': 2}
{'type': 'loss', 'content': 0.04864027351140976, 'timestamp': '2025-10-02 00:50:00.265819', 'step': 21935, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:00.319664', 'step': 21935, 'epoch': 2}
{'type': 'loss', 'content': 0.019827958196401596, 'timestamp': '2025-10-02 00:50:00.325451', 'step': 21936, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:50:00.378659', 'step': 21936, 'epoch': 2}
{'type': 'loss', 'content': 0.10413987189531326, 'timestamp': '2025-10-02 00:50:00.380798', 'step': 21937, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:00.434533', 'step': 21937, 'epoch': 2}
{'type': 'loss', 'content': 0.018100686371326447, 'timestamp': '2025-10-02 00:50:00.442187', 'step': 21938, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:00.496975', 'step': 21938, 'epoch': 2}
{'type': 'loss', 'content': 0.0694088265299797, 'timestamp': '2025-10-02 00:50:00.499492', 'step': 21939, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:00.554106', 'step': 21939, 'epoch': 2}
{'type': 'loss', 'content': 0.053103379905223846, 'timestamp': '2025-10-02 00:50:00.560171', 'step': 21940, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:00.613398', 'step': 21940, 'epoch': 2}
{'type': 'loss', 'content': 0.1050652414560318, 'timestamp': '2025-10-02 00:50:00.615966', 'step': 21941, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:50:00.674443', 'step': 21941, 'epoch': 2}
{'type': 'loss', 'content': 0.011448705568909645, 'timestamp': '2025-10-02 00:50:00.684707', 'step': 21942, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:00.739133', 'step': 21942, 'epoch': 2}
{'type': 'loss', 'content': 0.05463740974664688, 'timestamp': '2025-10-02 00:50:00.745100', 'step': 21943, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:00.799298', 'step': 21943, 'epoch': 2}
{'type': 'loss', 'content': 0.027856826782226562, 'timestamp': '2025-10-02 00:50:00.805834', 'step': 21944, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:00.859068', 'step': 21944, 'epoch': 2}
{'type': 'loss', 'content': 0.032821811735630035, 'timestamp': '2025-10-02 00:50:00.861684', 'step': 21945, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:00.915721', 'step': 21945, 'epoch': 2}
{'type': 'loss', 'content': 0.03232327848672867, 'timestamp': '2025-10-02 00:50:00.918455', 'step': 21946, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:00.973648', 'step': 21946, 'epoch': 2}
{'type': 'loss', 'content': 0.05211485177278519, 'timestamp': '2025-10-02 00:50:00.983032', 'step': 21947, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:01.038417', 'step': 21947, 'epoch': 2}
{'type': 'loss', 'content': 0.030104584991931915, 'timestamp': '2025-10-02 00:50:01.048718', 'step': 21948, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:01.103855', 'step': 21948, 'epoch': 2}
{'type': 'loss', 'content': 0.0325283482670784, 'timestamp': '2025-10-02 00:50:01.106124', 'step': 21949, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:01.160084', 'step': 21949, 'epoch': 2}
{'type': 'loss', 'content': 0.13408130407333374, 'timestamp': '2025-10-02 00:50:01.162647', 'step': 21950, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:01.218273', 'step': 21950, 'epoch': 2}
{'type': 'loss', 'content': 0.01268280204385519, 'timestamp': '2025-10-02 00:50:01.227810', 'step': 21951, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:01.282508', 'step': 21951, 'epoch': 2}
{'type': 'loss', 'content': 0.13568954169750214, 'timestamp': '2025-10-02 00:50:01.289284', 'step': 21952, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:01.343172', 'step': 21952, 'epoch': 2}
{'type': 'loss', 'content': 0.035385631024837494, 'timestamp': '2025-10-02 00:50:01.345869', 'step': 21953, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:01.401759', 'step': 21953, 'epoch': 2}
{'type': 'loss', 'content': 0.046487435698509216, 'timestamp': '2025-10-02 00:50:01.411290', 'step': 21954, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:50:01.465815', 'step': 21954, 'epoch': 2}
{'type': 'loss', 'content': 0.04340559244155884, 'timestamp': '2025-10-02 00:50:01.467758', 'step': 21955, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:01.522250', 'step': 21955, 'epoch': 2}
{'type': 'loss', 'content': 0.017249109223484993, 'timestamp': '2025-10-02 00:50:01.528237', 'step': 21956, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:01.581590', 'step': 21956, 'epoch': 2}
{'type': 'loss', 'content': 0.09610123187303543, 'timestamp': '2025-10-02 00:50:01.583754', 'step': 21957, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:50:01.645515', 'step': 21957, 'epoch': 2}
{'type': 'loss', 'content': 0.04480091109871864, 'timestamp': '2025-10-02 00:50:01.656095', 'step': 21958, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:50:01.710634', 'step': 21958, 'epoch': 2}
{'type': 'loss', 'content': 0.07283784449100494, 'timestamp': '2025-10-02 00:50:01.713343', 'step': 21959, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:01.767394', 'step': 21959, 'epoch': 2}
{'type': 'loss', 'content': 0.06256300210952759, 'timestamp': '2025-10-02 00:50:01.773178', 'step': 21960, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:01.826549', 'step': 21960, 'epoch': 2}
{'type': 'loss', 'content': 0.13783256709575653, 'timestamp': '2025-10-02 00:50:01.828959', 'step': 21961, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:01.884054', 'step': 21961, 'epoch': 2}
{'type': 'loss', 'content': 0.08037912100553513, 'timestamp': '2025-10-02 00:50:01.886318', 'step': 21962, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:01.941368', 'step': 21962, 'epoch': 2}
{'type': 'loss', 'content': 0.020436126738786697, 'timestamp': '2025-10-02 00:50:01.947285', 'step': 21963, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:02.003788', 'step': 21963, 'epoch': 2}
{'type': 'loss', 'content': 0.008825212717056274, 'timestamp': '2025-10-02 00:50:02.010138', 'step': 21964, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:02.064495', 'step': 21964, 'epoch': 2}
{'type': 'loss', 'content': 0.07697287201881409, 'timestamp': '2025-10-02 00:50:02.067042', 'step': 21965, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:02.120602', 'step': 21965, 'epoch': 2}
{'type': 'loss', 'content': 0.07261376827955246, 'timestamp': '2025-10-02 00:50:02.122831', 'step': 21966, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:02.176811', 'step': 21966, 'epoch': 2}
{'type': 'loss', 'content': 0.11915504932403564, 'timestamp': '2025-10-02 00:50:02.178731', 'step': 21967, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:02.232678', 'step': 21967, 'epoch': 2}
{'type': 'loss', 'content': 0.0707664042711258, 'timestamp': '2025-10-02 00:50:02.238694', 'step': 21968, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:02.294050', 'step': 21968, 'epoch': 2}
{'type': 'loss', 'content': 0.029719984158873558, 'timestamp': '2025-10-02 00:50:02.301481', 'step': 21969, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:02.356206', 'step': 21969, 'epoch': 2}
{'type': 'loss', 'content': 0.01790093444287777, 'timestamp': '2025-10-02 00:50:02.364012', 'step': 21970, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:02.419110', 'step': 21970, 'epoch': 2}
{'type': 'loss', 'content': 0.0010166221763938665, 'timestamp': '2025-10-02 00:50:02.421778', 'step': 21971, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:50:02.475754', 'step': 21971, 'epoch': 2}
{'type': 'loss', 'content': 0.10672090202569962, 'timestamp': '2025-10-02 00:50:02.482044', 'step': 21972, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:02.536772', 'step': 21972, 'epoch': 2}
{'type': 'loss', 'content': 0.06337754428386688, 'timestamp': '2025-10-02 00:50:02.547016', 'step': 21973, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:02.602206', 'step': 21973, 'epoch': 2}
{'type': 'loss', 'content': 0.07594746351242065, 'timestamp': '2025-10-02 00:50:02.604593', 'step': 21974, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:02.659363', 'step': 21974, 'epoch': 2}
{'type': 'loss', 'content': 0.050859998911619186, 'timestamp': '2025-10-02 00:50:02.661624', 'step': 21975, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:02.716382', 'step': 21975, 'epoch': 2}
{'type': 'loss', 'content': 0.056712083518505096, 'timestamp': '2025-10-02 00:50:02.723094', 'step': 21976, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:02.777486', 'step': 21976, 'epoch': 2}
{'type': 'loss', 'content': 0.004101085010915995, 'timestamp': '2025-10-02 00:50:02.783596', 'step': 21977, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:02.839544', 'step': 21977, 'epoch': 2}
{'type': 'loss', 'content': 0.0216381773352623, 'timestamp': '2025-10-02 00:50:02.845573', 'step': 21978, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:02.900346', 'step': 21978, 'epoch': 2}
{'type': 'loss', 'content': 0.017129283398389816, 'timestamp': '2025-10-02 00:50:02.902611', 'step': 21979, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:02.956672', 'step': 21979, 'epoch': 2}
{'type': 'loss', 'content': 0.06985561549663544, 'timestamp': '2025-10-02 00:50:02.962986', 'step': 21980, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:03.016804', 'step': 21980, 'epoch': 2}
{'type': 'loss', 'content': 0.09727510064840317, 'timestamp': '2025-10-02 00:50:03.019299', 'step': 21981, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:03.074983', 'step': 21981, 'epoch': 2}
{'type': 'loss', 'content': 0.0700046643614769, 'timestamp': '2025-10-02 00:50:03.077554', 'step': 21982, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:03.131987', 'step': 21982, 'epoch': 2}
{'type': 'loss', 'content': 0.01612483523786068, 'timestamp': '2025-10-02 00:50:03.134230', 'step': 21983, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:03.187902', 'step': 21983, 'epoch': 2}
{'type': 'loss', 'content': 0.04385698586702347, 'timestamp': '2025-10-02 00:50:03.194719', 'step': 21984, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:03.248461', 'step': 21984, 'epoch': 2}
{'type': 'loss', 'content': 0.013743557967245579, 'timestamp': '2025-10-02 00:50:03.254412', 'step': 21985, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:03.310894', 'step': 21985, 'epoch': 2}
{'type': 'loss', 'content': 0.09495850652456284, 'timestamp': '2025-10-02 00:50:03.313167', 'step': 21986, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:03.367645', 'step': 21986, 'epoch': 2}
{'type': 'loss', 'content': 0.05065251141786575, 'timestamp': '2025-10-02 00:50:03.369487', 'step': 21987, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:03.422845', 'step': 21987, 'epoch': 2}
{'type': 'loss', 'content': 0.02635195292532444, 'timestamp': '2025-10-02 00:50:03.428922', 'step': 21988, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:03.482901', 'step': 21988, 'epoch': 2}
{'type': 'loss', 'content': 0.05033743381500244, 'timestamp': '2025-10-02 00:50:03.485007', 'step': 21989, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:03.539565', 'step': 21989, 'epoch': 2}
{'type': 'loss', 'content': 0.13415925204753876, 'timestamp': '2025-10-02 00:50:03.541998', 'step': 21990, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:03.597042', 'step': 21990, 'epoch': 2}
{'type': 'loss', 'content': 0.07277470827102661, 'timestamp': '2025-10-02 00:50:03.602864', 'step': 21991, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:03.657405', 'step': 21991, 'epoch': 2}
{'type': 'loss', 'content': 0.06180400028824806, 'timestamp': '2025-10-02 00:50:03.663289', 'step': 21992, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:03.717670', 'step': 21992, 'epoch': 2}
{'type': 'loss', 'content': 0.11256308108568192, 'timestamp': '2025-10-02 00:50:03.720115', 'step': 21993, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:03.774709', 'step': 21993, 'epoch': 2}
{'type': 'loss', 'content': 0.07497308403253555, 'timestamp': '2025-10-02 00:50:03.784243', 'step': 21994, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:03.839329', 'step': 21994, 'epoch': 2}
{'type': 'loss', 'content': 0.022048741579055786, 'timestamp': '2025-10-02 00:50:03.848670', 'step': 21995, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:03.903340', 'step': 21995, 'epoch': 2}
{'type': 'loss', 'content': 0.013291941024363041, 'timestamp': '2025-10-02 00:50:03.910094', 'step': 21996, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:03.963876', 'step': 21996, 'epoch': 2}
{'type': 'loss', 'content': 0.029324283823370934, 'timestamp': '2025-10-02 00:50:03.971710', 'step': 21997, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:04.026298', 'step': 21997, 'epoch': 2}
{'type': 'loss', 'content': 0.007998515851795673, 'timestamp': '2025-10-02 00:50:04.035635', 'step': 21998, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:04.090282', 'step': 21998, 'epoch': 2}
{'type': 'loss', 'content': 0.11043133586645126, 'timestamp': '2025-10-02 00:50:04.097823', 'step': 21999, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:04.152364', 'step': 21999, 'epoch': 2}
{'type': 'loss', 'content': 0.020735107362270355, 'timestamp': '2025-10-02 00:50:04.162454', 'step': 22000, 'epoch': 2}
{'type': 'info', 'content': 'Checkpoint saved at step 22000', 'timestamp': '2025-10-02 00:50:04.584353', 'step': 22000, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:04.641303', 'step': 22000, 'epoch': 2}
{'type': 'loss', 'content': 0.05250111222267151, 'timestamp': '2025-10-02 00:50:04.645073', 'step': 22001, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:50:04.700073', 'step': 22001, 'epoch': 2}
{'type': 'loss', 'content': 0.054132815450429916, 'timestamp': '2025-10-02 00:50:04.702449', 'step': 22002, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:04.756623', 'step': 22002, 'epoch': 2}
{'type': 'loss', 'content': 0.04511618986725807, 'timestamp': '2025-10-02 00:50:04.759168', 'step': 22003, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:04.814764', 'step': 22003, 'epoch': 2}
{'type': 'loss', 'content': 0.04184183478355408, 'timestamp': '2025-10-02 00:50:04.825081', 'step': 22004, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:04.879165', 'step': 22004, 'epoch': 2}
{'type': 'loss', 'content': 0.035666704177856445, 'timestamp': '2025-10-02 00:50:04.887789', 'step': 22005, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:04.947317', 'step': 22005, 'epoch': 2}
{'type': 'loss', 'content': 0.0009669091086834669, 'timestamp': '2025-10-02 00:50:04.956846', 'step': 22006, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:05.011063', 'step': 22006, 'epoch': 2}
{'type': 'loss', 'content': 0.025988459587097168, 'timestamp': '2025-10-02 00:50:05.020394', 'step': 22007, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:05.076560', 'step': 22007, 'epoch': 2}
{'type': 'loss', 'content': 0.04613330587744713, 'timestamp': '2025-10-02 00:50:05.086928', 'step': 22008, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:05.141413', 'step': 22008, 'epoch': 2}
{'type': 'loss', 'content': 0.003854949725791812, 'timestamp': '2025-10-02 00:50:05.144029', 'step': 22009, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:05.199411', 'step': 22009, 'epoch': 2}
{'type': 'loss', 'content': 0.0822749212384224, 'timestamp': '2025-10-02 00:50:05.201575', 'step': 22010, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:05.255367', 'step': 22010, 'epoch': 2}
{'type': 'loss', 'content': 0.37333178520202637, 'timestamp': '2025-10-02 00:50:05.257767', 'step': 22011, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:05.312178', 'step': 22011, 'epoch': 2}
{'type': 'loss', 'content': 0.030001144856214523, 'timestamp': '2025-10-02 00:50:05.320898', 'step': 22012, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:05.382165', 'step': 22012, 'epoch': 2}
{'type': 'loss', 'content': 0.10236754268407822, 'timestamp': '2025-10-02 00:50:05.384483', 'step': 22013, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:50:05.438962', 'step': 22013, 'epoch': 2}
{'type': 'loss', 'content': 0.16075235605239868, 'timestamp': '2025-10-02 00:50:05.441090', 'step': 22014, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:05.495092', 'step': 22014, 'epoch': 2}
{'type': 'loss', 'content': 0.12797942757606506, 'timestamp': '2025-10-02 00:50:05.497314', 'step': 22015, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:05.550309', 'step': 22015, 'epoch': 2}
{'type': 'loss', 'content': 0.06328145414590836, 'timestamp': '2025-10-02 00:50:05.556068', 'step': 22016, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:50:05.609464', 'step': 22016, 'epoch': 2}
{'type': 'loss', 'content': 0.057117216289043427, 'timestamp': '2025-10-02 00:50:05.611767', 'step': 22017, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:05.665821', 'step': 22017, 'epoch': 2}
{'type': 'loss', 'content': 0.17659541964530945, 'timestamp': '2025-10-02 00:50:05.668026', 'step': 22018, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:05.723160', 'step': 22018, 'epoch': 2}
{'type': 'loss', 'content': 0.025436174124479294, 'timestamp': '2025-10-02 00:50:05.725472', 'step': 22019, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:05.780140', 'step': 22019, 'epoch': 2}
{'type': 'loss', 'content': 0.11688584834337234, 'timestamp': '2025-10-02 00:50:05.786565', 'step': 22020, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:05.841349', 'step': 22020, 'epoch': 2}
{'type': 'loss', 'content': 0.0816950798034668, 'timestamp': '2025-10-02 00:50:05.845112', 'step': 22021, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:05.899503', 'step': 22021, 'epoch': 2}
{'type': 'loss', 'content': 0.06440684199333191, 'timestamp': '2025-10-02 00:50:05.902333', 'step': 22022, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:05.959450', 'step': 22022, 'epoch': 2}
{'type': 'loss', 'content': 0.0309835746884346, 'timestamp': '2025-10-02 00:50:05.965367', 'step': 22023, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:06.022127', 'step': 22023, 'epoch': 2}
{'type': 'loss', 'content': 0.03048139065504074, 'timestamp': '2025-10-02 00:50:06.032252', 'step': 22024, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:50:06.093106', 'step': 22024, 'epoch': 2}
{'type': 'loss', 'content': 0.043704185634851456, 'timestamp': '2025-10-02 00:50:06.104380', 'step': 22025, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:50:06.166843', 'step': 22025, 'epoch': 2}
{'type': 'loss', 'content': 0.008991911076009274, 'timestamp': '2025-10-02 00:50:06.177329', 'step': 22026, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:06.234178', 'step': 22026, 'epoch': 2}
{'type': 'loss', 'content': 0.004586043301969767, 'timestamp': '2025-10-02 00:50:06.240134', 'step': 22027, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:50:06.303059', 'step': 22027, 'epoch': 2}
{'type': 'loss', 'content': 0.013706283643841743, 'timestamp': '2025-10-02 00:50:06.314313', 'step': 22028, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:06.370700', 'step': 22028, 'epoch': 2}
{'type': 'loss', 'content': 0.018840843811631203, 'timestamp': '2025-10-02 00:50:06.376795', 'step': 22029, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:06.432052', 'step': 22029, 'epoch': 2}
{'type': 'loss', 'content': 0.1039709746837616, 'timestamp': '2025-10-02 00:50:06.438000', 'step': 22030, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:06.493724', 'step': 22030, 'epoch': 2}
{'type': 'loss', 'content': 0.10079987347126007, 'timestamp': '2025-10-02 00:50:06.497184', 'step': 22031, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:50:06.552892', 'step': 22031, 'epoch': 2}
{'type': 'loss', 'content': 0.08911195397377014, 'timestamp': '2025-10-02 00:50:06.559489', 'step': 22032, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:06.614849', 'step': 22032, 'epoch': 2}
{'type': 'loss', 'content': 0.023465683683753014, 'timestamp': '2025-10-02 00:50:06.620906', 'step': 22033, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:50:06.686254', 'step': 22033, 'epoch': 2}
{'type': 'loss', 'content': 0.020786911249160767, 'timestamp': '2025-10-02 00:50:06.696881', 'step': 22034, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:06.752136', 'step': 22034, 'epoch': 2}
{'type': 'loss', 'content': 0.11923660337924957, 'timestamp': '2025-10-02 00:50:06.755411', 'step': 22035, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:06.812467', 'step': 22035, 'epoch': 2}
{'type': 'loss', 'content': 0.018667656928300858, 'timestamp': '2025-10-02 00:50:06.820808', 'step': 22036, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:06.881342', 'step': 22036, 'epoch': 2}
{'type': 'loss', 'content': 0.16233472526073456, 'timestamp': '2025-10-02 00:50:06.884420', 'step': 22037, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:06.941985', 'step': 22037, 'epoch': 2}
{'type': 'loss', 'content': 0.05099561810493469, 'timestamp': '2025-10-02 00:50:06.951508', 'step': 22038, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:07.010326', 'step': 22038, 'epoch': 2}
{'type': 'loss', 'content': 0.014333556406199932, 'timestamp': '2025-10-02 00:50:07.012761', 'step': 22039, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:07.072483', 'step': 22039, 'epoch': 2}
{'type': 'loss', 'content': 0.13146451115608215, 'timestamp': '2025-10-02 00:50:07.079631', 'step': 22040, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:50:07.151116', 'step': 22040, 'epoch': 2}
{'type': 'loss', 'content': 0.03086291439831257, 'timestamp': '2025-10-02 00:50:07.164888', 'step': 22041, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:07.222154', 'step': 22041, 'epoch': 2}
{'type': 'loss', 'content': 0.04544874653220177, 'timestamp': '2025-10-02 00:50:07.231710', 'step': 22042, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:07.305203', 'step': 22042, 'epoch': 2}
{'type': 'loss', 'content': 0.07602023333311081, 'timestamp': '2025-10-02 00:50:07.309449', 'step': 22043, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:07.382498', 'step': 22043, 'epoch': 2}
{'type': 'loss', 'content': 0.015262140892446041, 'timestamp': '2025-10-02 00:50:07.391594', 'step': 22044, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:07.464264', 'step': 22044, 'epoch': 2}
{'type': 'loss', 'content': 0.10839229822158813, 'timestamp': '2025-10-02 00:50:07.474095', 'step': 22045, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:07.568562', 'step': 22045, 'epoch': 2}
{'type': 'loss', 'content': 0.040505945682525635, 'timestamp': '2025-10-02 00:50:07.571539', 'step': 22046, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:07.630893', 'step': 22046, 'epoch': 2}
{'type': 'loss', 'content': 0.13168151676654816, 'timestamp': '2025-10-02 00:50:07.634769', 'step': 22047, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:50:07.704047', 'step': 22047, 'epoch': 2}
{'type': 'loss', 'content': 0.07485336065292358, 'timestamp': '2025-10-02 00:50:07.715931', 'step': 22048, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:50:07.785591', 'step': 22048, 'epoch': 2}
{'type': 'loss', 'content': 0.03889380767941475, 'timestamp': '2025-10-02 00:50:07.796533', 'step': 22049, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:07.860464', 'step': 22049, 'epoch': 2}
{'type': 'loss', 'content': 0.12442634254693985, 'timestamp': '2025-10-02 00:50:07.866733', 'step': 22050, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:07.934005', 'step': 22050, 'epoch': 2}
{'type': 'loss', 'content': 0.08658590167760849, 'timestamp': '2025-10-02 00:50:07.936804', 'step': 22051, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:07.997635', 'step': 22051, 'epoch': 2}
{'type': 'loss', 'content': 0.02612120658159256, 'timestamp': '2025-10-02 00:50:08.005436', 'step': 22052, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:08.064380', 'step': 22052, 'epoch': 2}
{'type': 'loss', 'content': 0.031164156273007393, 'timestamp': '2025-10-02 00:50:08.067112', 'step': 22053, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:08.129611', 'step': 22053, 'epoch': 2}
{'type': 'loss', 'content': 0.099171943962574, 'timestamp': '2025-10-02 00:50:08.132973', 'step': 22054, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:08.195167', 'step': 22054, 'epoch': 2}
{'type': 'loss', 'content': 0.06968475133180618, 'timestamp': '2025-10-02 00:50:08.204669', 'step': 22055, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:08.261011', 'step': 22055, 'epoch': 2}
{'type': 'loss', 'content': 0.027813775464892387, 'timestamp': '2025-10-02 00:50:08.271312', 'step': 22056, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:50:08.336606', 'step': 22056, 'epoch': 2}
{'type': 'loss', 'content': 0.08230312913656235, 'timestamp': '2025-10-02 00:50:08.342558', 'step': 22057, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:08.402625', 'step': 22057, 'epoch': 2}
{'type': 'loss', 'content': 0.04156680032610893, 'timestamp': '2025-10-02 00:50:08.410322', 'step': 22058, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:08.469705', 'step': 22058, 'epoch': 2}
{'type': 'loss', 'content': 0.1521671861410141, 'timestamp': '2025-10-02 00:50:08.472558', 'step': 22059, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:08.538712', 'step': 22059, 'epoch': 2}
{'type': 'loss', 'content': 0.017252078279852867, 'timestamp': '2025-10-02 00:50:08.548294', 'step': 22060, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:08.611822', 'step': 22060, 'epoch': 2}
{'type': 'loss', 'content': 0.13326646387577057, 'timestamp': '2025-10-02 00:50:08.617702', 'step': 22061, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:08.680650', 'step': 22061, 'epoch': 2}
{'type': 'loss', 'content': 0.017889423295855522, 'timestamp': '2025-10-02 00:50:08.690015', 'step': 22062, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:08.759875', 'step': 22062, 'epoch': 2}
{'type': 'loss', 'content': 0.1302369087934494, 'timestamp': '2025-10-02 00:50:08.764536', 'step': 22063, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:08.833221', 'step': 22063, 'epoch': 2}
{'type': 'loss', 'content': 0.04814170300960541, 'timestamp': '2025-10-02 00:50:08.844663', 'step': 22064, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:08.905149', 'step': 22064, 'epoch': 2}
{'type': 'loss', 'content': 0.0728149339556694, 'timestamp': '2025-10-02 00:50:08.908042', 'step': 22065, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:08.968799', 'step': 22065, 'epoch': 2}
{'type': 'loss', 'content': 0.052263662219047546, 'timestamp': '2025-10-02 00:50:08.973596', 'step': 22066, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:09.031561', 'step': 22066, 'epoch': 2}
{'type': 'loss', 'content': 0.09130867570638657, 'timestamp': '2025-10-02 00:50:09.040932', 'step': 22067, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:09.105187', 'step': 22067, 'epoch': 2}
{'type': 'loss', 'content': 0.03962724655866623, 'timestamp': '2025-10-02 00:50:09.112407', 'step': 22068, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:09.167892', 'step': 22068, 'epoch': 2}
{'type': 'loss', 'content': 0.021710749715566635, 'timestamp': '2025-10-02 00:50:09.177561', 'step': 22069, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:50:09.236197', 'step': 22069, 'epoch': 2}
{'type': 'loss', 'content': 0.05633612722158432, 'timestamp': '2025-10-02 00:50:09.246159', 'step': 22070, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:09.325146', 'step': 22070, 'epoch': 2}
{'type': 'loss', 'content': 0.053066566586494446, 'timestamp': '2025-10-02 00:50:09.328776', 'step': 22071, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:09.400293', 'step': 22071, 'epoch': 2}
{'type': 'loss', 'content': 0.01355423592031002, 'timestamp': '2025-10-02 00:50:09.408738', 'step': 22072, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:50:09.473886', 'step': 22072, 'epoch': 2}
{'type': 'loss', 'content': 0.04601861536502838, 'timestamp': '2025-10-02 00:50:09.485425', 'step': 22073, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:09.543716', 'step': 22073, 'epoch': 2}
{'type': 'loss', 'content': 0.06998982280492783, 'timestamp': '2025-10-02 00:50:09.549348', 'step': 22074, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:09.611447', 'step': 22074, 'epoch': 2}
{'type': 'loss', 'content': 0.053753599524497986, 'timestamp': '2025-10-02 00:50:09.617541', 'step': 22075, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:09.684616', 'step': 22075, 'epoch': 2}
{'type': 'loss', 'content': 0.0334065705537796, 'timestamp': '2025-10-02 00:50:09.695676', 'step': 22076, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:09.762940', 'step': 22076, 'epoch': 2}
{'type': 'loss', 'content': 0.03675590083003044, 'timestamp': '2025-10-02 00:50:09.768997', 'step': 22077, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:09.839300', 'step': 22077, 'epoch': 2}
{'type': 'loss', 'content': 0.1780930608510971, 'timestamp': '2025-10-02 00:50:09.842239', 'step': 22078, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:50:09.907622', 'step': 22078, 'epoch': 2}
{'type': 'loss', 'content': 0.09138372540473938, 'timestamp': '2025-10-02 00:50:09.910539', 'step': 22079, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:09.971274', 'step': 22079, 'epoch': 2}
{'type': 'loss', 'content': 0.00272433552891016, 'timestamp': '2025-10-02 00:50:09.982807', 'step': 22080, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:50:10.040162', 'step': 22080, 'epoch': 2}
{'type': 'loss', 'content': 0.24846652150154114, 'timestamp': '2025-10-02 00:50:10.045962', 'step': 22081, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:10.110111', 'step': 22081, 'epoch': 2}
{'type': 'loss', 'content': 0.11587049812078476, 'timestamp': '2025-10-02 00:50:10.115437', 'step': 22082, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:10.180343', 'step': 22082, 'epoch': 2}
{'type': 'loss', 'content': 0.024663083255290985, 'timestamp': '2025-10-02 00:50:10.189831', 'step': 22083, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:10.248724', 'step': 22083, 'epoch': 2}
{'type': 'loss', 'content': 0.026017915457487106, 'timestamp': '2025-10-02 00:50:10.255021', 'step': 22084, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:10.311132', 'step': 22084, 'epoch': 2}
{'type': 'loss', 'content': 0.05499539524316788, 'timestamp': '2025-10-02 00:50:10.321072', 'step': 22085, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:10.382190', 'step': 22085, 'epoch': 2}
{'type': 'loss', 'content': 0.023309918120503426, 'timestamp': '2025-10-02 00:50:10.386960', 'step': 22086, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:10.448363', 'step': 22086, 'epoch': 2}
{'type': 'loss', 'content': 0.10131457448005676, 'timestamp': '2025-10-02 00:50:10.454546', 'step': 22087, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:10.514297', 'step': 22087, 'epoch': 2}
{'type': 'loss', 'content': 0.0387706495821476, 'timestamp': '2025-10-02 00:50:10.524514', 'step': 22088, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:10.592997', 'step': 22088, 'epoch': 2}
{'type': 'loss', 'content': 0.09795526415109634, 'timestamp': '2025-10-02 00:50:10.598758', 'step': 22089, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:10.662164', 'step': 22089, 'epoch': 2}
{'type': 'loss', 'content': 0.1691148281097412, 'timestamp': '2025-10-02 00:50:10.664727', 'step': 22090, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:50:10.731274', 'step': 22090, 'epoch': 2}
{'type': 'loss', 'content': 0.045177020132541656, 'timestamp': '2025-10-02 00:50:10.734286', 'step': 22091, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:50:10.807764', 'step': 22091, 'epoch': 2}
{'type': 'loss', 'content': 0.026075689122080803, 'timestamp': '2025-10-02 00:50:10.819167', 'step': 22092, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:10.881312', 'step': 22092, 'epoch': 2}
{'type': 'loss', 'content': 0.06998386234045029, 'timestamp': '2025-10-02 00:50:10.892047', 'step': 22093, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:50:10.957054', 'step': 22093, 'epoch': 2}
{'type': 'loss', 'content': 0.07044554501771927, 'timestamp': '2025-10-02 00:50:10.959903', 'step': 22094, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:11.020922', 'step': 22094, 'epoch': 2}
{'type': 'loss', 'content': 0.06491129845380783, 'timestamp': '2025-10-02 00:50:11.032052', 'step': 22095, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:11.103149', 'step': 22095, 'epoch': 2}
{'type': 'loss', 'content': 0.013134301640093327, 'timestamp': '2025-10-02 00:50:11.112308', 'step': 22096, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:11.169113', 'step': 22096, 'epoch': 2}
{'type': 'loss', 'content': 0.007111850660294294, 'timestamp': '2025-10-02 00:50:11.178860', 'step': 22097, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:11.245635', 'step': 22097, 'epoch': 2}
{'type': 'loss', 'content': 0.09502464532852173, 'timestamp': '2025-10-02 00:50:11.249145', 'step': 22098, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:11.320279', 'step': 22098, 'epoch': 2}
{'type': 'loss', 'content': 0.07573643326759338, 'timestamp': '2025-10-02 00:50:11.322534', 'step': 22099, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:11.391584', 'step': 22099, 'epoch': 2}
{'type': 'loss', 'content': 0.040044866502285004, 'timestamp': '2025-10-02 00:50:11.398099', 'step': 22100, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:11.457480', 'step': 22100, 'epoch': 2}
{'type': 'loss', 'content': 0.013743150979280472, 'timestamp': '2025-10-02 00:50:11.463562', 'step': 22101, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:50:11.525381', 'step': 22101, 'epoch': 2}
{'type': 'loss', 'content': 0.07425029575824738, 'timestamp': '2025-10-02 00:50:11.535571', 'step': 22102, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:11.602416', 'step': 22102, 'epoch': 2}
{'type': 'loss', 'content': 0.05633879825472832, 'timestamp': '2025-10-02 00:50:11.604773', 'step': 22103, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:11.667423', 'step': 22103, 'epoch': 2}
{'type': 'loss', 'content': 0.0699043869972229, 'timestamp': '2025-10-02 00:50:11.676109', 'step': 22104, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:11.739698', 'step': 22104, 'epoch': 2}
{'type': 'loss', 'content': 0.04999278485774994, 'timestamp': '2025-10-02 00:50:11.747267', 'step': 22105, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:50:11.814196', 'step': 22105, 'epoch': 2}
{'type': 'loss', 'content': 0.021345289424061775, 'timestamp': '2025-10-02 00:50:11.824791', 'step': 22106, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:11.889296', 'step': 22106, 'epoch': 2}
{'type': 'loss', 'content': 0.04181313514709473, 'timestamp': '2025-10-02 00:50:11.893853', 'step': 22107, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:11.957699', 'step': 22107, 'epoch': 2}
{'type': 'loss', 'content': 0.1408444046974182, 'timestamp': '2025-10-02 00:50:11.964978', 'step': 22108, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:12.030133', 'step': 22108, 'epoch': 2}
{'type': 'loss', 'content': 0.03222408890724182, 'timestamp': '2025-10-02 00:50:12.033407', 'step': 22109, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:50:12.117663', 'step': 22109, 'epoch': 2}
{'type': 'loss', 'content': 0.027686672285199165, 'timestamp': '2025-10-02 00:50:12.128536', 'step': 22110, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:12.193911', 'step': 22110, 'epoch': 2}
{'type': 'loss', 'content': 0.11431707441806793, 'timestamp': '2025-10-02 00:50:12.204367', 'step': 22111, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:12.265444', 'step': 22111, 'epoch': 2}
{'type': 'loss', 'content': 0.020640740171074867, 'timestamp': '2025-10-02 00:50:12.273234', 'step': 22112, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:12.374379', 'step': 22112, 'epoch': 2}
{'type': 'loss', 'content': 0.03434012457728386, 'timestamp': '2025-10-02 00:50:12.390458', 'step': 22113, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:12.477246', 'step': 22113, 'epoch': 2}
{'type': 'loss', 'content': 0.12181838601827621, 'timestamp': '2025-10-02 00:50:12.482027', 'step': 22114, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:12.564721', 'step': 22114, 'epoch': 2}
{'type': 'loss', 'content': 0.013710195198655128, 'timestamp': '2025-10-02 00:50:12.567950', 'step': 22115, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:12.634146', 'step': 22115, 'epoch': 2}
{'type': 'loss', 'content': 0.08299580961465836, 'timestamp': '2025-10-02 00:50:12.640679', 'step': 22116, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:50:12.704045', 'step': 22116, 'epoch': 2}
{'type': 'loss', 'content': 0.03456675633788109, 'timestamp': '2025-10-02 00:50:12.715372', 'step': 22117, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:12.778352', 'step': 22117, 'epoch': 2}
{'type': 'loss', 'content': 0.10552386194467545, 'timestamp': '2025-10-02 00:50:12.780874', 'step': 22118, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:50:12.843438', 'step': 22118, 'epoch': 2}
{'type': 'loss', 'content': 0.03010771982371807, 'timestamp': '2025-10-02 00:50:12.854079', 'step': 22119, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:12.911512', 'step': 22119, 'epoch': 2}
{'type': 'loss', 'content': 0.027064617723226547, 'timestamp': '2025-10-02 00:50:12.921803', 'step': 22120, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:12.977041', 'step': 22120, 'epoch': 2}
{'type': 'loss', 'content': 0.022314073517918587, 'timestamp': '2025-10-02 00:50:12.980524', 'step': 22121, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:13.035825', 'step': 22121, 'epoch': 2}
{'type': 'loss', 'content': 0.046163659542798996, 'timestamp': '2025-10-02 00:50:13.043469', 'step': 22122, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:13.099682', 'step': 22122, 'epoch': 2}
{'type': 'loss', 'content': 0.024606775492429733, 'timestamp': '2025-10-02 00:50:13.108960', 'step': 22123, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:13.163707', 'step': 22123, 'epoch': 2}
{'type': 'loss', 'content': 0.07826250046491623, 'timestamp': '2025-10-02 00:50:13.170288', 'step': 22124, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:13.224602', 'step': 22124, 'epoch': 2}
{'type': 'loss', 'content': 0.015892919152975082, 'timestamp': '2025-10-02 00:50:13.234510', 'step': 22125, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:13.291138', 'step': 22125, 'epoch': 2}
{'type': 'loss', 'content': 0.03982209786772728, 'timestamp': '2025-10-02 00:50:13.300481', 'step': 22126, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:13.356014', 'step': 22126, 'epoch': 2}
{'type': 'loss', 'content': 0.060769736766815186, 'timestamp': '2025-10-02 00:50:13.361852', 'step': 22127, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:13.416959', 'step': 22127, 'epoch': 2}
{'type': 'loss', 'content': 0.09771349281072617, 'timestamp': '2025-10-02 00:50:13.423768', 'step': 22128, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:13.485271', 'step': 22128, 'epoch': 2}
{'type': 'loss', 'content': 0.03302046284079552, 'timestamp': '2025-10-02 00:50:13.487946', 'step': 22129, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:13.550788', 'step': 22129, 'epoch': 2}
{'type': 'loss', 'content': 0.03714495897293091, 'timestamp': '2025-10-02 00:50:13.560187', 'step': 22130, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:13.616536', 'step': 22130, 'epoch': 2}
{'type': 'loss', 'content': 0.10696011036634445, 'timestamp': '2025-10-02 00:50:13.623047', 'step': 22131, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:13.682811', 'step': 22131, 'epoch': 2}
{'type': 'loss', 'content': 0.0354892872273922, 'timestamp': '2025-10-02 00:50:13.688871', 'step': 22132, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:13.748598', 'step': 22132, 'epoch': 2}
{'type': 'loss', 'content': 0.019064951688051224, 'timestamp': '2025-10-02 00:50:13.755970', 'step': 22133, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:13.831528', 'step': 22133, 'epoch': 2}
{'type': 'loss', 'content': 0.14304225146770477, 'timestamp': '2025-10-02 00:50:13.838821', 'step': 22134, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:50:13.903256', 'step': 22134, 'epoch': 2}
{'type': 'loss', 'content': 0.05400519445538521, 'timestamp': '2025-10-02 00:50:13.909124', 'step': 22135, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:50:13.966231', 'step': 22135, 'epoch': 2}
{'type': 'loss', 'content': 0.12822990119457245, 'timestamp': '2025-10-02 00:50:13.972839', 'step': 22136, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:14.028551', 'step': 22136, 'epoch': 2}
{'type': 'loss', 'content': 0.0331403985619545, 'timestamp': '2025-10-02 00:50:14.038115', 'step': 22137, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:50:14.109662', 'step': 22137, 'epoch': 2}
{'type': 'loss', 'content': 0.0660424456000328, 'timestamp': '2025-10-02 00:50:14.116476', 'step': 22138, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:50:14.182777', 'step': 22138, 'epoch': 2}
{'type': 'loss', 'content': 0.1071685180068016, 'timestamp': '2025-10-02 00:50:14.193700', 'step': 22139, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:50:14.267232', 'step': 22139, 'epoch': 2}
{'type': 'loss', 'content': 0.03572661429643631, 'timestamp': '2025-10-02 00:50:14.278203', 'step': 22140, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:14.341901', 'step': 22140, 'epoch': 2}
{'type': 'loss', 'content': 0.05198788642883301, 'timestamp': '2025-10-02 00:50:14.349550', 'step': 22141, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:50:14.431860', 'step': 22141, 'epoch': 2}
{'type': 'loss', 'content': 0.07112932205200195, 'timestamp': '2025-10-02 00:50:14.445671', 'step': 22142, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:50:14.517664', 'step': 22142, 'epoch': 2}
{'type': 'loss', 'content': 0.012479526922106743, 'timestamp': '2025-10-02 00:50:14.528122', 'step': 22143, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:14.593299', 'step': 22143, 'epoch': 2}
{'type': 'loss', 'content': 0.08898445218801498, 'timestamp': '2025-10-02 00:50:14.602594', 'step': 22144, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:14.668678', 'step': 22144, 'epoch': 2}
{'type': 'loss', 'content': 0.05151623487472534, 'timestamp': '2025-10-02 00:50:14.671905', 'step': 22145, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:14.743130', 'step': 22145, 'epoch': 2}
{'type': 'loss', 'content': 0.1346338391304016, 'timestamp': '2025-10-02 00:50:14.746587', 'step': 22146, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:14.810052', 'step': 22146, 'epoch': 2}
{'type': 'loss', 'content': 0.03453071787953377, 'timestamp': '2025-10-02 00:50:14.819337', 'step': 22147, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:14.876355', 'step': 22147, 'epoch': 2}
{'type': 'loss', 'content': 0.0968627780675888, 'timestamp': '2025-10-02 00:50:14.885260', 'step': 22148, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:14.953471', 'step': 22148, 'epoch': 2}
{'type': 'loss', 'content': 0.11558959633111954, 'timestamp': '2025-10-02 00:50:14.956634', 'step': 22149, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:50:15.021822', 'step': 22149, 'epoch': 2}
{'type': 'loss', 'content': 0.2910864055156708, 'timestamp': '2025-10-02 00:50:15.030274', 'step': 22150, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:15.090451', 'step': 22150, 'epoch': 2}
{'type': 'loss', 'content': 0.021846959367394447, 'timestamp': '2025-10-02 00:50:15.098114', 'step': 22151, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:50:15.160425', 'step': 22151, 'epoch': 2}
{'type': 'loss', 'content': 0.14679238200187683, 'timestamp': '2025-10-02 00:50:15.166602', 'step': 22152, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:15.223279', 'step': 22152, 'epoch': 2}
{'type': 'loss', 'content': 0.15150189399719238, 'timestamp': '2025-10-02 00:50:15.230710', 'step': 22153, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:15.299509', 'step': 22153, 'epoch': 2}
{'type': 'loss', 'content': 0.018330182880163193, 'timestamp': '2025-10-02 00:50:15.308877', 'step': 22154, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:15.366282', 'step': 22154, 'epoch': 2}
{'type': 'loss', 'content': 0.11171247810125351, 'timestamp': '2025-10-02 00:50:15.369878', 'step': 22155, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:50:15.448711', 'step': 22155, 'epoch': 2}
{'type': 'loss', 'content': 0.04699413478374481, 'timestamp': '2025-10-02 00:50:15.460274', 'step': 22156, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:15.518653', 'step': 22156, 'epoch': 2}
{'type': 'loss', 'content': 0.06474877148866653, 'timestamp': '2025-10-02 00:50:15.522003', 'step': 22157, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:15.578025', 'step': 22157, 'epoch': 2}
{'type': 'loss', 'content': 0.010397873818874359, 'timestamp': '2025-10-02 00:50:15.585578', 'step': 22158, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:15.648053', 'step': 22158, 'epoch': 2}
{'type': 'loss', 'content': 0.03276512026786804, 'timestamp': '2025-10-02 00:50:15.654584', 'step': 22159, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:15.718747', 'step': 22159, 'epoch': 2}
{'type': 'loss', 'content': 0.0482538677752018, 'timestamp': '2025-10-02 00:50:15.724776', 'step': 22160, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:15.782953', 'step': 22160, 'epoch': 2}
{'type': 'loss', 'content': 0.13384471833705902, 'timestamp': '2025-10-02 00:50:15.786406', 'step': 22161, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:15.854727', 'step': 22161, 'epoch': 2}
{'type': 'loss', 'content': 0.012977680191397667, 'timestamp': '2025-10-02 00:50:15.862374', 'step': 22162, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:15.920283', 'step': 22162, 'epoch': 2}
{'type': 'loss', 'content': 0.11252274364233017, 'timestamp': '2025-10-02 00:50:15.922987', 'step': 22163, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:15.992391', 'step': 22163, 'epoch': 2}
{'type': 'loss', 'content': 0.03668862581253052, 'timestamp': '2025-10-02 00:50:15.998319', 'step': 22164, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:16.053362', 'step': 22164, 'epoch': 2}
{'type': 'loss', 'content': 0.057042088359594345, 'timestamp': '2025-10-02 00:50:16.059369', 'step': 22165, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:50:16.114995', 'step': 22165, 'epoch': 2}
{'type': 'loss', 'content': 0.04181212559342384, 'timestamp': '2025-10-02 00:50:16.117838', 'step': 22166, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:16.175436', 'step': 22166, 'epoch': 2}
{'type': 'loss', 'content': 0.04117861017584801, 'timestamp': '2025-10-02 00:50:16.184862', 'step': 22167, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:50:16.249377', 'step': 22167, 'epoch': 2}
{'type': 'loss', 'content': 0.007537380326539278, 'timestamp': '2025-10-02 00:50:16.260817', 'step': 22168, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:16.316250', 'step': 22168, 'epoch': 2}
{'type': 'loss', 'content': 0.06309644877910614, 'timestamp': '2025-10-02 00:50:16.322207', 'step': 22169, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:16.377439', 'step': 22169, 'epoch': 2}
{'type': 'loss', 'content': 0.10126108676195145, 'timestamp': '2025-10-02 00:50:16.380378', 'step': 22170, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:50:16.440767', 'step': 22170, 'epoch': 2}
{'type': 'loss', 'content': 0.021647859364748, 'timestamp': '2025-10-02 00:50:16.450914', 'step': 22171, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:16.507352', 'step': 22171, 'epoch': 2}
{'type': 'loss', 'content': 0.0866895318031311, 'timestamp': '2025-10-02 00:50:16.513383', 'step': 22172, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:16.568200', 'step': 22172, 'epoch': 2}
{'type': 'loss', 'content': 0.054808977991342545, 'timestamp': '2025-10-02 00:50:16.571603', 'step': 22173, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:16.626413', 'step': 22173, 'epoch': 2}
{'type': 'loss', 'content': 0.06456689536571503, 'timestamp': '2025-10-02 00:50:16.628673', 'step': 22174, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:50:16.685272', 'step': 22174, 'epoch': 2}
{'type': 'loss', 'content': 0.16678239405155182, 'timestamp': '2025-10-02 00:50:16.689243', 'step': 22175, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:16.743145', 'step': 22175, 'epoch': 2}
{'type': 'loss', 'content': 0.056554630398750305, 'timestamp': '2025-10-02 00:50:16.750323', 'step': 22176, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:16.805055', 'step': 22176, 'epoch': 2}
{'type': 'loss', 'content': 0.030129818245768547, 'timestamp': '2025-10-02 00:50:16.807867', 'step': 22177, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:16.864101', 'step': 22177, 'epoch': 2}
{'type': 'loss', 'content': 0.08614396303892136, 'timestamp': '2025-10-02 00:50:16.871688', 'step': 22178, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:16.926749', 'step': 22178, 'epoch': 2}
{'type': 'loss', 'content': 0.07062532752752304, 'timestamp': '2025-10-02 00:50:16.929563', 'step': 22179, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:16.986907', 'step': 22179, 'epoch': 2}
{'type': 'loss', 'content': 0.028566258028149605, 'timestamp': '2025-10-02 00:50:16.994036', 'step': 22180, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:17.049125', 'step': 22180, 'epoch': 2}
{'type': 'loss', 'content': 0.12267697602510452, 'timestamp': '2025-10-02 00:50:17.053100', 'step': 22181, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:17.110337', 'step': 22181, 'epoch': 2}
{'type': 'loss', 'content': 0.0679040402173996, 'timestamp': '2025-10-02 00:50:17.112733', 'step': 22182, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:17.169925', 'step': 22182, 'epoch': 2}
{'type': 'loss', 'content': 0.07500844448804855, 'timestamp': '2025-10-02 00:50:17.172387', 'step': 22183, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:17.228035', 'step': 22183, 'epoch': 2}
{'type': 'loss', 'content': 0.031271614134311676, 'timestamp': '2025-10-02 00:50:17.236330', 'step': 22184, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:50:17.306614', 'step': 22184, 'epoch': 2}
{'type': 'loss', 'content': 0.038488246500492096, 'timestamp': '2025-10-02 00:50:17.319995', 'step': 22185, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:17.375070', 'step': 22185, 'epoch': 2}
{'type': 'loss', 'content': 0.04050411283969879, 'timestamp': '2025-10-02 00:50:17.382579', 'step': 22186, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:17.439232', 'step': 22186, 'epoch': 2}
{'type': 'loss', 'content': 0.09678743034601212, 'timestamp': '2025-10-02 00:50:17.443813', 'step': 22187, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:17.500744', 'step': 22187, 'epoch': 2}
{'type': 'loss', 'content': 0.026479879394173622, 'timestamp': '2025-10-02 00:50:17.506934', 'step': 22188, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:17.565536', 'step': 22188, 'epoch': 2}
{'type': 'loss', 'content': 0.00651012547314167, 'timestamp': '2025-10-02 00:50:17.568452', 'step': 22189, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:50:17.623689', 'step': 22189, 'epoch': 2}
{'type': 'loss', 'content': 0.11352565884590149, 'timestamp': '2025-10-02 00:50:17.626575', 'step': 22190, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:17.684081', 'step': 22190, 'epoch': 2}
{'type': 'loss', 'content': 0.12229078263044357, 'timestamp': '2025-10-02 00:50:17.686811', 'step': 22191, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:50:17.749699', 'step': 22191, 'epoch': 2}
{'type': 'loss', 'content': 0.03548305854201317, 'timestamp': '2025-10-02 00:50:17.760907', 'step': 22192, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:50:17.815184', 'step': 22192, 'epoch': 2}
{'type': 'loss', 'content': 0.11426958441734314, 'timestamp': '2025-10-02 00:50:17.817646', 'step': 22193, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:17.873613', 'step': 22193, 'epoch': 2}
{'type': 'loss', 'content': 0.06129514425992966, 'timestamp': '2025-10-02 00:50:17.876301', 'step': 22194, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:50:17.936555', 'step': 22194, 'epoch': 2}
{'type': 'loss', 'content': 0.08531337231397629, 'timestamp': '2025-10-02 00:50:17.946751', 'step': 22195, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:18.003172', 'step': 22195, 'epoch': 2}
{'type': 'loss', 'content': 0.11013883352279663, 'timestamp': '2025-10-02 00:50:18.009606', 'step': 22196, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:50:18.080700', 'step': 22196, 'epoch': 2}
{'type': 'loss', 'content': 0.03838038444519043, 'timestamp': '2025-10-02 00:50:18.094083', 'step': 22197, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:18.150399', 'step': 22197, 'epoch': 2}
{'type': 'loss', 'content': 0.0494937002658844, 'timestamp': '2025-10-02 00:50:18.159712', 'step': 22198, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:18.213506', 'step': 22198, 'epoch': 2}
{'type': 'loss', 'content': 0.026139745488762856, 'timestamp': '2025-10-02 00:50:18.215940', 'step': 22199, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:50:18.271854', 'step': 22199, 'epoch': 2}
{'type': 'loss', 'content': 0.0338323600590229, 'timestamp': '2025-10-02 00:50:18.278297', 'step': 22200, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:18.332139', 'step': 22200, 'epoch': 2}
{'type': 'loss', 'content': 0.0706491619348526, 'timestamp': '2025-10-02 00:50:18.334698', 'step': 22201, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:18.389859', 'step': 22201, 'epoch': 2}
{'type': 'loss', 'content': 0.08153974264860153, 'timestamp': '2025-10-02 00:50:18.395965', 'step': 22202, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:18.449750', 'step': 22202, 'epoch': 2}
{'type': 'loss', 'content': 0.15821707248687744, 'timestamp': '2025-10-02 00:50:18.452547', 'step': 22203, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:18.505945', 'step': 22203, 'epoch': 2}
{'type': 'loss', 'content': 0.12928156554698944, 'timestamp': '2025-10-02 00:50:18.511700', 'step': 22204, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:18.564917', 'step': 22204, 'epoch': 2}
{'type': 'loss', 'content': 0.14433063566684723, 'timestamp': '2025-10-02 00:50:18.567391', 'step': 22205, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:18.621393', 'step': 22205, 'epoch': 2}
{'type': 'loss', 'content': 0.06082043796777725, 'timestamp': '2025-10-02 00:50:18.623887', 'step': 22206, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:18.678042', 'step': 22206, 'epoch': 2}
{'type': 'loss', 'content': 0.06238780543208122, 'timestamp': '2025-10-02 00:50:18.680051', 'step': 22207, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:18.734760', 'step': 22207, 'epoch': 2}
{'type': 'loss', 'content': 0.06842643767595291, 'timestamp': '2025-10-02 00:50:18.741146', 'step': 22208, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:18.795351', 'step': 22208, 'epoch': 2}
{'type': 'loss', 'content': 0.08527886867523193, 'timestamp': '2025-10-02 00:50:18.797543', 'step': 22209, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:18.851978', 'step': 22209, 'epoch': 2}
{'type': 'loss', 'content': 0.04887402430176735, 'timestamp': '2025-10-02 00:50:18.857902', 'step': 22210, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:18.914239', 'step': 22210, 'epoch': 2}
{'type': 'loss', 'content': 0.045923199504613876, 'timestamp': '2025-10-02 00:50:18.916791', 'step': 22211, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:18.972265', 'step': 22211, 'epoch': 2}
{'type': 'loss', 'content': 0.02522747591137886, 'timestamp': '2025-10-02 00:50:18.982379', 'step': 22212, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:50:19.036142', 'step': 22212, 'epoch': 2}
{'type': 'loss', 'content': 0.08616674691438675, 'timestamp': '2025-10-02 00:50:19.038689', 'step': 22213, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:19.093653', 'step': 22213, 'epoch': 2}
{'type': 'loss', 'content': 0.012719321995973587, 'timestamp': '2025-10-02 00:50:19.102997', 'step': 22214, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:19.157806', 'step': 22214, 'epoch': 2}
{'type': 'loss', 'content': 0.1347292810678482, 'timestamp': '2025-10-02 00:50:19.160118', 'step': 22215, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:19.214352', 'step': 22215, 'epoch': 2}
{'type': 'loss', 'content': 0.020030390471220016, 'timestamp': '2025-10-02 00:50:19.220510', 'step': 22216, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:19.274664', 'step': 22216, 'epoch': 2}
{'type': 'loss', 'content': 0.06595447659492493, 'timestamp': '2025-10-02 00:50:19.284933', 'step': 22217, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:19.340227', 'step': 22217, 'epoch': 2}
{'type': 'loss', 'content': 0.06359734386205673, 'timestamp': '2025-10-02 00:50:19.342440', 'step': 22218, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:19.396907', 'step': 22218, 'epoch': 2}
{'type': 'loss', 'content': 0.03932236507534981, 'timestamp': '2025-10-02 00:50:19.401112', 'step': 22219, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:19.458600', 'step': 22219, 'epoch': 2}
{'type': 'loss', 'content': 0.07093723118305206, 'timestamp': '2025-10-02 00:50:19.479984', 'step': 22220, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:50:19.548491', 'step': 22220, 'epoch': 2}
{'type': 'loss', 'content': 0.10280069708824158, 'timestamp': '2025-10-02 00:50:19.556522', 'step': 22221, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:50:19.644998', 'step': 22221, 'epoch': 2}
{'type': 'loss', 'content': 0.03655124455690384, 'timestamp': '2025-10-02 00:50:19.648955', 'step': 22222, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:19.747384', 'step': 22222, 'epoch': 2}
{'type': 'loss', 'content': 0.08619458973407745, 'timestamp': '2025-10-02 00:50:19.754321', 'step': 22223, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:50:19.816083', 'step': 22223, 'epoch': 2}
{'type': 'loss', 'content': 0.03729993849992752, 'timestamp': '2025-10-02 00:50:19.825592', 'step': 22224, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:50:19.904390', 'step': 22224, 'epoch': 2}
{'type': 'loss', 'content': 0.02974516525864601, 'timestamp': '2025-10-02 00:50:19.917939', 'step': 22225, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:19.974189', 'step': 22225, 'epoch': 2}
{'type': 'loss', 'content': 0.1556788980960846, 'timestamp': '2025-10-02 00:50:19.977255', 'step': 22226, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:50:20.039674', 'step': 22226, 'epoch': 2}
{'type': 'loss', 'content': 0.06862308084964752, 'timestamp': '2025-10-02 00:50:20.046041', 'step': 22227, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:20.109774', 'step': 22227, 'epoch': 2}
{'type': 'loss', 'content': 0.0030462441500276327, 'timestamp': '2025-10-02 00:50:20.115942', 'step': 22228, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:50:20.179582', 'step': 22228, 'epoch': 2}
{'type': 'loss', 'content': 0.06936601549386978, 'timestamp': '2025-10-02 00:50:20.181896', 'step': 22229, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:20.239099', 'step': 22229, 'epoch': 2}
{'type': 'loss', 'content': 0.07850669324398041, 'timestamp': '2025-10-02 00:50:20.242194', 'step': 22230, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:50:20.313118', 'step': 22230, 'epoch': 2}
{'type': 'loss', 'content': 0.07734998315572739, 'timestamp': '2025-10-02 00:50:20.323267', 'step': 22231, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:20.385774', 'step': 22231, 'epoch': 2}
{'type': 'loss', 'content': 0.037726398557424545, 'timestamp': '2025-10-02 00:50:20.397784', 'step': 22232, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:20.466445', 'step': 22232, 'epoch': 2}
{'type': 'loss', 'content': 0.0979890450835228, 'timestamp': '2025-10-02 00:50:20.474398', 'step': 22233, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:20.541687', 'step': 22233, 'epoch': 2}
{'type': 'loss', 'content': 0.04655866324901581, 'timestamp': '2025-10-02 00:50:20.548079', 'step': 22234, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:20.610887', 'step': 22234, 'epoch': 2}
{'type': 'loss', 'content': 0.003344896947965026, 'timestamp': '2025-10-02 00:50:20.614407', 'step': 22235, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:20.673874', 'step': 22235, 'epoch': 2}
{'type': 'loss', 'content': 0.0802605152130127, 'timestamp': '2025-10-02 00:50:20.680384', 'step': 22236, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:20.749888', 'step': 22236, 'epoch': 2}
{'type': 'loss', 'content': 0.019299620762467384, 'timestamp': '2025-10-02 00:50:20.759942', 'step': 22237, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:20.830787', 'step': 22237, 'epoch': 2}
{'type': 'loss', 'content': 0.029689336195588112, 'timestamp': '2025-10-02 00:50:20.840312', 'step': 22238, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:20.902825', 'step': 22238, 'epoch': 2}
{'type': 'loss', 'content': 0.03036721795797348, 'timestamp': '2025-10-02 00:50:20.909447', 'step': 22239, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:50:20.980112', 'step': 22239, 'epoch': 2}
{'type': 'loss', 'content': 0.09286382049322128, 'timestamp': '2025-10-02 00:50:20.991048', 'step': 22240, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:21.051921', 'step': 22240, 'epoch': 2}
{'type': 'loss', 'content': 0.08729510754346848, 'timestamp': '2025-10-02 00:50:21.058845', 'step': 22241, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:50:21.124177', 'step': 22241, 'epoch': 2}
{'type': 'loss', 'content': 0.11104778200387955, 'timestamp': '2025-10-02 00:50:21.127840', 'step': 22242, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:50:21.212345', 'step': 22242, 'epoch': 2}
{'type': 'loss', 'content': 0.003552141599357128, 'timestamp': '2025-10-02 00:50:21.224328', 'step': 22243, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:50:21.285772', 'step': 22243, 'epoch': 2}
{'type': 'loss', 'content': 0.0879921168088913, 'timestamp': '2025-10-02 00:50:21.296166', 'step': 22244, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:21.361736', 'step': 22244, 'epoch': 2}
{'type': 'loss', 'content': 0.048520803451538086, 'timestamp': '2025-10-02 00:50:21.368587', 'step': 22245, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:21.432508', 'step': 22245, 'epoch': 2}
{'type': 'loss', 'content': 0.029975054785609245, 'timestamp': '2025-10-02 00:50:21.434538', 'step': 22246, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:21.490697', 'step': 22246, 'epoch': 2}
{'type': 'loss', 'content': 0.015311663039028645, 'timestamp': '2025-10-02 00:50:21.497447', 'step': 22247, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:21.573167', 'step': 22247, 'epoch': 2}
{'type': 'loss', 'content': 0.030309045687317848, 'timestamp': '2025-10-02 00:50:21.584625', 'step': 22248, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:21.641612', 'step': 22248, 'epoch': 2}
{'type': 'loss', 'content': 0.14575336873531342, 'timestamp': '2025-10-02 00:50:21.647569', 'step': 22249, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:21.713993', 'step': 22249, 'epoch': 2}
{'type': 'loss', 'content': 0.009037867188453674, 'timestamp': '2025-10-02 00:50:21.721624', 'step': 22250, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:50:21.786069', 'step': 22250, 'epoch': 2}
{'type': 'loss', 'content': 0.015234825201332569, 'timestamp': '2025-10-02 00:50:21.796704', 'step': 22251, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:21.864337', 'step': 22251, 'epoch': 2}
{'type': 'loss', 'content': 0.03063633106648922, 'timestamp': '2025-10-02 00:50:21.876173', 'step': 22252, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:21.936780', 'step': 22252, 'epoch': 2}
{'type': 'loss', 'content': 0.03518011420965195, 'timestamp': '2025-10-02 00:50:21.947044', 'step': 22253, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:22.015296', 'step': 22253, 'epoch': 2}
{'type': 'loss', 'content': 0.09300900995731354, 'timestamp': '2025-10-02 00:50:22.022516', 'step': 22254, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:22.088392', 'step': 22254, 'epoch': 2}
{'type': 'loss', 'content': 0.012304098345339298, 'timestamp': '2025-10-02 00:50:22.095913', 'step': 22255, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:22.164107', 'step': 22255, 'epoch': 2}
{'type': 'loss', 'content': 0.012704337015748024, 'timestamp': '2025-10-02 00:50:22.173587', 'step': 22256, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:22.230172', 'step': 22256, 'epoch': 2}
{'type': 'loss', 'content': 0.06776490062475204, 'timestamp': '2025-10-02 00:50:22.235963', 'step': 22257, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:50:22.303053', 'step': 22257, 'epoch': 2}
{'type': 'loss', 'content': 0.041228387504816055, 'timestamp': '2025-10-02 00:50:22.307225', 'step': 22258, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:22.367704', 'step': 22258, 'epoch': 2}
{'type': 'loss', 'content': 0.08924955874681473, 'timestamp': '2025-10-02 00:50:22.374702', 'step': 22259, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:22.443321', 'step': 22259, 'epoch': 2}
{'type': 'loss', 'content': 0.041912999004125595, 'timestamp': '2025-10-02 00:50:22.449261', 'step': 22260, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:50:22.517803', 'step': 22260, 'epoch': 2}
{'type': 'loss', 'content': 0.05237497389316559, 'timestamp': '2025-10-02 00:50:22.524025', 'step': 22261, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:22.594380', 'step': 22261, 'epoch': 2}
{'type': 'loss', 'content': 0.03429087623953819, 'timestamp': '2025-10-02 00:50:22.598880', 'step': 22262, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:22.657848', 'step': 22262, 'epoch': 2}
{'type': 'loss', 'content': 0.0699455738067627, 'timestamp': '2025-10-02 00:50:22.661122', 'step': 22263, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:22.729909', 'step': 22263, 'epoch': 2}
{'type': 'loss', 'content': 0.072395920753479, 'timestamp': '2025-10-02 00:50:22.738386', 'step': 22264, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:22.806444', 'step': 22264, 'epoch': 2}
{'type': 'loss', 'content': 0.05025945231318474, 'timestamp': '2025-10-02 00:50:22.816406', 'step': 22265, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:22.875493', 'step': 22265, 'epoch': 2}
{'type': 'loss', 'content': 0.07798263430595398, 'timestamp': '2025-10-02 00:50:22.879871', 'step': 22266, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:22.941930', 'step': 22266, 'epoch': 2}
{'type': 'loss', 'content': 0.004001940134912729, 'timestamp': '2025-10-02 00:50:22.945696', 'step': 22267, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:23.011910', 'step': 22267, 'epoch': 2}
{'type': 'loss', 'content': 0.03445328772068024, 'timestamp': '2025-10-02 00:50:23.020762', 'step': 22268, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:50:23.084593', 'step': 22268, 'epoch': 2}
{'type': 'loss', 'content': 0.13224920630455017, 'timestamp': '2025-10-02 00:50:23.088477', 'step': 22269, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:50:23.176775', 'step': 22269, 'epoch': 2}
{'type': 'loss', 'content': 0.04386784881353378, 'timestamp': '2025-10-02 00:50:23.187273', 'step': 22270, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:23.246019', 'step': 22270, 'epoch': 2}
{'type': 'loss', 'content': 0.06380128860473633, 'timestamp': '2025-10-02 00:50:23.248547', 'step': 22271, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:50:23.309193', 'step': 22271, 'epoch': 2}
{'type': 'loss', 'content': 0.030196480453014374, 'timestamp': '2025-10-02 00:50:23.320173', 'step': 22272, 'epoch': 2}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:50:50.803360', 'step': 22272, 'epoch': 2}
{'type': 'pplx', 'content': 89.6902402487006, 'timestamp': '2025-10-02 00:50:50.807546', 'step': 22272, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:50.863227', 'step': 22272, 'epoch': 2}
{'type': 'loss', 'content': 0.022111663594841957, 'timestamp': '2025-10-02 00:50:50.868479', 'step': 22273, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:50.924133', 'step': 22273, 'epoch': 2}
{'type': 'loss', 'content': 0.14555728435516357, 'timestamp': '2025-10-02 00:50:50.926392', 'step': 22274, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:50.981071', 'step': 22274, 'epoch': 2}
{'type': 'loss', 'content': 0.06285054981708527, 'timestamp': '2025-10-02 00:50:50.983281', 'step': 22275, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:51.038360', 'step': 22275, 'epoch': 2}
{'type': 'loss', 'content': 0.008699939586222172, 'timestamp': '2025-10-02 00:50:51.044547', 'step': 22276, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:50:51.100103', 'step': 22276, 'epoch': 2}
{'type': 'loss', 'content': 0.0926516130566597, 'timestamp': '2025-10-02 00:50:51.102300', 'step': 22277, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:50:51.164993', 'step': 22277, 'epoch': 2}
{'type': 'loss', 'content': 0.0115231703966856, 'timestamp': '2025-10-02 00:50:51.175651', 'step': 22278, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:51.230851', 'step': 22278, 'epoch': 2}
{'type': 'loss', 'content': 0.06063103675842285, 'timestamp': '2025-10-02 00:50:51.233265', 'step': 22279, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:50:51.290126', 'step': 22279, 'epoch': 2}
{'type': 'loss', 'content': 0.07015763223171234, 'timestamp': '2025-10-02 00:50:51.296355', 'step': 22280, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:50:51.350490', 'step': 22280, 'epoch': 2}
{'type': 'loss', 'content': 0.07130306959152222, 'timestamp': '2025-10-02 00:50:51.352775', 'step': 22281, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:51.407509', 'step': 22281, 'epoch': 2}
{'type': 'loss', 'content': 0.11217008531093597, 'timestamp': '2025-10-02 00:50:51.410132', 'step': 22282, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:50:51.465149', 'step': 22282, 'epoch': 2}
{'type': 'loss', 'content': 0.0475931279361248, 'timestamp': '2025-10-02 00:50:51.467349', 'step': 22283, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:51.521629', 'step': 22283, 'epoch': 2}
{'type': 'loss', 'content': 0.0367390401661396, 'timestamp': '2025-10-02 00:50:51.527439', 'step': 22284, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:51.583015', 'step': 22284, 'epoch': 2}
{'type': 'loss', 'content': 0.047058749943971634, 'timestamp': '2025-10-02 00:50:51.585628', 'step': 22285, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:51.640855', 'step': 22285, 'epoch': 2}
{'type': 'loss', 'content': 0.14612266421318054, 'timestamp': '2025-10-02 00:50:51.643769', 'step': 22286, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:50:51.706608', 'step': 22286, 'epoch': 2}
{'type': 'loss', 'content': 0.019229426980018616, 'timestamp': '2025-10-02 00:50:51.717242', 'step': 22287, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:50:51.780511', 'step': 22287, 'epoch': 2}
{'type': 'loss', 'content': 0.013608698733150959, 'timestamp': '2025-10-02 00:50:51.792161', 'step': 22288, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:51.847301', 'step': 22288, 'epoch': 2}
{'type': 'loss', 'content': 0.10365339368581772, 'timestamp': '2025-10-02 00:50:51.849747', 'step': 22289, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:51.905727', 'step': 22289, 'epoch': 2}
{'type': 'loss', 'content': 0.02040278911590576, 'timestamp': '2025-10-02 00:50:51.913432', 'step': 22290, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:51.969182', 'step': 22290, 'epoch': 2}
{'type': 'loss', 'content': 0.04239429160952568, 'timestamp': '2025-10-02 00:50:51.978524', 'step': 22291, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:52.035699', 'step': 22291, 'epoch': 2}
{'type': 'loss', 'content': 0.0645141676068306, 'timestamp': '2025-10-02 00:50:52.041734', 'step': 22292, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:52.096489', 'step': 22292, 'epoch': 2}
{'type': 'loss', 'content': 0.022075830027461052, 'timestamp': '2025-10-02 00:50:52.098579', 'step': 22293, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:52.153302', 'step': 22293, 'epoch': 2}
{'type': 'loss', 'content': 0.03958551585674286, 'timestamp': '2025-10-02 00:50:52.155357', 'step': 22294, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:52.210326', 'step': 22294, 'epoch': 2}
{'type': 'loss', 'content': 0.06772133708000183, 'timestamp': '2025-10-02 00:50:52.216194', 'step': 22295, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 112], 'flops': 560003483248.0}, 'timestamp': '2025-10-02 00:50:52.270379', 'step': 22295, 'epoch': 2}
{'type': 'loss', 'content': 0.029100749641656876, 'timestamp': '2025-10-02 00:50:52.276321', 'step': 22296, 'epoch': 2}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:52.330872', 'step': 22296, 'epoch': 3}
{'type': 'loss', 'content': 0.024832159280776978, 'timestamp': '2025-10-02 00:50:52.340373', 'step': 22297, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:52.395839', 'step': 22297, 'epoch': 3}
{'type': 'loss', 'content': 0.0644736960530281, 'timestamp': '2025-10-02 00:50:52.398414', 'step': 22298, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:52.453386', 'step': 22298, 'epoch': 3}
{'type': 'loss', 'content': 0.13973617553710938, 'timestamp': '2025-10-02 00:50:52.455444', 'step': 22299, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:52.509630', 'step': 22299, 'epoch': 3}
{'type': 'loss', 'content': 0.041179172694683075, 'timestamp': '2025-10-02 00:50:52.515613', 'step': 22300, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:52.570520', 'step': 22300, 'epoch': 3}
{'type': 'loss', 'content': 0.04952184110879898, 'timestamp': '2025-10-02 00:50:52.580794', 'step': 22301, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:52.636129', 'step': 22301, 'epoch': 3}
{'type': 'loss', 'content': 0.058843497186899185, 'timestamp': '2025-10-02 00:50:52.638750', 'step': 22302, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:52.694019', 'step': 22302, 'epoch': 3}
{'type': 'loss', 'content': 0.034364886581897736, 'timestamp': '2025-10-02 00:50:52.701554', 'step': 22303, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:52.755857', 'step': 22303, 'epoch': 3}
{'type': 'loss', 'content': 0.020479822531342506, 'timestamp': '2025-10-02 00:50:52.761489', 'step': 22304, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:52.815593', 'step': 22304, 'epoch': 3}
{'type': 'loss', 'content': 0.09025079011917114, 'timestamp': '2025-10-02 00:50:52.817751', 'step': 22305, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:50:52.874130', 'step': 22305, 'epoch': 3}
{'type': 'loss', 'content': 0.08852118998765945, 'timestamp': '2025-10-02 00:50:52.876408', 'step': 22306, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:52.930692', 'step': 22306, 'epoch': 3}
{'type': 'loss', 'content': 0.056803103536367416, 'timestamp': '2025-10-02 00:50:52.934041', 'step': 22307, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:52.989828', 'step': 22307, 'epoch': 3}
{'type': 'loss', 'content': 0.07593335211277008, 'timestamp': '2025-10-02 00:50:52.995733', 'step': 22308, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:53.049749', 'step': 22308, 'epoch': 3}
{'type': 'loss', 'content': 0.0543486662209034, 'timestamp': '2025-10-02 00:50:53.059997', 'step': 22309, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:50:53.123107', 'step': 22309, 'epoch': 3}
{'type': 'loss', 'content': 0.002153141191229224, 'timestamp': '2025-10-02 00:50:53.133752', 'step': 22310, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:53.187870', 'step': 22310, 'epoch': 3}
{'type': 'loss', 'content': 0.04425332695245743, 'timestamp': '2025-10-02 00:50:53.195251', 'step': 22311, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:53.249696', 'step': 22311, 'epoch': 3}
{'type': 'loss', 'content': 0.1468847244977951, 'timestamp': '2025-10-02 00:50:53.256353', 'step': 22312, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:53.309958', 'step': 22312, 'epoch': 3}
{'type': 'loss', 'content': 0.03381757810711861, 'timestamp': '2025-10-02 00:50:53.312476', 'step': 22313, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:53.371659', 'step': 22313, 'epoch': 3}
{'type': 'loss', 'content': 0.01763932779431343, 'timestamp': '2025-10-02 00:50:53.373619', 'step': 22314, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:53.428732', 'step': 22314, 'epoch': 3}
{'type': 'loss', 'content': 0.015425634570419788, 'timestamp': '2025-10-02 00:50:53.431393', 'step': 22315, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:53.485912', 'step': 22315, 'epoch': 3}
{'type': 'loss', 'content': 0.07632691413164139, 'timestamp': '2025-10-02 00:50:53.491974', 'step': 22316, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:53.547345', 'step': 22316, 'epoch': 3}
{'type': 'loss', 'content': 0.06672791391611099, 'timestamp': '2025-10-02 00:50:53.550687', 'step': 22317, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:53.607797', 'step': 22317, 'epoch': 3}
{'type': 'loss', 'content': 0.06377661228179932, 'timestamp': '2025-10-02 00:50:53.610059', 'step': 22318, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:53.666208', 'step': 22318, 'epoch': 3}
{'type': 'loss', 'content': 0.023203842341899872, 'timestamp': '2025-10-02 00:50:53.674150', 'step': 22319, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:53.729593', 'step': 22319, 'epoch': 3}
{'type': 'loss', 'content': 0.009426884353160858, 'timestamp': '2025-10-02 00:50:53.735448', 'step': 22320, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:53.789720', 'step': 22320, 'epoch': 3}
{'type': 'loss', 'content': 0.05947897955775261, 'timestamp': '2025-10-02 00:50:53.792304', 'step': 22321, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:53.847345', 'step': 22321, 'epoch': 3}
{'type': 'loss', 'content': 0.003542433027178049, 'timestamp': '2025-10-02 00:50:53.853217', 'step': 22322, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:53.908966', 'step': 22322, 'epoch': 3}
{'type': 'loss', 'content': 0.05274267867207527, 'timestamp': '2025-10-02 00:50:53.911214', 'step': 22323, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:53.966181', 'step': 22323, 'epoch': 3}
{'type': 'loss', 'content': 0.13277499377727509, 'timestamp': '2025-10-02 00:50:53.972527', 'step': 22324, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:54.027892', 'step': 22324, 'epoch': 3}
{'type': 'loss', 'content': 0.023973513394594193, 'timestamp': '2025-10-02 00:50:54.030815', 'step': 22325, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:54.086628', 'step': 22325, 'epoch': 3}
{'type': 'loss', 'content': 0.06916727870702744, 'timestamp': '2025-10-02 00:50:54.089064', 'step': 22326, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:54.145008', 'step': 22326, 'epoch': 3}
{'type': 'loss', 'content': 0.06710542738437653, 'timestamp': '2025-10-02 00:50:54.147135', 'step': 22327, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:50:54.209170', 'step': 22327, 'epoch': 3}
{'type': 'loss', 'content': 0.06409081071615219, 'timestamp': '2025-10-02 00:50:54.220542', 'step': 22328, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:54.277122', 'step': 22328, 'epoch': 3}
{'type': 'loss', 'content': 0.026119455695152283, 'timestamp': '2025-10-02 00:50:54.279517', 'step': 22329, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:54.335316', 'step': 22329, 'epoch': 3}
{'type': 'loss', 'content': 0.08700147271156311, 'timestamp': '2025-10-02 00:50:54.337637', 'step': 22330, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:50:54.392336', 'step': 22330, 'epoch': 3}
{'type': 'loss', 'content': 0.06551385670900345, 'timestamp': '2025-10-02 00:50:54.394567', 'step': 22331, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:54.449377', 'step': 22331, 'epoch': 3}
{'type': 'loss', 'content': 0.09528753906488419, 'timestamp': '2025-10-02 00:50:54.454956', 'step': 22332, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:54.508663', 'step': 22332, 'epoch': 3}
{'type': 'loss', 'content': 0.06595304608345032, 'timestamp': '2025-10-02 00:50:54.511035', 'step': 22333, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:54.566060', 'step': 22333, 'epoch': 3}
{'type': 'loss', 'content': 0.08755964040756226, 'timestamp': '2025-10-02 00:50:54.571695', 'step': 22334, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:50:54.642844', 'step': 22334, 'epoch': 3}
{'type': 'loss', 'content': 0.0050392248667776585, 'timestamp': '2025-10-02 00:50:54.655184', 'step': 22335, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:54.710676', 'step': 22335, 'epoch': 3}
{'type': 'loss', 'content': 0.08064999431371689, 'timestamp': '2025-10-02 00:50:54.720710', 'step': 22336, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:54.775632', 'step': 22336, 'epoch': 3}
{'type': 'loss', 'content': 0.03761141747236252, 'timestamp': '2025-10-02 00:50:54.777865', 'step': 22337, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:54.842101', 'step': 22337, 'epoch': 3}
{'type': 'loss', 'content': 0.08074833452701569, 'timestamp': '2025-10-02 00:50:54.844618', 'step': 22338, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:50:54.912910', 'step': 22338, 'epoch': 3}
{'type': 'loss', 'content': 0.0318245105445385, 'timestamp': '2025-10-02 00:50:54.924814', 'step': 22339, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:54.981072', 'step': 22339, 'epoch': 3}
{'type': 'loss', 'content': 0.08261130750179291, 'timestamp': '2025-10-02 00:50:54.987168', 'step': 22340, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:55.042117', 'step': 22340, 'epoch': 3}
{'type': 'loss', 'content': 0.052384935319423676, 'timestamp': '2025-10-02 00:50:55.044176', 'step': 22341, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:55.100137', 'step': 22341, 'epoch': 3}
{'type': 'loss', 'content': 0.06696396321058273, 'timestamp': '2025-10-02 00:50:55.107182', 'step': 22342, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:55.164405', 'step': 22342, 'epoch': 3}
{'type': 'loss', 'content': 0.02904663421213627, 'timestamp': '2025-10-02 00:50:55.166836', 'step': 22343, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:55.223040', 'step': 22343, 'epoch': 3}
{'type': 'loss', 'content': 0.027912117540836334, 'timestamp': '2025-10-02 00:50:55.228738', 'step': 22344, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:55.283133', 'step': 22344, 'epoch': 3}
{'type': 'loss', 'content': 0.06068415567278862, 'timestamp': '2025-10-02 00:50:55.293082', 'step': 22345, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:55.349358', 'step': 22345, 'epoch': 3}
{'type': 'loss', 'content': 0.04574083536863327, 'timestamp': '2025-10-02 00:50:55.352355', 'step': 22346, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:55.413109', 'step': 22346, 'epoch': 3}
{'type': 'loss', 'content': 0.12029764801263809, 'timestamp': '2025-10-02 00:50:55.415752', 'step': 22347, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:55.471552', 'step': 22347, 'epoch': 3}
{'type': 'loss', 'content': 0.014146977104246616, 'timestamp': '2025-10-02 00:50:55.478533', 'step': 22348, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:50:55.538491', 'step': 22348, 'epoch': 3}
{'type': 'loss', 'content': 0.09434476494789124, 'timestamp': '2025-10-02 00:50:55.541287', 'step': 22349, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:50:55.606691', 'step': 22349, 'epoch': 3}
{'type': 'loss', 'content': 0.021197015419602394, 'timestamp': '2025-10-02 00:50:55.617345', 'step': 22350, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:55.674764', 'step': 22350, 'epoch': 3}
{'type': 'loss', 'content': 0.031250208616256714, 'timestamp': '2025-10-02 00:50:55.677391', 'step': 22351, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:55.736477', 'step': 22351, 'epoch': 3}
{'type': 'loss', 'content': 0.009607754647731781, 'timestamp': '2025-10-02 00:50:55.746574', 'step': 22352, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:55.805083', 'step': 22352, 'epoch': 3}
{'type': 'loss', 'content': 0.03971477970480919, 'timestamp': '2025-10-02 00:50:55.808601', 'step': 22353, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:55.868052', 'step': 22353, 'epoch': 3}
{'type': 'loss', 'content': 0.03218565136194229, 'timestamp': '2025-10-02 00:50:55.871452', 'step': 22354, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:55.927781', 'step': 22354, 'epoch': 3}
{'type': 'loss', 'content': 0.053962595760822296, 'timestamp': '2025-10-02 00:50:55.930376', 'step': 22355, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:55.987011', 'step': 22355, 'epoch': 3}
{'type': 'loss', 'content': 0.08493375778198242, 'timestamp': '2025-10-02 00:50:55.993835', 'step': 22356, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:50:56.051506', 'step': 22356, 'epoch': 3}
{'type': 'loss', 'content': 0.049734316766262054, 'timestamp': '2025-10-02 00:50:56.054445', 'step': 22357, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:56.112853', 'step': 22357, 'epoch': 3}
{'type': 'loss', 'content': 0.02847317047417164, 'timestamp': '2025-10-02 00:50:56.122133', 'step': 22358, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:56.181698', 'step': 22358, 'epoch': 3}
{'type': 'loss', 'content': 0.0510491319000721, 'timestamp': '2025-10-02 00:50:56.190693', 'step': 22359, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:56.251511', 'step': 22359, 'epoch': 3}
{'type': 'loss', 'content': 0.07561860233545303, 'timestamp': '2025-10-02 00:50:56.257578', 'step': 22360, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:56.311156', 'step': 22360, 'epoch': 3}
{'type': 'loss', 'content': 0.12379299849271774, 'timestamp': '2025-10-02 00:50:56.313453', 'step': 22361, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:56.368439', 'step': 22361, 'epoch': 3}
{'type': 'loss', 'content': 0.02202269807457924, 'timestamp': '2025-10-02 00:50:56.375232', 'step': 22362, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:56.432638', 'step': 22362, 'epoch': 3}
{'type': 'loss', 'content': 0.07698474824428558, 'timestamp': '2025-10-02 00:50:56.439815', 'step': 22363, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:56.496106', 'step': 22363, 'epoch': 3}
{'type': 'loss', 'content': 0.1596824824810028, 'timestamp': '2025-10-02 00:50:56.502281', 'step': 22364, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:56.558107', 'step': 22364, 'epoch': 3}
{'type': 'loss', 'content': 0.09715182334184647, 'timestamp': '2025-10-02 00:50:56.560854', 'step': 22365, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:56.620882', 'step': 22365, 'epoch': 3}
{'type': 'loss', 'content': 0.1351836919784546, 'timestamp': '2025-10-02 00:50:56.625924', 'step': 22366, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:56.683117', 'step': 22366, 'epoch': 3}
{'type': 'loss', 'content': 0.01579359918832779, 'timestamp': '2025-10-02 00:50:56.688523', 'step': 22367, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:56.748574', 'step': 22367, 'epoch': 3}
{'type': 'loss', 'content': 0.0535980686545372, 'timestamp': '2025-10-02 00:50:56.755362', 'step': 22368, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:56.813294', 'step': 22368, 'epoch': 3}
{'type': 'loss', 'content': 0.014615180902183056, 'timestamp': '2025-10-02 00:50:56.816464', 'step': 22369, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:56.873820', 'step': 22369, 'epoch': 3}
{'type': 'loss', 'content': 0.05217529088258743, 'timestamp': '2025-10-02 00:50:56.876943', 'step': 22370, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:56.946379', 'step': 22370, 'epoch': 3}
{'type': 'loss', 'content': 0.009599355980753899, 'timestamp': '2025-10-02 00:50:56.955895', 'step': 22371, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:57.014471', 'step': 22371, 'epoch': 3}
{'type': 'loss', 'content': 0.13645146787166595, 'timestamp': '2025-10-02 00:50:57.023815', 'step': 22372, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:57.082104', 'step': 22372, 'epoch': 3}
{'type': 'loss', 'content': 0.07802163064479828, 'timestamp': '2025-10-02 00:50:57.087736', 'step': 22373, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:50:57.148084', 'step': 22373, 'epoch': 3}
{'type': 'loss', 'content': 0.02702246978878975, 'timestamp': '2025-10-02 00:50:57.158201', 'step': 22374, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:57.215951', 'step': 22374, 'epoch': 3}
{'type': 'loss', 'content': 0.06724760681390762, 'timestamp': '2025-10-02 00:50:57.218913', 'step': 22375, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:57.278352', 'step': 22375, 'epoch': 3}
{'type': 'loss', 'content': 0.11129900813102722, 'timestamp': '2025-10-02 00:50:57.284951', 'step': 22376, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:57.341807', 'step': 22376, 'epoch': 3}
{'type': 'loss', 'content': 0.16965612769126892, 'timestamp': '2025-10-02 00:50:57.345386', 'step': 22377, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:50:57.409194', 'step': 22377, 'epoch': 3}
{'type': 'loss', 'content': 0.020791424438357353, 'timestamp': '2025-10-02 00:50:57.419370', 'step': 22378, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:57.477091', 'step': 22378, 'epoch': 3}
{'type': 'loss', 'content': 0.15012314915657043, 'timestamp': '2025-10-02 00:50:57.480411', 'step': 22379, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:57.539814', 'step': 22379, 'epoch': 3}
{'type': 'loss', 'content': 0.042525116354227066, 'timestamp': '2025-10-02 00:50:57.546176', 'step': 22380, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:50:57.601931', 'step': 22380, 'epoch': 3}
{'type': 'loss', 'content': 0.06806647032499313, 'timestamp': '2025-10-02 00:50:57.604975', 'step': 22381, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:50:57.663667', 'step': 22381, 'epoch': 3}
{'type': 'loss', 'content': 0.07742307335138321, 'timestamp': '2025-10-02 00:50:57.673126', 'step': 22382, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:57.731071', 'step': 22382, 'epoch': 3}
{'type': 'loss', 'content': 0.041033871471881866, 'timestamp': '2025-10-02 00:50:57.733820', 'step': 22383, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:57.789411', 'step': 22383, 'epoch': 3}
{'type': 'loss', 'content': 0.018859686329960823, 'timestamp': '2025-10-02 00:50:57.795421', 'step': 22384, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:57.853067', 'step': 22384, 'epoch': 3}
{'type': 'loss', 'content': 0.0010869564721360803, 'timestamp': '2025-10-02 00:50:57.860442', 'step': 22385, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:57.916516', 'step': 22385, 'epoch': 3}
{'type': 'loss', 'content': 0.01681639440357685, 'timestamp': '2025-10-02 00:50:57.918721', 'step': 22386, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:57.975430', 'step': 22386, 'epoch': 3}
{'type': 'loss', 'content': 0.05329198017716408, 'timestamp': '2025-10-02 00:50:57.978503', 'step': 22387, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:58.033406', 'step': 22387, 'epoch': 3}
{'type': 'loss', 'content': 0.05170295387506485, 'timestamp': '2025-10-02 00:50:58.043172', 'step': 22388, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:58.098298', 'step': 22388, 'epoch': 3}
{'type': 'loss', 'content': 0.0044157179072499275, 'timestamp': '2025-10-02 00:50:58.101250', 'step': 22389, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:58.157520', 'step': 22389, 'epoch': 3}
{'type': 'loss', 'content': 0.01696167141199112, 'timestamp': '2025-10-02 00:50:58.166805', 'step': 22390, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:58.222530', 'step': 22390, 'epoch': 3}
{'type': 'loss', 'content': 0.07130282372236252, 'timestamp': '2025-10-02 00:50:58.227959', 'step': 22391, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:50:58.285136', 'step': 22391, 'epoch': 3}
{'type': 'loss', 'content': 0.10529842972755432, 'timestamp': '2025-10-02 00:50:58.291113', 'step': 22392, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:58.346660', 'step': 22392, 'epoch': 3}
{'type': 'loss', 'content': 0.047299809753894806, 'timestamp': '2025-10-02 00:50:58.355842', 'step': 22393, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:50:58.411892', 'step': 22393, 'epoch': 3}
{'type': 'loss', 'content': 0.0157981738448143, 'timestamp': '2025-10-02 00:50:58.414436', 'step': 22394, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:58.471216', 'step': 22394, 'epoch': 3}
{'type': 'loss', 'content': 0.04401635378599167, 'timestamp': '2025-10-02 00:50:58.473822', 'step': 22395, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:58.529742', 'step': 22395, 'epoch': 3}
{'type': 'loss', 'content': 0.06346557289361954, 'timestamp': '2025-10-02 00:50:58.536152', 'step': 22396, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:50:58.591340', 'step': 22396, 'epoch': 3}
{'type': 'loss', 'content': 0.08746291697025299, 'timestamp': '2025-10-02 00:50:58.593897', 'step': 22397, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:50:58.648797', 'step': 22397, 'epoch': 3}
{'type': 'loss', 'content': 0.036864250898361206, 'timestamp': '2025-10-02 00:50:58.651339', 'step': 22398, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:50:58.706261', 'step': 22398, 'epoch': 3}
{'type': 'loss', 'content': 0.10826003551483154, 'timestamp': '2025-10-02 00:50:58.708148', 'step': 22399, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:50:58.770737', 'step': 22399, 'epoch': 3}
{'type': 'loss', 'content': 0.027672167867422104, 'timestamp': '2025-10-02 00:50:58.782100', 'step': 22400, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:50:58.843391', 'step': 22400, 'epoch': 3}
{'type': 'loss', 'content': 0.009535218589007854, 'timestamp': '2025-10-02 00:50:58.854715', 'step': 22401, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:50:58.917488', 'step': 22401, 'epoch': 3}
{'type': 'loss', 'content': 0.008521889336407185, 'timestamp': '2025-10-02 00:50:58.927982', 'step': 22402, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:50:58.983697', 'step': 22402, 'epoch': 3}
{'type': 'loss', 'content': 0.048889756202697754, 'timestamp': '2025-10-02 00:50:58.992739', 'step': 22403, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:50:59.048262', 'step': 22403, 'epoch': 3}
{'type': 'loss', 'content': 0.037551701068878174, 'timestamp': '2025-10-02 00:50:59.054345', 'step': 22404, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:59.108280', 'step': 22404, 'epoch': 3}
{'type': 'loss', 'content': 0.06973039358854294, 'timestamp': '2025-10-02 00:50:59.110773', 'step': 22405, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:59.167081', 'step': 22405, 'epoch': 3}
{'type': 'loss', 'content': 0.07119034975767136, 'timestamp': '2025-10-02 00:50:59.169339', 'step': 22406, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:50:59.223795', 'step': 22406, 'epoch': 3}
{'type': 'loss', 'content': 0.09042195975780487, 'timestamp': '2025-10-02 00:50:59.228249', 'step': 22407, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:50:59.284431', 'step': 22407, 'epoch': 3}
{'type': 'loss', 'content': 0.13887682557106018, 'timestamp': '2025-10-02 00:50:59.290285', 'step': 22408, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:59.345008', 'step': 22408, 'epoch': 3}
{'type': 'loss', 'content': 0.04284777492284775, 'timestamp': '2025-10-02 00:50:59.347498', 'step': 22409, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:59.402667', 'step': 22409, 'epoch': 3}
{'type': 'loss', 'content': 0.11069182306528091, 'timestamp': '2025-10-02 00:50:59.405206', 'step': 22410, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:50:59.461175', 'step': 22410, 'epoch': 3}
{'type': 'loss', 'content': 0.013846985064446926, 'timestamp': '2025-10-02 00:50:59.463578', 'step': 22411, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:50:59.519138', 'step': 22411, 'epoch': 3}
{'type': 'loss', 'content': 0.06452615559101105, 'timestamp': '2025-10-02 00:50:59.525468', 'step': 22412, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:59.580814', 'step': 22412, 'epoch': 3}
{'type': 'loss', 'content': 0.15632092952728271, 'timestamp': '2025-10-02 00:50:59.583668', 'step': 22413, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:50:59.641778', 'step': 22413, 'epoch': 3}
{'type': 'loss', 'content': 0.03196658566594124, 'timestamp': '2025-10-02 00:50:59.648653', 'step': 22414, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:50:59.708636', 'step': 22414, 'epoch': 3}
{'type': 'loss', 'content': 0.02243267372250557, 'timestamp': '2025-10-02 00:50:59.718822', 'step': 22415, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:50:59.774594', 'step': 22415, 'epoch': 3}
{'type': 'loss', 'content': 0.18017104268074036, 'timestamp': '2025-10-02 00:50:59.780853', 'step': 22416, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:50:59.836027', 'step': 22416, 'epoch': 3}
{'type': 'loss', 'content': 0.11256109178066254, 'timestamp': '2025-10-02 00:50:59.839387', 'step': 22417, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:50:59.895155', 'step': 22417, 'epoch': 3}
{'type': 'loss', 'content': 0.04104681685566902, 'timestamp': '2025-10-02 00:50:59.897585', 'step': 22418, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:50:59.953319', 'step': 22418, 'epoch': 3}
{'type': 'loss', 'content': 0.046324472874403, 'timestamp': '2025-10-02 00:50:59.956036', 'step': 22419, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:00.011375', 'step': 22419, 'epoch': 3}
{'type': 'loss', 'content': 0.07491853833198547, 'timestamp': '2025-10-02 00:51:00.017389', 'step': 22420, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:00.072690', 'step': 22420, 'epoch': 3}
{'type': 'loss', 'content': 0.011389550752937794, 'timestamp': '2025-10-02 00:51:00.082155', 'step': 22421, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:00.137387', 'step': 22421, 'epoch': 3}
{'type': 'loss', 'content': 0.1109270378947258, 'timestamp': '2025-10-02 00:51:00.140012', 'step': 22422, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:00.196681', 'step': 22422, 'epoch': 3}
{'type': 'loss', 'content': 0.026246903464198112, 'timestamp': '2025-10-02 00:51:00.205761', 'step': 22423, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:51:00.262036', 'step': 22423, 'epoch': 3}
{'type': 'loss', 'content': 0.06629397720098495, 'timestamp': '2025-10-02 00:51:00.269278', 'step': 22424, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:00.324953', 'step': 22424, 'epoch': 3}
{'type': 'loss', 'content': 0.007093909662216902, 'timestamp': '2025-10-02 00:51:00.328047', 'step': 22425, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:00.386754', 'step': 22425, 'epoch': 3}
{'type': 'loss', 'content': 0.005585078150033951, 'timestamp': '2025-10-02 00:51:00.389108', 'step': 22426, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:00.449988', 'step': 22426, 'epoch': 3}
{'type': 'loss', 'content': 0.07782366871833801, 'timestamp': '2025-10-02 00:51:00.454767', 'step': 22427, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:00.517925', 'step': 22427, 'epoch': 3}
{'type': 'loss', 'content': 0.1281779706478119, 'timestamp': '2025-10-02 00:51:00.529126', 'step': 22428, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:00.587379', 'step': 22428, 'epoch': 3}
{'type': 'loss', 'content': 0.02390758879482746, 'timestamp': '2025-10-02 00:51:00.594412', 'step': 22429, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:00.670560', 'step': 22429, 'epoch': 3}
{'type': 'loss', 'content': 0.14099706709384918, 'timestamp': '2025-10-02 00:51:00.674318', 'step': 22430, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:51:00.756043', 'step': 22430, 'epoch': 3}
{'type': 'loss', 'content': 0.02691022865474224, 'timestamp': '2025-10-02 00:51:00.766203', 'step': 22431, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:51:00.829038', 'step': 22431, 'epoch': 3}
{'type': 'loss', 'content': 0.0028968644328415394, 'timestamp': '2025-10-02 00:51:00.851913', 'step': 22432, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:00.934661', 'step': 22432, 'epoch': 3}
{'type': 'loss', 'content': 0.11009664088487625, 'timestamp': '2025-10-02 00:51:00.955700', 'step': 22433, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:01.066826', 'step': 22433, 'epoch': 3}
{'type': 'loss', 'content': 0.018383264541625977, 'timestamp': '2025-10-02 00:51:01.072216', 'step': 22434, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:01.147687', 'step': 22434, 'epoch': 3}
{'type': 'loss', 'content': 0.0868784636259079, 'timestamp': '2025-10-02 00:51:01.150522', 'step': 22435, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:01.228058', 'step': 22435, 'epoch': 3}
{'type': 'loss', 'content': 0.039951201528310776, 'timestamp': '2025-10-02 00:51:01.238790', 'step': 22436, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:51:01.323041', 'step': 22436, 'epoch': 3}
{'type': 'loss', 'content': 0.017746170982718468, 'timestamp': '2025-10-02 00:51:01.336795', 'step': 22437, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:01.409707', 'step': 22437, 'epoch': 3}
{'type': 'loss', 'content': 0.08826737105846405, 'timestamp': '2025-10-02 00:51:01.412773', 'step': 22438, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:51:01.480726', 'step': 22438, 'epoch': 3}
{'type': 'loss', 'content': 0.05996743217110634, 'timestamp': '2025-10-02 00:51:01.485646', 'step': 22439, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:01.553907', 'step': 22439, 'epoch': 3}
{'type': 'loss', 'content': 0.017733298242092133, 'timestamp': '2025-10-02 00:51:01.564624', 'step': 22440, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:01.641810', 'step': 22440, 'epoch': 3}
{'type': 'loss', 'content': 0.11649332195520401, 'timestamp': '2025-10-02 00:51:01.646039', 'step': 22441, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:01.721595', 'step': 22441, 'epoch': 3}
{'type': 'loss', 'content': 0.012370993383228779, 'timestamp': '2025-10-02 00:51:01.727119', 'step': 22442, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:01.786941', 'step': 22442, 'epoch': 3}
{'type': 'loss', 'content': 0.13270820677280426, 'timestamp': '2025-10-02 00:51:01.790129', 'step': 22443, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:01.848233', 'step': 22443, 'epoch': 3}
{'type': 'loss', 'content': 0.028211576864123344, 'timestamp': '2025-10-02 00:51:01.854960', 'step': 22444, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:01.927379', 'step': 22444, 'epoch': 3}
{'type': 'loss', 'content': 0.11275651305913925, 'timestamp': '2025-10-02 00:51:01.930143', 'step': 22445, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:01.995856', 'step': 22445, 'epoch': 3}
{'type': 'loss', 'content': 0.06837599724531174, 'timestamp': '2025-10-02 00:51:01.998896', 'step': 22446, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:02.065992', 'step': 22446, 'epoch': 3}
{'type': 'loss', 'content': 0.2288006991147995, 'timestamp': '2025-10-02 00:51:02.068742', 'step': 22447, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:51:02.133249', 'step': 22447, 'epoch': 3}
{'type': 'loss', 'content': 0.0667913556098938, 'timestamp': '2025-10-02 00:51:02.144521', 'step': 22448, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:02.200387', 'step': 22448, 'epoch': 3}
{'type': 'loss', 'content': 0.05534902960062027, 'timestamp': '2025-10-02 00:51:02.204118', 'step': 22449, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:02.261638', 'step': 22449, 'epoch': 3}
{'type': 'loss', 'content': 0.03931756690144539, 'timestamp': '2025-10-02 00:51:02.268924', 'step': 22450, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:02.341303', 'step': 22450, 'epoch': 3}
{'type': 'loss', 'content': 0.14592935144901276, 'timestamp': '2025-10-02 00:51:02.344570', 'step': 22451, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:51:02.405900', 'step': 22451, 'epoch': 3}
{'type': 'loss', 'content': 0.07709956914186478, 'timestamp': '2025-10-02 00:51:02.419576', 'step': 22452, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:51:02.495768', 'step': 22452, 'epoch': 3}
{'type': 'loss', 'content': 0.047282446175813675, 'timestamp': '2025-10-02 00:51:02.505283', 'step': 22453, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:02.578377', 'step': 22453, 'epoch': 3}
{'type': 'loss', 'content': 0.1434069573879242, 'timestamp': '2025-10-02 00:51:02.588643', 'step': 22454, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:02.661335', 'step': 22454, 'epoch': 3}
{'type': 'loss', 'content': 0.09928072988986969, 'timestamp': '2025-10-02 00:51:02.664678', 'step': 22455, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:02.730267', 'step': 22455, 'epoch': 3}
{'type': 'loss', 'content': 0.12791481614112854, 'timestamp': '2025-10-02 00:51:02.737161', 'step': 22456, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:02.794401', 'step': 22456, 'epoch': 3}
{'type': 'loss', 'content': 0.1834784895181656, 'timestamp': '2025-10-02 00:51:02.802116', 'step': 22457, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:02.859357', 'step': 22457, 'epoch': 3}
{'type': 'loss', 'content': 0.06390578299760818, 'timestamp': '2025-10-02 00:51:02.862637', 'step': 22458, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:02.933359', 'step': 22458, 'epoch': 3}
{'type': 'loss', 'content': 0.09784417599439621, 'timestamp': '2025-10-02 00:51:02.940919', 'step': 22459, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:03.002708', 'step': 22459, 'epoch': 3}
{'type': 'loss', 'content': 0.10512411594390869, 'timestamp': '2025-10-02 00:51:03.014189', 'step': 22460, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:03.070755', 'step': 22460, 'epoch': 3}
{'type': 'loss', 'content': 0.011594670824706554, 'timestamp': '2025-10-02 00:51:03.078108', 'step': 22461, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:03.142710', 'step': 22461, 'epoch': 3}
{'type': 'loss', 'content': 0.02837185375392437, 'timestamp': '2025-10-02 00:51:03.152265', 'step': 22462, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:03.212525', 'step': 22462, 'epoch': 3}
{'type': 'loss', 'content': 0.011353506706655025, 'timestamp': '2025-10-02 00:51:03.221783', 'step': 22463, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:03.291568', 'step': 22463, 'epoch': 3}
{'type': 'loss', 'content': 0.24660411477088928, 'timestamp': '2025-10-02 00:51:03.298531', 'step': 22464, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:03.354092', 'step': 22464, 'epoch': 3}
{'type': 'loss', 'content': 0.07976289093494415, 'timestamp': '2025-10-02 00:51:03.357562', 'step': 22465, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:51:03.421530', 'step': 22465, 'epoch': 3}
{'type': 'loss', 'content': 0.021289344877004623, 'timestamp': '2025-10-02 00:51:03.432014', 'step': 22466, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:03.489181', 'step': 22466, 'epoch': 3}
{'type': 'loss', 'content': 0.04716472700238228, 'timestamp': '2025-10-02 00:51:03.497405', 'step': 22467, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:03.567329', 'step': 22467, 'epoch': 3}
{'type': 'loss', 'content': 0.0587870217859745, 'timestamp': '2025-10-02 00:51:03.574393', 'step': 22468, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:03.640520', 'step': 22468, 'epoch': 3}
{'type': 'loss', 'content': 0.09838102757930756, 'timestamp': '2025-10-02 00:51:03.643413', 'step': 22469, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:03.701699', 'step': 22469, 'epoch': 3}
{'type': 'loss', 'content': 0.10129273682832718, 'timestamp': '2025-10-02 00:51:03.710039', 'step': 22470, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:03.768086', 'step': 22470, 'epoch': 3}
{'type': 'loss', 'content': 0.09891387820243835, 'timestamp': '2025-10-02 00:51:03.772129', 'step': 22471, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:03.833497', 'step': 22471, 'epoch': 3}
{'type': 'loss', 'content': 0.028085384517908096, 'timestamp': '2025-10-02 00:51:03.845563', 'step': 22472, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:51:03.927229', 'step': 22472, 'epoch': 3}
{'type': 'loss', 'content': 0.033833276480436325, 'timestamp': '2025-10-02 00:51:03.938514', 'step': 22473, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:51:04.005457', 'step': 22473, 'epoch': 3}
{'type': 'loss', 'content': 0.14722180366516113, 'timestamp': '2025-10-02 00:51:04.013856', 'step': 22474, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:51:04.079540', 'step': 22474, 'epoch': 3}
{'type': 'loss', 'content': 0.13071343302726746, 'timestamp': '2025-10-02 00:51:04.083354', 'step': 22475, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:04.139843', 'step': 22475, 'epoch': 3}
{'type': 'loss', 'content': 0.06669949740171432, 'timestamp': '2025-10-02 00:51:04.152666', 'step': 22476, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:04.209143', 'step': 22476, 'epoch': 3}
{'type': 'loss', 'content': 0.050082892179489136, 'timestamp': '2025-10-02 00:51:04.217871', 'step': 22477, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:04.291903', 'step': 22477, 'epoch': 3}
{'type': 'loss', 'content': 0.04679926857352257, 'timestamp': '2025-10-02 00:51:04.294467', 'step': 22478, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:04.366103', 'step': 22478, 'epoch': 3}
{'type': 'loss', 'content': 0.059744950383901596, 'timestamp': '2025-10-02 00:51:04.373097', 'step': 22479, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:04.440430', 'step': 22479, 'epoch': 3}
{'type': 'loss', 'content': 0.032145917415618896, 'timestamp': '2025-10-02 00:51:04.447162', 'step': 22480, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:04.503632', 'step': 22480, 'epoch': 3}
{'type': 'loss', 'content': 0.1256740242242813, 'timestamp': '2025-10-02 00:51:04.513403', 'step': 22481, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:04.571696', 'step': 22481, 'epoch': 3}
{'type': 'loss', 'content': 0.029840145260095596, 'timestamp': '2025-10-02 00:51:04.575140', 'step': 22482, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:04.638364', 'step': 22482, 'epoch': 3}
{'type': 'loss', 'content': 0.07183164358139038, 'timestamp': '2025-10-02 00:51:04.641464', 'step': 22483, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:04.698960', 'step': 22483, 'epoch': 3}
{'type': 'loss', 'content': 0.05762850120663643, 'timestamp': '2025-10-02 00:51:04.705256', 'step': 22484, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:04.762736', 'step': 22484, 'epoch': 3}
{'type': 'loss', 'content': 0.0031313367653638124, 'timestamp': '2025-10-02 00:51:04.772247', 'step': 22485, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:51:04.846430', 'step': 22485, 'epoch': 3}
{'type': 'loss', 'content': 0.03701674938201904, 'timestamp': '2025-10-02 00:51:04.855525', 'step': 22486, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:04.933745', 'step': 22486, 'epoch': 3}
{'type': 'loss', 'content': 0.04806840047240257, 'timestamp': '2025-10-02 00:51:04.942862', 'step': 22487, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:05.006892', 'step': 22487, 'epoch': 3}
{'type': 'loss', 'content': 0.0877843126654625, 'timestamp': '2025-10-02 00:51:05.013580', 'step': 22488, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:05.084770', 'step': 22488, 'epoch': 3}
{'type': 'loss', 'content': 0.010705951601266861, 'timestamp': '2025-10-02 00:51:05.094219', 'step': 22489, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:05.155443', 'step': 22489, 'epoch': 3}
{'type': 'loss', 'content': 0.1329224854707718, 'timestamp': '2025-10-02 00:51:05.158020', 'step': 22490, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:51:05.233406', 'step': 22490, 'epoch': 3}
{'type': 'loss', 'content': 0.03830346837639809, 'timestamp': '2025-10-02 00:51:05.237423', 'step': 22491, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:05.311145', 'step': 22491, 'epoch': 3}
{'type': 'loss', 'content': 0.013045198284089565, 'timestamp': '2025-10-02 00:51:05.321347', 'step': 22492, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:05.397516', 'step': 22492, 'epoch': 3}
{'type': 'loss', 'content': 0.0352092944085598, 'timestamp': '2025-10-02 00:51:05.406195', 'step': 22493, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:05.463943', 'step': 22493, 'epoch': 3}
{'type': 'loss', 'content': 0.2198636680841446, 'timestamp': '2025-10-02 00:51:05.467581', 'step': 22494, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:05.533778', 'step': 22494, 'epoch': 3}
{'type': 'loss', 'content': 0.05946143716573715, 'timestamp': '2025-10-02 00:51:05.543128', 'step': 22495, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:05.602964', 'step': 22495, 'epoch': 3}
{'type': 'loss', 'content': 0.09203147888183594, 'timestamp': '2025-10-02 00:51:05.610562', 'step': 22496, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:05.668874', 'step': 22496, 'epoch': 3}
{'type': 'loss', 'content': 0.030461540445685387, 'timestamp': '2025-10-02 00:51:05.683309', 'step': 22497, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:05.768673', 'step': 22497, 'epoch': 3}
{'type': 'loss', 'content': 0.020476877689361572, 'timestamp': '2025-10-02 00:51:05.778231', 'step': 22498, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:05.855532', 'step': 22498, 'epoch': 3}
{'type': 'loss', 'content': 0.07933926582336426, 'timestamp': '2025-10-02 00:51:05.859236', 'step': 22499, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:05.915683', 'step': 22499, 'epoch': 3}
{'type': 'loss', 'content': 0.031726494431495667, 'timestamp': '2025-10-02 00:51:05.925621', 'step': 22500, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 22500', 'timestamp': '2025-10-02 00:51:06.503796', 'step': 22500, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:06.567152', 'step': 22500, 'epoch': 3}
{'type': 'loss', 'content': 0.08005791902542114, 'timestamp': '2025-10-02 00:51:06.571047', 'step': 22501, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:51:06.643022', 'step': 22501, 'epoch': 3}
{'type': 'loss', 'content': 0.033692505210638046, 'timestamp': '2025-10-02 00:51:06.653447', 'step': 22502, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:06.711785', 'step': 22502, 'epoch': 3}
{'type': 'loss', 'content': 0.07952338457107544, 'timestamp': '2025-10-02 00:51:06.714940', 'step': 22503, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:06.772244', 'step': 22503, 'epoch': 3}
{'type': 'loss', 'content': 0.04146596044301987, 'timestamp': '2025-10-02 00:51:06.779126', 'step': 22504, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:51:06.840562', 'step': 22504, 'epoch': 3}
{'type': 'loss', 'content': 0.09357942640781403, 'timestamp': '2025-10-02 00:51:06.851483', 'step': 22505, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:06.919069', 'step': 22505, 'epoch': 3}
{'type': 'loss', 'content': 0.016789715737104416, 'timestamp': '2025-10-02 00:51:06.928398', 'step': 22506, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:07.003461', 'step': 22506, 'epoch': 3}
{'type': 'loss', 'content': 0.08542194962501526, 'timestamp': '2025-10-02 00:51:07.009450', 'step': 22507, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:07.079282', 'step': 22507, 'epoch': 3}
{'type': 'loss', 'content': 0.038574088364839554, 'timestamp': '2025-10-02 00:51:07.091524', 'step': 22508, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:07.163315', 'step': 22508, 'epoch': 3}
{'type': 'loss', 'content': 0.09770870953798294, 'timestamp': '2025-10-02 00:51:07.166414', 'step': 22509, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:51:07.228128', 'step': 22509, 'epoch': 3}
{'type': 'loss', 'content': 0.07386598736047745, 'timestamp': '2025-10-02 00:51:07.234883', 'step': 22510, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:07.306325', 'step': 22510, 'epoch': 3}
{'type': 'loss', 'content': 0.08870317041873932, 'timestamp': '2025-10-02 00:51:07.315616', 'step': 22511, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:51:07.391807', 'step': 22511, 'epoch': 3}
{'type': 'loss', 'content': 0.026456300169229507, 'timestamp': '2025-10-02 00:51:07.405597', 'step': 22512, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:07.475714', 'step': 22512, 'epoch': 3}
{'type': 'loss', 'content': 0.04114201292395592, 'timestamp': '2025-10-02 00:51:07.478530', 'step': 22513, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:51:07.548529', 'step': 22513, 'epoch': 3}
{'type': 'loss', 'content': 0.14332708716392517, 'timestamp': '2025-10-02 00:51:07.557580', 'step': 22514, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:07.639104', 'step': 22514, 'epoch': 3}
{'type': 'loss', 'content': 0.12196672707796097, 'timestamp': '2025-10-02 00:51:07.649808', 'step': 22515, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:07.713327', 'step': 22515, 'epoch': 3}
{'type': 'loss', 'content': 0.043237291276454926, 'timestamp': '2025-10-02 00:51:07.723951', 'step': 22516, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:07.794173', 'step': 22516, 'epoch': 3}
{'type': 'loss', 'content': 0.06201206147670746, 'timestamp': '2025-10-02 00:51:07.801483', 'step': 22517, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:07.859188', 'step': 22517, 'epoch': 3}
{'type': 'loss', 'content': 0.10407217592000961, 'timestamp': '2025-10-02 00:51:07.862599', 'step': 22518, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:07.924645', 'step': 22518, 'epoch': 3}
{'type': 'loss', 'content': 0.06005430966615677, 'timestamp': '2025-10-02 00:51:07.932488', 'step': 22519, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:08.005895', 'step': 22519, 'epoch': 3}
{'type': 'loss', 'content': 0.000652196176815778, 'timestamp': '2025-10-02 00:51:08.018586', 'step': 22520, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:08.091530', 'step': 22520, 'epoch': 3}
{'type': 'loss', 'content': 0.0033296600449830294, 'timestamp': '2025-10-02 00:51:08.100866', 'step': 22521, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:51:08.175403', 'step': 22521, 'epoch': 3}
{'type': 'loss', 'content': 0.08530769497156143, 'timestamp': '2025-10-02 00:51:08.178761', 'step': 22522, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:08.258529', 'step': 22522, 'epoch': 3}
{'type': 'loss', 'content': 0.020450107753276825, 'timestamp': '2025-10-02 00:51:08.268069', 'step': 22523, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:08.340543', 'step': 22523, 'epoch': 3}
{'type': 'loss', 'content': 0.026339298114180565, 'timestamp': '2025-10-02 00:51:08.348053', 'step': 22524, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:51:08.414515', 'step': 22524, 'epoch': 3}
{'type': 'loss', 'content': 0.007783228997141123, 'timestamp': '2025-10-02 00:51:08.425464', 'step': 22525, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:08.488251', 'step': 22525, 'epoch': 3}
{'type': 'loss', 'content': 0.08160804957151413, 'timestamp': '2025-10-02 00:51:08.495818', 'step': 22526, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:08.564520', 'step': 22526, 'epoch': 3}
{'type': 'loss', 'content': 0.0505891814827919, 'timestamp': '2025-10-02 00:51:08.572140', 'step': 22527, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:08.649230', 'step': 22527, 'epoch': 3}
{'type': 'loss', 'content': 0.18538695573806763, 'timestamp': '2025-10-02 00:51:08.660792', 'step': 22528, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:08.722047', 'step': 22528, 'epoch': 3}
{'type': 'loss', 'content': 0.006074275355786085, 'timestamp': '2025-10-02 00:51:08.732149', 'step': 22529, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:08.810128', 'step': 22529, 'epoch': 3}
{'type': 'loss', 'content': 0.04225071519613266, 'timestamp': '2025-10-02 00:51:08.816865', 'step': 22530, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:08.886858', 'step': 22530, 'epoch': 3}
{'type': 'loss', 'content': 0.06594257056713104, 'timestamp': '2025-10-02 00:51:08.898062', 'step': 22531, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:08.972518', 'step': 22531, 'epoch': 3}
{'type': 'loss', 'content': 0.00270669418387115, 'timestamp': '2025-10-02 00:51:08.979501', 'step': 22532, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:09.046957', 'step': 22532, 'epoch': 3}
{'type': 'loss', 'content': 0.07055798172950745, 'timestamp': '2025-10-02 00:51:09.057243', 'step': 22533, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:09.114383', 'step': 22533, 'epoch': 3}
{'type': 'loss', 'content': 0.055027250200510025, 'timestamp': '2025-10-02 00:51:09.120981', 'step': 22534, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:09.187838', 'step': 22534, 'epoch': 3}
{'type': 'loss', 'content': 0.09232792258262634, 'timestamp': '2025-10-02 00:51:09.193228', 'step': 22535, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:09.263470', 'step': 22535, 'epoch': 3}
{'type': 'loss', 'content': 0.03115358203649521, 'timestamp': '2025-10-02 00:51:09.273434', 'step': 22536, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:51:09.353945', 'step': 22536, 'epoch': 3}
{'type': 'loss', 'content': 0.01361416932195425, 'timestamp': '2025-10-02 00:51:09.370374', 'step': 22537, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:09.463943', 'step': 22537, 'epoch': 3}
{'type': 'loss', 'content': 0.041612688452005386, 'timestamp': '2025-10-02 00:51:09.469462', 'step': 22538, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:09.529274', 'step': 22538, 'epoch': 3}
{'type': 'loss', 'content': 0.02992827445268631, 'timestamp': '2025-10-02 00:51:09.536330', 'step': 22539, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:09.599243', 'step': 22539, 'epoch': 3}
{'type': 'loss', 'content': 0.05118254944682121, 'timestamp': '2025-10-02 00:51:09.616702', 'step': 22540, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:51:09.683921', 'step': 22540, 'epoch': 3}
{'type': 'loss', 'content': 0.00967152789235115, 'timestamp': '2025-10-02 00:51:09.696926', 'step': 22541, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:09.789697', 'step': 22541, 'epoch': 3}
{'type': 'loss', 'content': 0.08196230977773666, 'timestamp': '2025-10-02 00:51:09.792871', 'step': 22542, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:09.856645', 'step': 22542, 'epoch': 3}
{'type': 'loss', 'content': 0.04524633288383484, 'timestamp': '2025-10-02 00:51:09.864398', 'step': 22543, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:09.928634', 'step': 22543, 'epoch': 3}
{'type': 'loss', 'content': 0.005969356279820204, 'timestamp': '2025-10-02 00:51:09.936556', 'step': 22544, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:10.011253', 'step': 22544, 'epoch': 3}
{'type': 'loss', 'content': 0.01790570840239525, 'timestamp': '2025-10-02 00:51:10.021492', 'step': 22545, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:51:10.100645', 'step': 22545, 'epoch': 3}
{'type': 'loss', 'content': 0.0028745552990585566, 'timestamp': '2025-10-02 00:51:10.110930', 'step': 22546, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:10.187333', 'step': 22546, 'epoch': 3}
{'type': 'loss', 'content': 0.03586989641189575, 'timestamp': '2025-10-02 00:51:10.194597', 'step': 22547, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:10.251785', 'step': 22547, 'epoch': 3}
{'type': 'loss', 'content': 0.034844979643821716, 'timestamp': '2025-10-02 00:51:10.263301', 'step': 22548, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:10.319569', 'step': 22548, 'epoch': 3}
{'type': 'loss', 'content': 0.07451223582029343, 'timestamp': '2025-10-02 00:51:10.322246', 'step': 22549, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:51:10.393542', 'step': 22549, 'epoch': 3}
{'type': 'loss', 'content': 0.0021910429932177067, 'timestamp': '2025-10-02 00:51:10.404146', 'step': 22550, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:51:10.476331', 'step': 22550, 'epoch': 3}
{'type': 'loss', 'content': 0.07667829841375351, 'timestamp': '2025-10-02 00:51:10.479218', 'step': 22551, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:10.535915', 'step': 22551, 'epoch': 3}
{'type': 'loss', 'content': 0.1874227225780487, 'timestamp': '2025-10-02 00:51:10.544468', 'step': 22552, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:10.616297', 'step': 22552, 'epoch': 3}
{'type': 'loss', 'content': 0.028325408697128296, 'timestamp': '2025-10-02 00:51:10.622018', 'step': 22553, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:10.689294', 'step': 22553, 'epoch': 3}
{'type': 'loss', 'content': 0.04586297646164894, 'timestamp': '2025-10-02 00:51:10.696670', 'step': 22554, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:51:10.752930', 'step': 22554, 'epoch': 3}
{'type': 'loss', 'content': 0.05685955286026001, 'timestamp': '2025-10-02 00:51:10.766327', 'step': 22555, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:10.825902', 'step': 22555, 'epoch': 3}
{'type': 'loss', 'content': 0.028474997729063034, 'timestamp': '2025-10-02 00:51:10.833899', 'step': 22556, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:10.889383', 'step': 22556, 'epoch': 3}
{'type': 'loss', 'content': 0.048738352954387665, 'timestamp': '2025-10-02 00:51:10.894955', 'step': 22557, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:10.952251', 'step': 22557, 'epoch': 3}
{'type': 'loss', 'content': 0.05478350818157196, 'timestamp': '2025-10-02 00:51:10.957877', 'step': 22558, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:11.016034', 'step': 22558, 'epoch': 3}
{'type': 'loss', 'content': 0.06092636287212372, 'timestamp': '2025-10-02 00:51:11.023295', 'step': 22559, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:11.095244', 'step': 22559, 'epoch': 3}
{'type': 'loss', 'content': 0.033781327307224274, 'timestamp': '2025-10-02 00:51:11.112095', 'step': 22560, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:11.191618', 'step': 22560, 'epoch': 3}
{'type': 'loss', 'content': 0.06415129452943802, 'timestamp': '2025-10-02 00:51:11.197739', 'step': 22561, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:11.255704', 'step': 22561, 'epoch': 3}
{'type': 'loss', 'content': 0.013287237845361233, 'timestamp': '2025-10-02 00:51:11.264978', 'step': 22562, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:51:11.326915', 'step': 22562, 'epoch': 3}
{'type': 'loss', 'content': 0.07943491637706757, 'timestamp': '2025-10-02 00:51:11.332728', 'step': 22563, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:11.402364', 'step': 22563, 'epoch': 3}
{'type': 'loss', 'content': 0.020848961547017097, 'timestamp': '2025-10-02 00:51:11.409027', 'step': 22564, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:11.469889', 'step': 22564, 'epoch': 3}
{'type': 'loss', 'content': 0.00039652117993682623, 'timestamp': '2025-10-02 00:51:11.477082', 'step': 22565, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:11.542595', 'step': 22565, 'epoch': 3}
{'type': 'loss', 'content': 0.07902040332555771, 'timestamp': '2025-10-02 00:51:11.549305', 'step': 22566, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:11.608576', 'step': 22566, 'epoch': 3}
{'type': 'loss', 'content': 0.11447413265705109, 'timestamp': '2025-10-02 00:51:11.611406', 'step': 22567, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:11.666620', 'step': 22567, 'epoch': 3}
{'type': 'loss', 'content': 0.0727907344698906, 'timestamp': '2025-10-02 00:51:11.676654', 'step': 22568, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:11.737463', 'step': 22568, 'epoch': 3}
{'type': 'loss', 'content': 0.08107038587331772, 'timestamp': '2025-10-02 00:51:11.744471', 'step': 22569, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:51:11.812284', 'step': 22569, 'epoch': 3}
{'type': 'loss', 'content': 0.07619250565767288, 'timestamp': '2025-10-02 00:51:11.818382', 'step': 22570, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:51:11.894444', 'step': 22570, 'epoch': 3}
{'type': 'loss', 'content': 0.02728675678372383, 'timestamp': '2025-10-02 00:51:11.905086', 'step': 22571, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:11.962235', 'step': 22571, 'epoch': 3}
{'type': 'loss', 'content': 0.09419894218444824, 'timestamp': '2025-10-02 00:51:11.969563', 'step': 22572, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:12.041829', 'step': 22572, 'epoch': 3}
{'type': 'loss', 'content': 0.04596938565373421, 'timestamp': '2025-10-02 00:51:12.048951', 'step': 22573, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:12.116674', 'step': 22573, 'epoch': 3}
{'type': 'loss', 'content': 0.03316783159971237, 'timestamp': '2025-10-02 00:51:12.119252', 'step': 22574, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:12.178159', 'step': 22574, 'epoch': 3}
{'type': 'loss', 'content': 0.10322809219360352, 'timestamp': '2025-10-02 00:51:12.187670', 'step': 22575, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:12.257278', 'step': 22575, 'epoch': 3}
{'type': 'loss', 'content': 0.00328710931353271, 'timestamp': '2025-10-02 00:51:12.268177', 'step': 22576, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:12.328788', 'step': 22576, 'epoch': 3}
{'type': 'loss', 'content': 0.011459085159003735, 'timestamp': '2025-10-02 00:51:12.335604', 'step': 22577, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:12.402111', 'step': 22577, 'epoch': 3}
{'type': 'loss', 'content': 0.03845072537660599, 'timestamp': '2025-10-02 00:51:12.409168', 'step': 22578, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:12.475619', 'step': 22578, 'epoch': 3}
{'type': 'loss', 'content': 0.0746007040143013, 'timestamp': '2025-10-02 00:51:12.482235', 'step': 22579, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:12.543326', 'step': 22579, 'epoch': 3}
{'type': 'loss', 'content': 0.009682157076895237, 'timestamp': '2025-10-02 00:51:12.554110', 'step': 22580, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:51:12.625624', 'step': 22580, 'epoch': 3}
{'type': 'loss', 'content': 0.07032281905412674, 'timestamp': '2025-10-02 00:51:12.633740', 'step': 22581, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:12.715536', 'step': 22581, 'epoch': 3}
{'type': 'loss', 'content': 0.05541232228279114, 'timestamp': '2025-10-02 00:51:12.723524', 'step': 22582, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:12.794635', 'step': 22582, 'epoch': 3}
{'type': 'loss', 'content': 0.07040348649024963, 'timestamp': '2025-10-02 00:51:12.798090', 'step': 22583, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:12.854352', 'step': 22583, 'epoch': 3}
{'type': 'loss', 'content': 0.1391199231147766, 'timestamp': '2025-10-02 00:51:12.861517', 'step': 22584, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:12.917602', 'step': 22584, 'epoch': 3}
{'type': 'loss', 'content': 0.04020785912871361, 'timestamp': '2025-10-02 00:51:12.926559', 'step': 22585, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:51:12.995549', 'step': 22585, 'epoch': 3}
{'type': 'loss', 'content': 0.023364417254924774, 'timestamp': '2025-10-02 00:51:13.005725', 'step': 22586, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:13.073652', 'step': 22586, 'epoch': 3}
{'type': 'loss', 'content': 0.05561843514442444, 'timestamp': '2025-10-02 00:51:13.080097', 'step': 22587, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:13.136214', 'step': 22587, 'epoch': 3}
{'type': 'loss', 'content': 0.09756150841712952, 'timestamp': '2025-10-02 00:51:13.145945', 'step': 22588, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:13.216746', 'step': 22588, 'epoch': 3}
{'type': 'loss', 'content': 0.021564733237028122, 'timestamp': '2025-10-02 00:51:13.224010', 'step': 22589, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:13.280167', 'step': 22589, 'epoch': 3}
{'type': 'loss', 'content': 0.10078710317611694, 'timestamp': '2025-10-02 00:51:13.287028', 'step': 22590, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:13.356673', 'step': 22590, 'epoch': 3}
{'type': 'loss', 'content': 0.18167497217655182, 'timestamp': '2025-10-02 00:51:13.359400', 'step': 22591, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:13.420922', 'step': 22591, 'epoch': 3}
{'type': 'loss', 'content': 0.09023650735616684, 'timestamp': '2025-10-02 00:51:13.431870', 'step': 22592, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:51:13.494879', 'step': 22592, 'epoch': 3}
{'type': 'loss', 'content': 0.021255172789096832, 'timestamp': '2025-10-02 00:51:13.506164', 'step': 22593, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:13.563115', 'step': 22593, 'epoch': 3}
{'type': 'loss', 'content': 0.05920581892132759, 'timestamp': '2025-10-02 00:51:13.572682', 'step': 22594, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:13.648948', 'step': 22594, 'epoch': 3}
{'type': 'loss', 'content': 0.026655608788132668, 'timestamp': '2025-10-02 00:51:13.658136', 'step': 22595, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:13.724248', 'step': 22595, 'epoch': 3}
{'type': 'loss', 'content': 0.014571179635822773, 'timestamp': '2025-10-02 00:51:13.736239', 'step': 22596, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:13.800061', 'step': 22596, 'epoch': 3}
{'type': 'loss', 'content': 0.00011090249608969316, 'timestamp': '2025-10-02 00:51:13.809285', 'step': 22597, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:13.875196', 'step': 22597, 'epoch': 3}
{'type': 'loss', 'content': 0.08428745716810226, 'timestamp': '2025-10-02 00:51:13.877674', 'step': 22598, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:13.945315', 'step': 22598, 'epoch': 3}
{'type': 'loss', 'content': 0.005726325791329145, 'timestamp': '2025-10-02 00:51:13.953126', 'step': 22599, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:14.011033', 'step': 22599, 'epoch': 3}
{'type': 'loss', 'content': 0.013127573765814304, 'timestamp': '2025-10-02 00:51:14.018868', 'step': 22600, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:14.075354', 'step': 22600, 'epoch': 3}
{'type': 'loss', 'content': 0.04969429969787598, 'timestamp': '2025-10-02 00:51:14.081003', 'step': 22601, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:14.136838', 'step': 22601, 'epoch': 3}
{'type': 'loss', 'content': 0.04865511506795883, 'timestamp': '2025-10-02 00:51:14.139409', 'step': 22602, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:14.195874', 'step': 22602, 'epoch': 3}
{'type': 'loss', 'content': 0.047033391892910004, 'timestamp': '2025-10-02 00:51:14.198346', 'step': 22603, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:14.253123', 'step': 22603, 'epoch': 3}
{'type': 'loss', 'content': 0.08466862142086029, 'timestamp': '2025-10-02 00:51:14.259273', 'step': 22604, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:51:14.319681', 'step': 22604, 'epoch': 3}
{'type': 'loss', 'content': 0.029785869643092155, 'timestamp': '2025-10-02 00:51:14.330954', 'step': 22605, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:14.385533', 'step': 22605, 'epoch': 3}
{'type': 'loss', 'content': 0.05849944427609444, 'timestamp': '2025-10-02 00:51:14.394590', 'step': 22606, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:14.449984', 'step': 22606, 'epoch': 3}
{'type': 'loss', 'content': 0.10849107056856155, 'timestamp': '2025-10-02 00:51:14.452148', 'step': 22607, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:14.507338', 'step': 22607, 'epoch': 3}
{'type': 'loss', 'content': 0.03317074477672577, 'timestamp': '2025-10-02 00:51:14.513449', 'step': 22608, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:51:14.574335', 'step': 22608, 'epoch': 3}
{'type': 'loss', 'content': 0.017632585018873215, 'timestamp': '2025-10-02 00:51:14.585861', 'step': 22609, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:14.641669', 'step': 22609, 'epoch': 3}
{'type': 'loss', 'content': 0.08718645572662354, 'timestamp': '2025-10-02 00:51:14.650684', 'step': 22610, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:51:14.712476', 'step': 22610, 'epoch': 3}
{'type': 'loss', 'content': 0.029703713953495026, 'timestamp': '2025-10-02 00:51:14.722927', 'step': 22611, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:14.780447', 'step': 22611, 'epoch': 3}
{'type': 'loss', 'content': 0.07536125183105469, 'timestamp': '2025-10-02 00:51:14.786597', 'step': 22612, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:14.841309', 'step': 22612, 'epoch': 3}
{'type': 'loss', 'content': 0.03730824589729309, 'timestamp': '2025-10-02 00:51:14.843623', 'step': 22613, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:51:14.904869', 'step': 22613, 'epoch': 3}
{'type': 'loss', 'content': 0.02746613696217537, 'timestamp': '2025-10-02 00:51:14.915297', 'step': 22614, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:14.970955', 'step': 22614, 'epoch': 3}
{'type': 'loss', 'content': 0.05060210078954697, 'timestamp': '2025-10-02 00:51:14.973357', 'step': 22615, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:15.027765', 'step': 22615, 'epoch': 3}
{'type': 'loss', 'content': 0.05558362975716591, 'timestamp': '2025-10-02 00:51:15.033990', 'step': 22616, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:15.089312', 'step': 22616, 'epoch': 3}
{'type': 'loss', 'content': 0.06784337759017944, 'timestamp': '2025-10-02 00:51:15.091812', 'step': 22617, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:51:15.154596', 'step': 22617, 'epoch': 3}
{'type': 'loss', 'content': 0.028516778722405434, 'timestamp': '2025-10-02 00:51:15.165203', 'step': 22618, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:15.220374', 'step': 22618, 'epoch': 3}
{'type': 'loss', 'content': 0.10823807120323181, 'timestamp': '2025-10-02 00:51:15.225951', 'step': 22619, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:15.282645', 'step': 22619, 'epoch': 3}
{'type': 'loss', 'content': 0.002516023814678192, 'timestamp': '2025-10-02 00:51:15.290650', 'step': 22620, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:15.346358', 'step': 22620, 'epoch': 3}
{'type': 'loss', 'content': 0.02585575357079506, 'timestamp': '2025-10-02 00:51:15.353558', 'step': 22621, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:15.408647', 'step': 22621, 'epoch': 3}
{'type': 'loss', 'content': 0.03926268592476845, 'timestamp': '2025-10-02 00:51:15.411492', 'step': 22622, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:15.466839', 'step': 22622, 'epoch': 3}
{'type': 'loss', 'content': 0.027688732370734215, 'timestamp': '2025-10-02 00:51:15.469543', 'step': 22623, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:51:15.531684', 'step': 22623, 'epoch': 3}
{'type': 'loss', 'content': 0.003442521905526519, 'timestamp': '2025-10-02 00:51:15.542900', 'step': 22624, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:15.597739', 'step': 22624, 'epoch': 3}
{'type': 'loss', 'content': 0.06488897651433945, 'timestamp': '2025-10-02 00:51:15.603521', 'step': 22625, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:15.658678', 'step': 22625, 'epoch': 3}
{'type': 'loss', 'content': 0.04331663250923157, 'timestamp': '2025-10-02 00:51:15.661264', 'step': 22626, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:51:15.722636', 'step': 22626, 'epoch': 3}
{'type': 'loss', 'content': 0.004679305478930473, 'timestamp': '2025-10-02 00:51:15.730802', 'step': 22627, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:15.787154', 'step': 22627, 'epoch': 3}
{'type': 'loss', 'content': 0.10792388767004013, 'timestamp': '2025-10-02 00:51:15.793760', 'step': 22628, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:15.851831', 'step': 22628, 'epoch': 3}
{'type': 'loss', 'content': 0.005382373929023743, 'timestamp': '2025-10-02 00:51:15.860868', 'step': 22629, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:15.919320', 'step': 22629, 'epoch': 3}
{'type': 'loss', 'content': 0.044972918927669525, 'timestamp': '2025-10-02 00:51:15.922332', 'step': 22630, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:51:15.983312', 'step': 22630, 'epoch': 3}
{'type': 'loss', 'content': 0.053702447563409805, 'timestamp': '2025-10-02 00:51:15.993401', 'step': 22631, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:16.051297', 'step': 22631, 'epoch': 3}
{'type': 'loss', 'content': 0.010502607561647892, 'timestamp': '2025-10-02 00:51:16.058542', 'step': 22632, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:16.115106', 'step': 22632, 'epoch': 3}
{'type': 'loss', 'content': 0.01318422146141529, 'timestamp': '2025-10-02 00:51:16.121944', 'step': 22633, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:16.177139', 'step': 22633, 'epoch': 3}
{'type': 'loss', 'content': 0.008590214885771275, 'timestamp': '2025-10-02 00:51:16.179638', 'step': 22634, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:16.235792', 'step': 22634, 'epoch': 3}
{'type': 'loss', 'content': 0.05477104336023331, 'timestamp': '2025-10-02 00:51:16.238710', 'step': 22635, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:16.296002', 'step': 22635, 'epoch': 3}
{'type': 'loss', 'content': 0.013852194882929325, 'timestamp': '2025-10-02 00:51:16.302716', 'step': 22636, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:51:16.360528', 'step': 22636, 'epoch': 3}
{'type': 'loss', 'content': 0.10437270253896713, 'timestamp': '2025-10-02 00:51:16.363420', 'step': 22637, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-10-02 00:51:16.442433', 'step': 22637, 'epoch': 3}
{'type': 'loss', 'content': 0.037062935531139374, 'timestamp': '2025-10-02 00:51:16.456080', 'step': 22638, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:16.514186', 'step': 22638, 'epoch': 3}
{'type': 'loss', 'content': 0.0070461248978972435, 'timestamp': '2025-10-02 00:51:16.521328', 'step': 22639, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:16.580017', 'step': 22639, 'epoch': 3}
{'type': 'loss', 'content': 0.004103256855159998, 'timestamp': '2025-10-02 00:51:16.590047', 'step': 22640, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:16.648509', 'step': 22640, 'epoch': 3}
{'type': 'loss', 'content': 0.02692999877035618, 'timestamp': '2025-10-02 00:51:16.654184', 'step': 22641, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:16.714247', 'step': 22641, 'epoch': 3}
{'type': 'loss', 'content': 0.07150810956954956, 'timestamp': '2025-10-02 00:51:16.717788', 'step': 22642, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:16.774908', 'step': 22642, 'epoch': 3}
{'type': 'loss', 'content': 0.0322091244161129, 'timestamp': '2025-10-02 00:51:16.777454', 'step': 22643, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:16.836250', 'step': 22643, 'epoch': 3}
{'type': 'loss', 'content': 0.012009553611278534, 'timestamp': '2025-10-02 00:51:16.846336', 'step': 22644, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:16.903760', 'step': 22644, 'epoch': 3}
{'type': 'loss', 'content': 0.01628984324634075, 'timestamp': '2025-10-02 00:51:16.907043', 'step': 22645, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:16.964530', 'step': 22645, 'epoch': 3}
{'type': 'loss', 'content': 0.11105257272720337, 'timestamp': '2025-10-02 00:51:16.967553', 'step': 22646, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:17.023998', 'step': 22646, 'epoch': 3}
{'type': 'loss', 'content': 0.053469449281692505, 'timestamp': '2025-10-02 00:51:17.027073', 'step': 22647, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:17.084525', 'step': 22647, 'epoch': 3}
{'type': 'loss', 'content': 0.12670281529426575, 'timestamp': '2025-10-02 00:51:17.094856', 'step': 22648, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:17.155608', 'step': 22648, 'epoch': 3}
{'type': 'loss', 'content': 0.026435688138008118, 'timestamp': '2025-10-02 00:51:17.159051', 'step': 22649, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:51:17.220704', 'step': 22649, 'epoch': 3}
{'type': 'loss', 'content': 0.05110899358987808, 'timestamp': '2025-10-02 00:51:17.231209', 'step': 22650, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:17.288757', 'step': 22650, 'epoch': 3}
{'type': 'loss', 'content': 0.14499790966510773, 'timestamp': '2025-10-02 00:51:17.291920', 'step': 22651, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:17.348904', 'step': 22651, 'epoch': 3}
{'type': 'loss', 'content': 0.06607559323310852, 'timestamp': '2025-10-02 00:51:17.355569', 'step': 22652, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:51:17.411435', 'step': 22652, 'epoch': 3}
{'type': 'loss', 'content': 0.10045657306909561, 'timestamp': '2025-10-02 00:51:17.414335', 'step': 22653, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:51:17.489526', 'step': 22653, 'epoch': 3}
{'type': 'loss', 'content': 0.021201908588409424, 'timestamp': '2025-10-02 00:51:17.502698', 'step': 22654, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:51:17.558776', 'step': 22654, 'epoch': 3}
{'type': 'loss', 'content': 0.12138035148382187, 'timestamp': '2025-10-02 00:51:17.561452', 'step': 22655, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:17.618373', 'step': 22655, 'epoch': 3}
{'type': 'loss', 'content': 0.00986277125775814, 'timestamp': '2025-10-02 00:51:17.626229', 'step': 22656, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:17.684347', 'step': 22656, 'epoch': 3}
{'type': 'loss', 'content': 0.08049830794334412, 'timestamp': '2025-10-02 00:51:17.689248', 'step': 22657, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:51:17.756994', 'step': 22657, 'epoch': 3}
{'type': 'loss', 'content': 0.011435899883508682, 'timestamp': '2025-10-02 00:51:17.767411', 'step': 22658, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:51:17.827501', 'step': 22658, 'epoch': 3}
{'type': 'loss', 'content': 0.05658973380923271, 'timestamp': '2025-10-02 00:51:17.830559', 'step': 22659, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:51:17.893905', 'step': 22659, 'epoch': 3}
{'type': 'loss', 'content': 0.03925803676247597, 'timestamp': '2025-10-02 00:51:17.905108', 'step': 22660, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:51:17.962875', 'step': 22660, 'epoch': 3}
{'type': 'loss', 'content': 0.12601149082183838, 'timestamp': '2025-10-02 00:51:17.965530', 'step': 22661, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:18.020839', 'step': 22661, 'epoch': 3}
{'type': 'loss', 'content': 0.10271679610013962, 'timestamp': '2025-10-02 00:51:18.023761', 'step': 22662, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:18.079432', 'step': 22662, 'epoch': 3}
{'type': 'loss', 'content': 0.017125535756349564, 'timestamp': '2025-10-02 00:51:18.081941', 'step': 22663, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:18.138201', 'step': 22663, 'epoch': 3}
{'type': 'loss', 'content': 0.055462196469306946, 'timestamp': '2025-10-02 00:51:18.144483', 'step': 22664, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:18.199648', 'step': 22664, 'epoch': 3}
{'type': 'loss', 'content': 0.03496333956718445, 'timestamp': '2025-10-02 00:51:18.205310', 'step': 22665, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:18.262874', 'step': 22665, 'epoch': 3}
{'type': 'loss', 'content': 0.054869405925273895, 'timestamp': '2025-10-02 00:51:18.268363', 'step': 22666, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:18.324118', 'step': 22666, 'epoch': 3}
{'type': 'loss', 'content': 0.07667666673660278, 'timestamp': '2025-10-02 00:51:18.332542', 'step': 22667, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:18.393444', 'step': 22667, 'epoch': 3}
{'type': 'loss', 'content': 0.07707086950540543, 'timestamp': '2025-10-02 00:51:18.403468', 'step': 22668, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:18.457769', 'step': 22668, 'epoch': 3}
{'type': 'loss', 'content': 0.018182069063186646, 'timestamp': '2025-10-02 00:51:18.460272', 'step': 22669, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:18.514696', 'step': 22669, 'epoch': 3}
{'type': 'loss', 'content': 0.05688359588384628, 'timestamp': '2025-10-02 00:51:18.517645', 'step': 22670, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:51:18.580052', 'step': 22670, 'epoch': 3}
{'type': 'loss', 'content': 0.06110576540231705, 'timestamp': '2025-10-02 00:51:18.590518', 'step': 22671, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:18.649542', 'step': 22671, 'epoch': 3}
{'type': 'loss', 'content': 0.03059336729347706, 'timestamp': '2025-10-02 00:51:18.659639', 'step': 22672, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:18.715985', 'step': 22672, 'epoch': 3}
{'type': 'loss', 'content': 0.020949672907590866, 'timestamp': '2025-10-02 00:51:18.718982', 'step': 22673, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:18.775590', 'step': 22673, 'epoch': 3}
{'type': 'loss', 'content': 0.035743098706007004, 'timestamp': '2025-10-02 00:51:18.778124', 'step': 22674, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:18.833006', 'step': 22674, 'epoch': 3}
{'type': 'loss', 'content': 0.06146872416138649, 'timestamp': '2025-10-02 00:51:18.835458', 'step': 22675, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:51:18.891816', 'step': 22675, 'epoch': 3}
{'type': 'loss', 'content': 0.04665832966566086, 'timestamp': '2025-10-02 00:51:18.901991', 'step': 22676, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:18.956196', 'step': 22676, 'epoch': 3}
{'type': 'loss', 'content': 0.09572314471006393, 'timestamp': '2025-10-02 00:51:18.959065', 'step': 22677, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:19.015163', 'step': 22677, 'epoch': 3}
{'type': 'loss', 'content': 0.03524826094508171, 'timestamp': '2025-10-02 00:51:19.024649', 'step': 22678, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:19.083930', 'step': 22678, 'epoch': 3}
{'type': 'loss', 'content': 0.005230552516877651, 'timestamp': '2025-10-02 00:51:19.089659', 'step': 22679, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:51:19.159169', 'step': 22679, 'epoch': 3}
{'type': 'loss', 'content': 0.01284459326416254, 'timestamp': '2025-10-02 00:51:19.171910', 'step': 22680, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:19.228190', 'step': 22680, 'epoch': 3}
{'type': 'loss', 'content': 0.003339222399517894, 'timestamp': '2025-10-02 00:51:19.235575', 'step': 22681, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:19.294528', 'step': 22681, 'epoch': 3}
{'type': 'loss', 'content': 0.03399529680609703, 'timestamp': '2025-10-02 00:51:19.297027', 'step': 22682, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:19.352440', 'step': 22682, 'epoch': 3}
{'type': 'loss', 'content': 0.10508190095424652, 'timestamp': '2025-10-02 00:51:19.354807', 'step': 22683, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:19.410128', 'step': 22683, 'epoch': 3}
{'type': 'loss', 'content': 0.10989997535943985, 'timestamp': '2025-10-02 00:51:19.418005', 'step': 22684, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:19.473146', 'step': 22684, 'epoch': 3}
{'type': 'loss', 'content': 0.06822372227907181, 'timestamp': '2025-10-02 00:51:19.478908', 'step': 22685, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:19.536264', 'step': 22685, 'epoch': 3}
{'type': 'loss', 'content': 0.07082363218069077, 'timestamp': '2025-10-02 00:51:19.545550', 'step': 22686, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:19.601791', 'step': 22686, 'epoch': 3}
{'type': 'loss', 'content': 0.11977480351924896, 'timestamp': '2025-10-02 00:51:19.604105', 'step': 22687, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:19.659384', 'step': 22687, 'epoch': 3}
{'type': 'loss', 'content': 0.009481808170676231, 'timestamp': '2025-10-02 00:51:19.665299', 'step': 22688, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:51:19.720981', 'step': 22688, 'epoch': 3}
{'type': 'loss', 'content': 0.03098444826900959, 'timestamp': '2025-10-02 00:51:19.723450', 'step': 22689, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:19.779593', 'step': 22689, 'epoch': 3}
{'type': 'loss', 'content': 0.012433906085789204, 'timestamp': '2025-10-02 00:51:19.786826', 'step': 22690, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:19.841702', 'step': 22690, 'epoch': 3}
{'type': 'loss', 'content': 0.05556534230709076, 'timestamp': '2025-10-02 00:51:19.844343', 'step': 22691, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:19.900194', 'step': 22691, 'epoch': 3}
{'type': 'loss', 'content': 0.019729850813746452, 'timestamp': '2025-10-02 00:51:19.906182', 'step': 22692, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:19.960784', 'step': 22692, 'epoch': 3}
{'type': 'loss', 'content': 0.05204971879720688, 'timestamp': '2025-10-02 00:51:19.964021', 'step': 22693, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:20.019408', 'step': 22693, 'epoch': 3}
{'type': 'loss', 'content': 0.07452628016471863, 'timestamp': '2025-10-02 00:51:20.021911', 'step': 22694, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:20.077529', 'step': 22694, 'epoch': 3}
{'type': 'loss', 'content': 0.0038770355749875307, 'timestamp': '2025-10-02 00:51:20.086586', 'step': 22695, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:20.141424', 'step': 22695, 'epoch': 3}
{'type': 'loss', 'content': 0.11318760365247726, 'timestamp': '2025-10-02 00:51:20.148056', 'step': 22696, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:20.202674', 'step': 22696, 'epoch': 3}
{'type': 'loss', 'content': 0.0549129992723465, 'timestamp': '2025-10-02 00:51:20.210008', 'step': 22697, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:20.265543', 'step': 22697, 'epoch': 3}
{'type': 'loss', 'content': 0.019197892397642136, 'timestamp': '2025-10-02 00:51:20.268262', 'step': 22698, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:51:20.328529', 'step': 22698, 'epoch': 3}
{'type': 'loss', 'content': 0.07085441052913666, 'timestamp': '2025-10-02 00:51:20.338672', 'step': 22699, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:20.394162', 'step': 22699, 'epoch': 3}
{'type': 'loss', 'content': 0.08673590421676636, 'timestamp': '2025-10-02 00:51:20.400315', 'step': 22700, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:20.454418', 'step': 22700, 'epoch': 3}
{'type': 'loss', 'content': 0.025765584781765938, 'timestamp': '2025-10-02 00:51:20.461604', 'step': 22701, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:20.517907', 'step': 22701, 'epoch': 3}
{'type': 'loss', 'content': 0.0782792940735817, 'timestamp': '2025-10-02 00:51:20.520352', 'step': 22702, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:51:20.582656', 'step': 22702, 'epoch': 3}
{'type': 'loss', 'content': 0.03439779952168465, 'timestamp': '2025-10-02 00:51:20.593074', 'step': 22703, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:20.650304', 'step': 22703, 'epoch': 3}
{'type': 'loss', 'content': 0.02580218017101288, 'timestamp': '2025-10-02 00:51:20.660590', 'step': 22704, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:20.721276', 'step': 22704, 'epoch': 3}
{'type': 'loss', 'content': 0.013510537333786488, 'timestamp': '2025-10-02 00:51:20.724229', 'step': 22705, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:20.780543', 'step': 22705, 'epoch': 3}
{'type': 'loss', 'content': 0.05600147321820259, 'timestamp': '2025-10-02 00:51:20.783049', 'step': 22706, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:20.837479', 'step': 22706, 'epoch': 3}
{'type': 'loss', 'content': 0.03940323740243912, 'timestamp': '2025-10-02 00:51:20.840252', 'step': 22707, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:51:20.899799', 'step': 22707, 'epoch': 3}
{'type': 'loss', 'content': 0.04747774824500084, 'timestamp': '2025-10-02 00:51:20.910739', 'step': 22708, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:20.965209', 'step': 22708, 'epoch': 3}
{'type': 'loss', 'content': 0.10471174865961075, 'timestamp': '2025-10-02 00:51:20.967793', 'step': 22709, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:51:21.024036', 'step': 22709, 'epoch': 3}
{'type': 'loss', 'content': 0.06432934105396271, 'timestamp': '2025-10-02 00:51:21.026634', 'step': 22710, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:21.080965', 'step': 22710, 'epoch': 3}
{'type': 'loss', 'content': 0.05044025927782059, 'timestamp': '2025-10-02 00:51:21.084041', 'step': 22711, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:21.139305', 'step': 22711, 'epoch': 3}
{'type': 'loss', 'content': 0.024306127801537514, 'timestamp': '2025-10-02 00:51:21.145605', 'step': 22712, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:51:21.200004', 'step': 22712, 'epoch': 3}
{'type': 'loss', 'content': 0.09918876737356186, 'timestamp': '2025-10-02 00:51:21.202404', 'step': 22713, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:21.257016', 'step': 22713, 'epoch': 3}
{'type': 'loss', 'content': 0.05764550343155861, 'timestamp': '2025-10-02 00:51:21.259473', 'step': 22714, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:21.313743', 'step': 22714, 'epoch': 3}
{'type': 'loss', 'content': 0.03342067450284958, 'timestamp': '2025-10-02 00:51:21.316207', 'step': 22715, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:51:21.377760', 'step': 22715, 'epoch': 3}
{'type': 'loss', 'content': 0.01262163370847702, 'timestamp': '2025-10-02 00:51:21.388937', 'step': 22716, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:51:21.442923', 'step': 22716, 'epoch': 3}
{'type': 'loss', 'content': 0.048788659274578094, 'timestamp': '2025-10-02 00:51:21.445377', 'step': 22717, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:21.499687', 'step': 22717, 'epoch': 3}
{'type': 'loss', 'content': 0.03775060549378395, 'timestamp': '2025-10-02 00:51:21.505319', 'step': 22718, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:21.560711', 'step': 22718, 'epoch': 3}
{'type': 'loss', 'content': 0.0920492634177208, 'timestamp': '2025-10-02 00:51:21.563158', 'step': 22719, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:21.617893', 'step': 22719, 'epoch': 3}
{'type': 'loss', 'content': 0.0453861765563488, 'timestamp': '2025-10-02 00:51:21.623892', 'step': 22720, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:21.679484', 'step': 22720, 'epoch': 3}
{'type': 'loss', 'content': 0.07356526702642441, 'timestamp': '2025-10-02 00:51:21.681828', 'step': 22721, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:21.736454', 'step': 22721, 'epoch': 3}
{'type': 'loss', 'content': 0.10636722296476364, 'timestamp': '2025-10-02 00:51:21.742018', 'step': 22722, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:21.796434', 'step': 22722, 'epoch': 3}
{'type': 'loss', 'content': 0.03334904834628105, 'timestamp': '2025-10-02 00:51:21.799332', 'step': 22723, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:21.855244', 'step': 22723, 'epoch': 3}
{'type': 'loss', 'content': 0.06755612045526505, 'timestamp': '2025-10-02 00:51:21.861455', 'step': 22724, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:21.915946', 'step': 22724, 'epoch': 3}
{'type': 'loss', 'content': 0.10034924000501633, 'timestamp': '2025-10-02 00:51:21.918804', 'step': 22725, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:21.973552', 'step': 22725, 'epoch': 3}
{'type': 'loss', 'content': 0.03538728132843971, 'timestamp': '2025-10-02 00:51:21.980895', 'step': 22726, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:22.037446', 'step': 22726, 'epoch': 3}
{'type': 'loss', 'content': 0.05581256374716759, 'timestamp': '2025-10-02 00:51:22.039913', 'step': 22727, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:22.094831', 'step': 22727, 'epoch': 3}
{'type': 'loss', 'content': 0.04022989049553871, 'timestamp': '2025-10-02 00:51:22.100949', 'step': 22728, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:22.155668', 'step': 22728, 'epoch': 3}
{'type': 'loss', 'content': 0.01767650432884693, 'timestamp': '2025-10-02 00:51:22.158470', 'step': 22729, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:22.214373', 'step': 22729, 'epoch': 3}
{'type': 'loss', 'content': 0.029911402612924576, 'timestamp': '2025-10-02 00:51:22.219873', 'step': 22730, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:22.274842', 'step': 22730, 'epoch': 3}
{'type': 'loss', 'content': 0.025435375049710274, 'timestamp': '2025-10-02 00:51:22.281949', 'step': 22731, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:22.338020', 'step': 22731, 'epoch': 3}
{'type': 'loss', 'content': 0.0477156862616539, 'timestamp': '2025-10-02 00:51:22.344002', 'step': 22732, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:22.398707', 'step': 22732, 'epoch': 3}
{'type': 'loss', 'content': 0.0344778373837471, 'timestamp': '2025-10-02 00:51:22.408280', 'step': 22733, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:22.462135', 'step': 22733, 'epoch': 3}
{'type': 'loss', 'content': 0.08390136063098907, 'timestamp': '2025-10-02 00:51:22.464457', 'step': 22734, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:22.520255', 'step': 22734, 'epoch': 3}
{'type': 'loss', 'content': 0.09278495609760284, 'timestamp': '2025-10-02 00:51:22.529753', 'step': 22735, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:51:22.600276', 'step': 22735, 'epoch': 3}
{'type': 'loss', 'content': 0.013208786956965923, 'timestamp': '2025-10-02 00:51:22.613490', 'step': 22736, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:51:22.674375', 'step': 22736, 'epoch': 3}
{'type': 'loss', 'content': 0.06833302974700928, 'timestamp': '2025-10-02 00:51:22.685663', 'step': 22737, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:22.741368', 'step': 22737, 'epoch': 3}
{'type': 'loss', 'content': 0.04683597385883331, 'timestamp': '2025-10-02 00:51:22.744467', 'step': 22738, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:22.800201', 'step': 22738, 'epoch': 3}
{'type': 'loss', 'content': 0.0351204052567482, 'timestamp': '2025-10-02 00:51:22.807347', 'step': 22739, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:51:22.862619', 'step': 22739, 'epoch': 3}
{'type': 'loss', 'content': 0.058656640350818634, 'timestamp': '2025-10-02 00:51:22.868496', 'step': 22740, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:22.922605', 'step': 22740, 'epoch': 3}
{'type': 'loss', 'content': 0.1501684933900833, 'timestamp': '2025-10-02 00:51:22.925079', 'step': 22741, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:22.981021', 'step': 22741, 'epoch': 3}
{'type': 'loss', 'content': 0.0363185852766037, 'timestamp': '2025-10-02 00:51:22.988512', 'step': 22742, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:23.045294', 'step': 22742, 'epoch': 3}
{'type': 'loss', 'content': 0.04419006407260895, 'timestamp': '2025-10-02 00:51:23.054782', 'step': 22743, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:23.109541', 'step': 22743, 'epoch': 3}
{'type': 'loss', 'content': 0.018423955887556076, 'timestamp': '2025-10-02 00:51:23.116093', 'step': 22744, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:23.171583', 'step': 22744, 'epoch': 3}
{'type': 'loss', 'content': 0.022960104048252106, 'timestamp': '2025-10-02 00:51:23.181856', 'step': 22745, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:23.236581', 'step': 22745, 'epoch': 3}
{'type': 'loss', 'content': 0.12607118487358093, 'timestamp': '2025-10-02 00:51:23.239068', 'step': 22746, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:23.294052', 'step': 22746, 'epoch': 3}
{'type': 'loss', 'content': 0.0443803071975708, 'timestamp': '2025-10-02 00:51:23.299585', 'step': 22747, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:23.353992', 'step': 22747, 'epoch': 3}
{'type': 'loss', 'content': 0.040553018450737, 'timestamp': '2025-10-02 00:51:23.360191', 'step': 22748, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:23.413539', 'step': 22748, 'epoch': 3}
{'type': 'loss', 'content': 0.07217331230640411, 'timestamp': '2025-10-02 00:51:23.415923', 'step': 22749, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:23.470078', 'step': 22749, 'epoch': 3}
{'type': 'loss', 'content': 0.05940130352973938, 'timestamp': '2025-10-02 00:51:23.472533', 'step': 22750, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:23.527231', 'step': 22750, 'epoch': 3}
{'type': 'loss', 'content': 0.08200767636299133, 'timestamp': '2025-10-02 00:51:23.529790', 'step': 22751, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:23.584534', 'step': 22751, 'epoch': 3}
{'type': 'loss', 'content': 0.03720726817846298, 'timestamp': '2025-10-02 00:51:23.590937', 'step': 22752, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:23.654863', 'step': 22752, 'epoch': 3}
{'type': 'loss', 'content': 0.07892608642578125, 'timestamp': '2025-10-02 00:51:23.663527', 'step': 22753, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:23.720924', 'step': 22753, 'epoch': 3}
{'type': 'loss', 'content': 0.04172086715698242, 'timestamp': '2025-10-02 00:51:23.726571', 'step': 22754, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:23.783128', 'step': 22754, 'epoch': 3}
{'type': 'loss', 'content': 0.04354437068104744, 'timestamp': '2025-10-02 00:51:23.785585', 'step': 22755, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:23.841536', 'step': 22755, 'epoch': 3}
{'type': 'loss', 'content': 0.03617733344435692, 'timestamp': '2025-10-02 00:51:23.851796', 'step': 22756, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:23.905921', 'step': 22756, 'epoch': 3}
{'type': 'loss', 'content': 0.01855289936065674, 'timestamp': '2025-10-02 00:51:23.911607', 'step': 22757, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:23.966252', 'step': 22757, 'epoch': 3}
{'type': 'loss', 'content': 0.046844083815813065, 'timestamp': '2025-10-02 00:51:23.973474', 'step': 22758, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:51:24.028244', 'step': 22758, 'epoch': 3}
{'type': 'loss', 'content': 0.12044990807771683, 'timestamp': '2025-10-02 00:51:24.030783', 'step': 22759, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:24.085870', 'step': 22759, 'epoch': 3}
{'type': 'loss', 'content': 0.040142204612493515, 'timestamp': '2025-10-02 00:51:24.095949', 'step': 22760, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:24.150500', 'step': 22760, 'epoch': 3}
{'type': 'loss', 'content': 0.0888698548078537, 'timestamp': '2025-10-02 00:51:24.153151', 'step': 22761, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:24.207883', 'step': 22761, 'epoch': 3}
{'type': 'loss', 'content': 0.04096970334649086, 'timestamp': '2025-10-02 00:51:24.215158', 'step': 22762, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:24.272896', 'step': 22762, 'epoch': 3}
{'type': 'loss', 'content': 0.018048858270049095, 'timestamp': '2025-10-02 00:51:24.279954', 'step': 22763, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:24.334779', 'step': 22763, 'epoch': 3}
{'type': 'loss', 'content': 0.06198933720588684, 'timestamp': '2025-10-02 00:51:24.341143', 'step': 22764, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:24.395858', 'step': 22764, 'epoch': 3}
{'type': 'loss', 'content': 0.04649011418223381, 'timestamp': '2025-10-02 00:51:24.398368', 'step': 22765, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:24.453138', 'step': 22765, 'epoch': 3}
{'type': 'loss', 'content': 0.04065842181444168, 'timestamp': '2025-10-02 00:51:24.460467', 'step': 22766, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:24.515700', 'step': 22766, 'epoch': 3}
{'type': 'loss', 'content': 0.020246921107172966, 'timestamp': '2025-10-02 00:51:24.524783', 'step': 22767, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:24.579907', 'step': 22767, 'epoch': 3}
{'type': 'loss', 'content': 0.0328269898891449, 'timestamp': '2025-10-02 00:51:24.586289', 'step': 22768, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:24.640636', 'step': 22768, 'epoch': 3}
{'type': 'loss', 'content': 0.03539784997701645, 'timestamp': '2025-10-02 00:51:24.643409', 'step': 22769, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:24.699486', 'step': 22769, 'epoch': 3}
{'type': 'loss', 'content': 0.017506049945950508, 'timestamp': '2025-10-02 00:51:24.708955', 'step': 22770, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:24.764009', 'step': 22770, 'epoch': 3}
{'type': 'loss', 'content': 0.012914593331515789, 'timestamp': '2025-10-02 00:51:24.766536', 'step': 22771, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:24.820961', 'step': 22771, 'epoch': 3}
{'type': 'loss', 'content': 0.054983533918857574, 'timestamp': '2025-10-02 00:51:24.831079', 'step': 22772, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:24.886351', 'step': 22772, 'epoch': 3}
{'type': 'loss', 'content': 0.06732800602912903, 'timestamp': '2025-10-02 00:51:24.888840', 'step': 22773, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:24.943877', 'step': 22773, 'epoch': 3}
{'type': 'loss', 'content': 0.10639939457178116, 'timestamp': '2025-10-02 00:51:24.946486', 'step': 22774, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:25.001271', 'step': 22774, 'epoch': 3}
{'type': 'loss', 'content': 0.06194140762090683, 'timestamp': '2025-10-02 00:51:25.006787', 'step': 22775, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:25.061992', 'step': 22775, 'epoch': 3}
{'type': 'loss', 'content': 0.04060736671090126, 'timestamp': '2025-10-02 00:51:25.067896', 'step': 22776, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:25.122439', 'step': 22776, 'epoch': 3}
{'type': 'loss', 'content': 0.059272680431604385, 'timestamp': '2025-10-02 00:51:25.125111', 'step': 22777, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:25.179914', 'step': 22777, 'epoch': 3}
{'type': 'loss', 'content': 0.024868961423635483, 'timestamp': '2025-10-02 00:51:25.189174', 'step': 22778, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:25.244761', 'step': 22778, 'epoch': 3}
{'type': 'loss', 'content': 0.12581337988376617, 'timestamp': '2025-10-02 00:51:25.253109', 'step': 22779, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:25.313960', 'step': 22779, 'epoch': 3}
{'type': 'loss', 'content': 0.06600655615329742, 'timestamp': '2025-10-02 00:51:25.320121', 'step': 22780, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:25.374606', 'step': 22780, 'epoch': 3}
{'type': 'loss', 'content': 0.03148830682039261, 'timestamp': '2025-10-02 00:51:25.377251', 'step': 22781, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:25.431888', 'step': 22781, 'epoch': 3}
{'type': 'loss', 'content': 0.037981029599905014, 'timestamp': '2025-10-02 00:51:25.434506', 'step': 22782, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:51:25.488632', 'step': 22782, 'epoch': 3}
{'type': 'loss', 'content': 0.02578987181186676, 'timestamp': '2025-10-02 00:51:25.490980', 'step': 22783, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:51:25.552259', 'step': 22783, 'epoch': 3}
{'type': 'loss', 'content': 0.06460723280906677, 'timestamp': '2025-10-02 00:51:25.563515', 'step': 22784, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:25.617401', 'step': 22784, 'epoch': 3}
{'type': 'loss', 'content': 0.11177822947502136, 'timestamp': '2025-10-02 00:51:25.619680', 'step': 22785, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:25.673767', 'step': 22785, 'epoch': 3}
{'type': 'loss', 'content': 0.037557102739810944, 'timestamp': '2025-10-02 00:51:25.677674', 'step': 22786, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:25.735108', 'step': 22786, 'epoch': 3}
{'type': 'loss', 'content': 0.03554346039891243, 'timestamp': '2025-10-02 00:51:25.737606', 'step': 22787, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:25.792423', 'step': 22787, 'epoch': 3}
{'type': 'loss', 'content': 0.08077128231525421, 'timestamp': '2025-10-02 00:51:25.801947', 'step': 22788, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:25.859135', 'step': 22788, 'epoch': 3}
{'type': 'loss', 'content': 0.06618117541074753, 'timestamp': '2025-10-02 00:51:25.861529', 'step': 22789, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:51:25.922445', 'step': 22789, 'epoch': 3}
{'type': 'loss', 'content': 0.05354394391179085, 'timestamp': '2025-10-02 00:51:25.925534', 'step': 22790, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:25.982941', 'step': 22790, 'epoch': 3}
{'type': 'loss', 'content': 0.05774739384651184, 'timestamp': '2025-10-02 00:51:25.992462', 'step': 22791, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:26.051443', 'step': 22791, 'epoch': 3}
{'type': 'loss', 'content': 0.030516713857650757, 'timestamp': '2025-10-02 00:51:26.064914', 'step': 22792, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:26.121162', 'step': 22792, 'epoch': 3}
{'type': 'loss', 'content': 0.010302950628101826, 'timestamp': '2025-10-02 00:51:26.126461', 'step': 22793, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:26.186195', 'step': 22793, 'epoch': 3}
{'type': 'loss', 'content': 0.010265935212373734, 'timestamp': '2025-10-02 00:51:26.189044', 'step': 22794, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:26.244955', 'step': 22794, 'epoch': 3}
{'type': 'loss', 'content': 0.05650755390524864, 'timestamp': '2025-10-02 00:51:26.249976', 'step': 22795, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:26.305386', 'step': 22795, 'epoch': 3}
{'type': 'loss', 'content': 0.02365819737315178, 'timestamp': '2025-10-02 00:51:26.313304', 'step': 22796, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:26.370738', 'step': 22796, 'epoch': 3}
{'type': 'loss', 'content': 0.02775256149470806, 'timestamp': '2025-10-02 00:51:26.373141', 'step': 22797, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:26.429735', 'step': 22797, 'epoch': 3}
{'type': 'loss', 'content': 0.053506046533584595, 'timestamp': '2025-10-02 00:51:26.432802', 'step': 22798, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:26.490841', 'step': 22798, 'epoch': 3}
{'type': 'loss', 'content': 0.032437775284051895, 'timestamp': '2025-10-02 00:51:26.494434', 'step': 22799, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:26.551622', 'step': 22799, 'epoch': 3}
{'type': 'loss', 'content': 0.0214333888143301, 'timestamp': '2025-10-02 00:51:26.558685', 'step': 22800, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:26.616588', 'step': 22800, 'epoch': 3}
{'type': 'loss', 'content': 0.019630515947937965, 'timestamp': '2025-10-02 00:51:26.620642', 'step': 22801, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:26.690918', 'step': 22801, 'epoch': 3}
{'type': 'loss', 'content': 0.09398622065782547, 'timestamp': '2025-10-02 00:51:26.699734', 'step': 22802, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:26.782238', 'step': 22802, 'epoch': 3}
{'type': 'loss', 'content': 0.1805701106786728, 'timestamp': '2025-10-02 00:51:26.786134', 'step': 22803, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:26.842279', 'step': 22803, 'epoch': 3}
{'type': 'loss', 'content': 0.0771428793668747, 'timestamp': '2025-10-02 00:51:26.850943', 'step': 22804, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:26.924802', 'step': 22804, 'epoch': 3}
{'type': 'loss', 'content': 0.01144733838737011, 'timestamp': '2025-10-02 00:51:26.930250', 'step': 22805, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:51:27.000020', 'step': 22805, 'epoch': 3}
{'type': 'loss', 'content': 0.020319979637861252, 'timestamp': '2025-10-02 00:51:27.011962', 'step': 22806, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:27.070872', 'step': 22806, 'epoch': 3}
{'type': 'loss', 'content': 0.046956486999988556, 'timestamp': '2025-10-02 00:51:27.079643', 'step': 22807, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:51:27.136637', 'step': 22807, 'epoch': 3}
{'type': 'loss', 'content': 0.07593000680208206, 'timestamp': '2025-10-02 00:51:27.143658', 'step': 22808, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:27.199072', 'step': 22808, 'epoch': 3}
{'type': 'loss', 'content': 0.09502004832029343, 'timestamp': '2025-10-02 00:51:27.202459', 'step': 22809, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:27.258582', 'step': 22809, 'epoch': 3}
{'type': 'loss', 'content': 0.058919891715049744, 'timestamp': '2025-10-02 00:51:27.261852', 'step': 22810, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:27.318811', 'step': 22810, 'epoch': 3}
{'type': 'loss', 'content': 0.0220040176063776, 'timestamp': '2025-10-02 00:51:27.324431', 'step': 22811, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:51:27.398248', 'step': 22811, 'epoch': 3}
{'type': 'loss', 'content': 0.005634521134197712, 'timestamp': '2025-10-02 00:51:27.411639', 'step': 22812, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:27.468883', 'step': 22812, 'epoch': 3}
{'type': 'loss', 'content': 0.04977568984031677, 'timestamp': '2025-10-02 00:51:27.475807', 'step': 22813, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:51:27.533198', 'step': 22813, 'epoch': 3}
{'type': 'loss', 'content': 0.10676531493663788, 'timestamp': '2025-10-02 00:51:27.536448', 'step': 22814, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:27.592424', 'step': 22814, 'epoch': 3}
{'type': 'loss', 'content': 0.13343368470668793, 'timestamp': '2025-10-02 00:51:27.595728', 'step': 22815, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:27.652998', 'step': 22815, 'epoch': 3}
{'type': 'loss', 'content': 0.029399121180176735, 'timestamp': '2025-10-02 00:51:27.661034', 'step': 22816, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:27.717313', 'step': 22816, 'epoch': 3}
{'type': 'loss', 'content': 0.04901978373527527, 'timestamp': '2025-10-02 00:51:27.724219', 'step': 22817, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:27.783421', 'step': 22817, 'epoch': 3}
{'type': 'loss', 'content': 0.13486768305301666, 'timestamp': '2025-10-02 00:51:27.790237', 'step': 22818, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:27.846858', 'step': 22818, 'epoch': 3}
{'type': 'loss', 'content': 0.040287215262651443, 'timestamp': '2025-10-02 00:51:27.850553', 'step': 22819, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:51:27.907995', 'step': 22819, 'epoch': 3}
{'type': 'loss', 'content': 0.1203504428267479, 'timestamp': '2025-10-02 00:51:27.914047', 'step': 22820, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:51:27.975442', 'step': 22820, 'epoch': 3}
{'type': 'loss', 'content': 0.01310777384787798, 'timestamp': '2025-10-02 00:51:27.987194', 'step': 22821, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:28.043781', 'step': 22821, 'epoch': 3}
{'type': 'loss', 'content': 0.046790603548288345, 'timestamp': '2025-10-02 00:51:28.049845', 'step': 22822, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:28.104273', 'step': 22822, 'epoch': 3}
{'type': 'loss', 'content': 0.014175943098962307, 'timestamp': '2025-10-02 00:51:28.107054', 'step': 22823, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:28.162699', 'step': 22823, 'epoch': 3}
{'type': 'loss', 'content': 0.04656292498111725, 'timestamp': '2025-10-02 00:51:28.168814', 'step': 22824, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:51:28.223663', 'step': 22824, 'epoch': 3}
{'type': 'loss', 'content': 0.07070865482091904, 'timestamp': '2025-10-02 00:51:28.226156', 'step': 22825, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:28.281916', 'step': 22825, 'epoch': 3}
{'type': 'loss', 'content': 0.03774800896644592, 'timestamp': '2025-10-02 00:51:28.284460', 'step': 22826, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:28.340951', 'step': 22826, 'epoch': 3}
{'type': 'loss', 'content': 0.0271182619035244, 'timestamp': '2025-10-02 00:51:28.350461', 'step': 22827, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:51:28.405445', 'step': 22827, 'epoch': 3}
{'type': 'loss', 'content': 0.10288715362548828, 'timestamp': '2025-10-02 00:51:28.411457', 'step': 22828, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:28.465417', 'step': 22828, 'epoch': 3}
{'type': 'loss', 'content': 0.06042087450623512, 'timestamp': '2025-10-02 00:51:28.468742', 'step': 22829, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:51:28.530193', 'step': 22829, 'epoch': 3}
{'type': 'loss', 'content': 0.028249545022845268, 'timestamp': '2025-10-02 00:51:28.540639', 'step': 22830, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:28.595638', 'step': 22830, 'epoch': 3}
{'type': 'loss', 'content': 0.1474132239818573, 'timestamp': '2025-10-02 00:51:28.598119', 'step': 22831, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:28.652183', 'step': 22831, 'epoch': 3}
{'type': 'loss', 'content': 0.07966917753219604, 'timestamp': '2025-10-02 00:51:28.658275', 'step': 22832, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:28.712762', 'step': 22832, 'epoch': 3}
{'type': 'loss', 'content': 0.029800571501255035, 'timestamp': '2025-10-02 00:51:28.715358', 'step': 22833, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:28.769921', 'step': 22833, 'epoch': 3}
{'type': 'loss', 'content': 0.07428232580423355, 'timestamp': '2025-10-02 00:51:28.772517', 'step': 22834, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:28.827329', 'step': 22834, 'epoch': 3}
{'type': 'loss', 'content': 0.08877203613519669, 'timestamp': '2025-10-02 00:51:28.830214', 'step': 22835, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:28.884025', 'step': 22835, 'epoch': 3}
{'type': 'loss', 'content': 0.1526583582162857, 'timestamp': '2025-10-02 00:51:28.890181', 'step': 22836, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:28.944261', 'step': 22836, 'epoch': 3}
{'type': 'loss', 'content': 0.058925483375787735, 'timestamp': '2025-10-02 00:51:28.954500', 'step': 22837, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:29.009857', 'step': 22837, 'epoch': 3}
{'type': 'loss', 'content': 0.02050120197236538, 'timestamp': '2025-10-02 00:51:29.019011', 'step': 22838, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:29.073707', 'step': 22838, 'epoch': 3}
{'type': 'loss', 'content': 0.09382113814353943, 'timestamp': '2025-10-02 00:51:29.076817', 'step': 22839, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:29.131056', 'step': 22839, 'epoch': 3}
{'type': 'loss', 'content': 0.2957143187522888, 'timestamp': '2025-10-02 00:51:29.137515', 'step': 22840, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:29.191436', 'step': 22840, 'epoch': 3}
{'type': 'loss', 'content': 0.06521545350551605, 'timestamp': '2025-10-02 00:51:29.194219', 'step': 22841, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:29.248110', 'step': 22841, 'epoch': 3}
{'type': 'loss', 'content': 0.09107037633657455, 'timestamp': '2025-10-02 00:51:29.250542', 'step': 22842, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:29.305645', 'step': 22842, 'epoch': 3}
{'type': 'loss', 'content': 0.03218109905719757, 'timestamp': '2025-10-02 00:51:29.308051', 'step': 22843, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:29.362545', 'step': 22843, 'epoch': 3}
{'type': 'loss', 'content': 0.06515327095985413, 'timestamp': '2025-10-02 00:51:29.368619', 'step': 22844, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:29.423275', 'step': 22844, 'epoch': 3}
{'type': 'loss', 'content': 0.016370166093111038, 'timestamp': '2025-10-02 00:51:29.425796', 'step': 22845, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:29.480874', 'step': 22845, 'epoch': 3}
{'type': 'loss', 'content': 0.002844960195943713, 'timestamp': '2025-10-02 00:51:29.490247', 'step': 22846, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:29.544423', 'step': 22846, 'epoch': 3}
{'type': 'loss', 'content': 0.060822825878858566, 'timestamp': '2025-10-02 00:51:29.551753', 'step': 22847, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:29.606292', 'step': 22847, 'epoch': 3}
{'type': 'loss', 'content': 0.10053404420614243, 'timestamp': '2025-10-02 00:51:29.612624', 'step': 22848, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:29.666326', 'step': 22848, 'epoch': 3}
{'type': 'loss', 'content': 0.14123104512691498, 'timestamp': '2025-10-02 00:51:29.668842', 'step': 22849, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:51:29.730362', 'step': 22849, 'epoch': 3}
{'type': 'loss', 'content': 0.038713205605745316, 'timestamp': '2025-10-02 00:51:29.741013', 'step': 22850, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:29.796457', 'step': 22850, 'epoch': 3}
{'type': 'loss', 'content': 0.025224760174751282, 'timestamp': '2025-10-02 00:51:29.799116', 'step': 22851, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:29.853344', 'step': 22851, 'epoch': 3}
{'type': 'loss', 'content': 0.1151079311966896, 'timestamp': '2025-10-02 00:51:29.861511', 'step': 22852, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:51:29.916498', 'step': 22852, 'epoch': 3}
{'type': 'loss', 'content': 0.04670635238289833, 'timestamp': '2025-10-02 00:51:29.919694', 'step': 22853, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:29.975854', 'step': 22853, 'epoch': 3}
{'type': 'loss', 'content': 0.025777094066143036, 'timestamp': '2025-10-02 00:51:29.983230', 'step': 22854, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:30.039551', 'step': 22854, 'epoch': 3}
{'type': 'loss', 'content': 0.025468558073043823, 'timestamp': '2025-10-02 00:51:30.045109', 'step': 22855, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:30.099528', 'step': 22855, 'epoch': 3}
{'type': 'loss', 'content': 0.05801410973072052, 'timestamp': '2025-10-02 00:51:30.107493', 'step': 22856, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:30.160700', 'step': 22856, 'epoch': 3}
{'type': 'loss', 'content': 0.1221846267580986, 'timestamp': '2025-10-02 00:51:30.163050', 'step': 22857, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:30.217032', 'step': 22857, 'epoch': 3}
{'type': 'loss', 'content': 0.08958590775728226, 'timestamp': '2025-10-02 00:51:30.219678', 'step': 22858, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:51:30.281079', 'step': 22858, 'epoch': 3}
{'type': 'loss', 'content': 0.026676496490836143, 'timestamp': '2025-10-02 00:51:30.291540', 'step': 22859, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:30.350182', 'step': 22859, 'epoch': 3}
{'type': 'loss', 'content': 0.0603083074092865, 'timestamp': '2025-10-02 00:51:30.356522', 'step': 22860, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:51:30.410370', 'step': 22860, 'epoch': 3}
{'type': 'loss', 'content': 0.05215662345290184, 'timestamp': '2025-10-02 00:51:30.412575', 'step': 22861, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:30.467089', 'step': 22861, 'epoch': 3}
{'type': 'loss', 'content': 0.030642036348581314, 'timestamp': '2025-10-02 00:51:30.469678', 'step': 22862, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:51:30.543628', 'step': 22862, 'epoch': 3}
{'type': 'loss', 'content': 0.025692226365208626, 'timestamp': '2025-10-02 00:51:30.556775', 'step': 22863, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:30.611047', 'step': 22863, 'epoch': 3}
{'type': 'loss', 'content': 0.059327125549316406, 'timestamp': '2025-10-02 00:51:30.617328', 'step': 22864, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:30.671439', 'step': 22864, 'epoch': 3}
{'type': 'loss', 'content': 0.014275754801928997, 'timestamp': '2025-10-02 00:51:30.673786', 'step': 22865, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:51:30.728379', 'step': 22865, 'epoch': 3}
{'type': 'loss', 'content': 0.10721002519130707, 'timestamp': '2025-10-02 00:51:30.730951', 'step': 22866, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:30.786267', 'step': 22866, 'epoch': 3}
{'type': 'loss', 'content': 0.04204064980149269, 'timestamp': '2025-10-02 00:51:30.789057', 'step': 22867, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:30.845050', 'step': 22867, 'epoch': 3}
{'type': 'loss', 'content': 0.026422899216413498, 'timestamp': '2025-10-02 00:51:30.855168', 'step': 22868, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:30.909145', 'step': 22868, 'epoch': 3}
{'type': 'loss', 'content': 0.001518887234851718, 'timestamp': '2025-10-02 00:51:30.918524', 'step': 22869, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:30.973691', 'step': 22869, 'epoch': 3}
{'type': 'loss', 'content': 0.0014205947518348694, 'timestamp': '2025-10-02 00:51:30.982911', 'step': 22870, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:31.038013', 'step': 22870, 'epoch': 3}
{'type': 'loss', 'content': 0.05358763784170151, 'timestamp': '2025-10-02 00:51:31.040518', 'step': 22871, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:31.095336', 'step': 22871, 'epoch': 3}
{'type': 'loss', 'content': 0.10106410831212997, 'timestamp': '2025-10-02 00:51:31.105289', 'step': 22872, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:31.158987', 'step': 22872, 'epoch': 3}
{'type': 'loss', 'content': 0.018421484157443047, 'timestamp': '2025-10-02 00:51:31.164562', 'step': 22873, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:51:31.219165', 'step': 22873, 'epoch': 3}
{'type': 'loss', 'content': 0.11937171220779419, 'timestamp': '2025-10-02 00:51:31.221434', 'step': 22874, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:31.275946', 'step': 22874, 'epoch': 3}
{'type': 'loss', 'content': 0.0990699753165245, 'timestamp': '2025-10-02 00:51:31.278445', 'step': 22875, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:31.332902', 'step': 22875, 'epoch': 3}
{'type': 'loss', 'content': 0.05296565592288971, 'timestamp': '2025-10-02 00:51:31.342982', 'step': 22876, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:31.397519', 'step': 22876, 'epoch': 3}
{'type': 'loss', 'content': 0.04046780243515968, 'timestamp': '2025-10-02 00:51:31.400043', 'step': 22877, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:31.454437', 'step': 22877, 'epoch': 3}
{'type': 'loss', 'content': 0.09862332046031952, 'timestamp': '2025-10-02 00:51:31.456937', 'step': 22878, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:31.511355', 'step': 22878, 'epoch': 3}
{'type': 'loss', 'content': 0.14574338495731354, 'timestamp': '2025-10-02 00:51:31.513852', 'step': 22879, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:31.573220', 'step': 22879, 'epoch': 3}
{'type': 'loss', 'content': 0.03421587869524956, 'timestamp': '2025-10-02 00:51:31.581183', 'step': 22880, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:31.635172', 'step': 22880, 'epoch': 3}
{'type': 'loss', 'content': 0.05324072763323784, 'timestamp': '2025-10-02 00:51:31.637561', 'step': 22881, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:31.691671', 'step': 22881, 'epoch': 3}
{'type': 'loss', 'content': 0.07916649430990219, 'timestamp': '2025-10-02 00:51:31.695702', 'step': 22882, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:31.750672', 'step': 22882, 'epoch': 3}
{'type': 'loss', 'content': 0.03649887070059776, 'timestamp': '2025-10-02 00:51:31.753167', 'step': 22883, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:31.807585', 'step': 22883, 'epoch': 3}
{'type': 'loss', 'content': 0.09472895413637161, 'timestamp': '2025-10-02 00:51:31.814157', 'step': 22884, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:31.868098', 'step': 22884, 'epoch': 3}
{'type': 'loss', 'content': 0.033942483365535736, 'timestamp': '2025-10-02 00:51:31.872024', 'step': 22885, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:51:31.929376', 'step': 22885, 'epoch': 3}
{'type': 'loss', 'content': 0.04015206918120384, 'timestamp': '2025-10-02 00:51:31.931977', 'step': 22886, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:51:31.991603', 'step': 22886, 'epoch': 3}
{'type': 'loss', 'content': 0.056287068873643875, 'timestamp': '2025-10-02 00:51:31.994480', 'step': 22887, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:32.048359', 'step': 22887, 'epoch': 3}
{'type': 'loss', 'content': 0.04141012951731682, 'timestamp': '2025-10-02 00:51:32.054354', 'step': 22888, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:51:32.114520', 'step': 22888, 'epoch': 3}
{'type': 'loss', 'content': 0.06061442568898201, 'timestamp': '2025-10-02 00:51:32.126034', 'step': 22889, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:32.180316', 'step': 22889, 'epoch': 3}
{'type': 'loss', 'content': 0.11869659274816513, 'timestamp': '2025-10-02 00:51:32.182694', 'step': 22890, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:32.236412', 'step': 22890, 'epoch': 3}
{'type': 'loss', 'content': 0.11439196765422821, 'timestamp': '2025-10-02 00:51:32.238918', 'step': 22891, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:32.294236', 'step': 22891, 'epoch': 3}
{'type': 'loss', 'content': 0.03772903233766556, 'timestamp': '2025-10-02 00:51:32.304579', 'step': 22892, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:32.358851', 'step': 22892, 'epoch': 3}
{'type': 'loss', 'content': 0.037580542266368866, 'timestamp': '2025-10-02 00:51:32.361238', 'step': 22893, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:51:32.423327', 'step': 22893, 'epoch': 3}
{'type': 'loss', 'content': 0.025672050192952156, 'timestamp': '2025-10-02 00:51:32.433926', 'step': 22894, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:32.490255', 'step': 22894, 'epoch': 3}
{'type': 'loss', 'content': 0.09070776402950287, 'timestamp': '2025-10-02 00:51:32.493353', 'step': 22895, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:32.548014', 'step': 22895, 'epoch': 3}
{'type': 'loss', 'content': 0.018560701981186867, 'timestamp': '2025-10-02 00:51:32.554786', 'step': 22896, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:32.608732', 'step': 22896, 'epoch': 3}
{'type': 'loss', 'content': 0.14434702694416046, 'timestamp': '2025-10-02 00:51:32.611337', 'step': 22897, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:32.665890', 'step': 22897, 'epoch': 3}
{'type': 'loss', 'content': 0.03970007598400116, 'timestamp': '2025-10-02 00:51:32.668348', 'step': 22898, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:32.724319', 'step': 22898, 'epoch': 3}
{'type': 'loss', 'content': 0.014072186313569546, 'timestamp': '2025-10-02 00:51:32.733622', 'step': 22899, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:51:32.789868', 'step': 22899, 'epoch': 3}
{'type': 'loss', 'content': 0.16128940880298615, 'timestamp': '2025-10-02 00:51:32.795930', 'step': 22900, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:32.850663', 'step': 22900, 'epoch': 3}
{'type': 'loss', 'content': 0.039262089878320694, 'timestamp': '2025-10-02 00:51:32.853221', 'step': 22901, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:32.908784', 'step': 22901, 'epoch': 3}
{'type': 'loss', 'content': 0.012894021347165108, 'timestamp': '2025-10-02 00:51:32.911367', 'step': 22902, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:32.966132', 'step': 22902, 'epoch': 3}
{'type': 'loss', 'content': 0.03324269503355026, 'timestamp': '2025-10-02 00:51:32.969005', 'step': 22903, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:33.024870', 'step': 22903, 'epoch': 3}
{'type': 'loss', 'content': 0.03579357638955116, 'timestamp': '2025-10-02 00:51:33.030973', 'step': 22904, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:33.085684', 'step': 22904, 'epoch': 3}
{'type': 'loss', 'content': 0.04824928566813469, 'timestamp': '2025-10-02 00:51:33.095855', 'step': 22905, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:33.150311', 'step': 22905, 'epoch': 3}
{'type': 'loss', 'content': 0.06513792276382446, 'timestamp': '2025-10-02 00:51:33.152917', 'step': 22906, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:51:33.211686', 'step': 22906, 'epoch': 3}
{'type': 'loss', 'content': 0.012331617064774036, 'timestamp': '2025-10-02 00:51:33.221820', 'step': 22907, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:33.276692', 'step': 22907, 'epoch': 3}
{'type': 'loss', 'content': 0.030525177717208862, 'timestamp': '2025-10-02 00:51:33.282741', 'step': 22908, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:33.337512', 'step': 22908, 'epoch': 3}
{'type': 'loss', 'content': 0.06312532722949982, 'timestamp': '2025-10-02 00:51:33.340291', 'step': 22909, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:51:33.395042', 'step': 22909, 'epoch': 3}
{'type': 'loss', 'content': 0.04925210401415825, 'timestamp': '2025-10-02 00:51:33.398351', 'step': 22910, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:33.454158', 'step': 22910, 'epoch': 3}
{'type': 'loss', 'content': 0.019184567034244537, 'timestamp': '2025-10-02 00:51:33.461381', 'step': 22911, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:33.517019', 'step': 22911, 'epoch': 3}
{'type': 'loss', 'content': 0.07737389951944351, 'timestamp': '2025-10-02 00:51:33.524315', 'step': 22912, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:33.579014', 'step': 22912, 'epoch': 3}
{'type': 'loss', 'content': 0.04123842716217041, 'timestamp': '2025-10-02 00:51:33.581500', 'step': 22913, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:51:33.651850', 'step': 22913, 'epoch': 3}
{'type': 'loss', 'content': 0.01588038168847561, 'timestamp': '2025-10-02 00:51:33.664466', 'step': 22914, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:33.719485', 'step': 22914, 'epoch': 3}
{'type': 'loss', 'content': 0.04961873218417168, 'timestamp': '2025-10-02 00:51:33.722121', 'step': 22915, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:33.777277', 'step': 22915, 'epoch': 3}
{'type': 'loss', 'content': 0.015220807865262032, 'timestamp': '2025-10-02 00:51:33.783257', 'step': 22916, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:33.837159', 'step': 22916, 'epoch': 3}
{'type': 'loss', 'content': 0.027094051241874695, 'timestamp': '2025-10-02 00:51:33.839337', 'step': 22917, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:33.893302', 'step': 22917, 'epoch': 3}
{'type': 'loss', 'content': 0.04670460894703865, 'timestamp': '2025-10-02 00:51:33.895802', 'step': 22918, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:33.949993', 'step': 22918, 'epoch': 3}
{'type': 'loss', 'content': 0.043473225086927414, 'timestamp': '2025-10-02 00:51:33.952388', 'step': 22919, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:34.006351', 'step': 22919, 'epoch': 3}
{'type': 'loss', 'content': 0.05720727890729904, 'timestamp': '2025-10-02 00:51:34.012342', 'step': 22920, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:34.066306', 'step': 22920, 'epoch': 3}
{'type': 'loss', 'content': 0.021973712369799614, 'timestamp': '2025-10-02 00:51:34.073515', 'step': 22921, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:34.127705', 'step': 22921, 'epoch': 3}
{'type': 'loss', 'content': 0.054668162018060684, 'timestamp': '2025-10-02 00:51:34.133428', 'step': 22922, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:51:34.189048', 'step': 22922, 'epoch': 3}
{'type': 'loss', 'content': 0.09563075751066208, 'timestamp': '2025-10-02 00:51:34.191514', 'step': 22923, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:51:34.246525', 'step': 22923, 'epoch': 3}
{'type': 'loss', 'content': 0.035164158791303635, 'timestamp': '2025-10-02 00:51:34.256825', 'step': 22924, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:34.310491', 'step': 22924, 'epoch': 3}
{'type': 'loss', 'content': 0.06649912148714066, 'timestamp': '2025-10-02 00:51:34.313086', 'step': 22925, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:34.367988', 'step': 22925, 'epoch': 3}
{'type': 'loss', 'content': 0.00039918761467561126, 'timestamp': '2025-10-02 00:51:34.370464', 'step': 22926, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:34.425528', 'step': 22926, 'epoch': 3}
{'type': 'loss', 'content': 0.07009895145893097, 'timestamp': '2025-10-02 00:51:34.428007', 'step': 22927, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:34.482834', 'step': 22927, 'epoch': 3}
{'type': 'loss', 'content': 0.02362816594541073, 'timestamp': '2025-10-02 00:51:34.488867', 'step': 22928, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:34.542796', 'step': 22928, 'epoch': 3}
{'type': 'loss', 'content': 0.0769931972026825, 'timestamp': '2025-10-02 00:51:34.545717', 'step': 22929, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:51:34.599844', 'step': 22929, 'epoch': 3}
{'type': 'loss', 'content': 0.035642217844724655, 'timestamp': '2025-10-02 00:51:34.602256', 'step': 22930, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:34.656896', 'step': 22930, 'epoch': 3}
{'type': 'loss', 'content': 0.04710325226187706, 'timestamp': '2025-10-02 00:51:34.659701', 'step': 22931, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:34.714321', 'step': 22931, 'epoch': 3}
{'type': 'loss', 'content': 0.02324433997273445, 'timestamp': '2025-10-02 00:51:34.722503', 'step': 22932, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:51:34.776433', 'step': 22932, 'epoch': 3}
{'type': 'loss', 'content': 0.014139682054519653, 'timestamp': '2025-10-02 00:51:34.778881', 'step': 22933, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:34.833381', 'step': 22933, 'epoch': 3}
{'type': 'loss', 'content': 0.08336665481328964, 'timestamp': '2025-10-02 00:51:34.835987', 'step': 22934, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:34.890557', 'step': 22934, 'epoch': 3}
{'type': 'loss', 'content': 0.027922524139285088, 'timestamp': '2025-10-02 00:51:34.897777', 'step': 22935, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:51:34.959266', 'step': 22935, 'epoch': 3}
{'type': 'loss', 'content': 0.013333340175449848, 'timestamp': '2025-10-02 00:51:34.970502', 'step': 22936, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:35.025030', 'step': 22936, 'epoch': 3}
{'type': 'loss', 'content': 0.04673748463392258, 'timestamp': '2025-10-02 00:51:35.027888', 'step': 22937, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:35.083323', 'step': 22937, 'epoch': 3}
{'type': 'loss', 'content': 0.05556810274720192, 'timestamp': '2025-10-02 00:51:35.085720', 'step': 22938, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:35.140825', 'step': 22938, 'epoch': 3}
{'type': 'loss', 'content': 0.0689493790268898, 'timestamp': '2025-10-02 00:51:35.146441', 'step': 22939, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:35.203785', 'step': 22939, 'epoch': 3}
{'type': 'loss', 'content': 0.07000274211168289, 'timestamp': '2025-10-02 00:51:35.211785', 'step': 22940, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:51:35.265907', 'step': 22940, 'epoch': 3}
{'type': 'loss', 'content': 0.060706861317157745, 'timestamp': '2025-10-02 00:51:35.278489', 'step': 22941, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:35.334140', 'step': 22941, 'epoch': 3}
{'type': 'loss', 'content': 0.051067985594272614, 'timestamp': '2025-10-02 00:51:35.339724', 'step': 22942, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:35.394683', 'step': 22942, 'epoch': 3}
{'type': 'loss', 'content': 0.042762674391269684, 'timestamp': '2025-10-02 00:51:35.401789', 'step': 22943, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:35.456226', 'step': 22943, 'epoch': 3}
{'type': 'loss', 'content': 0.055554553866386414, 'timestamp': '2025-10-02 00:51:35.462639', 'step': 22944, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:51:35.516142', 'step': 22944, 'epoch': 3}
{'type': 'loss', 'content': 0.13670597970485687, 'timestamp': '2025-10-02 00:51:35.518692', 'step': 22945, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:35.573838', 'step': 22945, 'epoch': 3}
{'type': 'loss', 'content': 0.030080005526542664, 'timestamp': '2025-10-02 00:51:35.576470', 'step': 22946, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:35.630840', 'step': 22946, 'epoch': 3}
{'type': 'loss', 'content': 0.02286967821419239, 'timestamp': '2025-10-02 00:51:35.633594', 'step': 22947, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:51:35.692962', 'step': 22947, 'epoch': 3}
{'type': 'loss', 'content': 0.015995897352695465, 'timestamp': '2025-10-02 00:51:35.703849', 'step': 22948, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:35.757635', 'step': 22948, 'epoch': 3}
{'type': 'loss', 'content': 0.08921301364898682, 'timestamp': '2025-10-02 00:51:35.761924', 'step': 22949, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:35.816310', 'step': 22949, 'epoch': 3}
{'type': 'loss', 'content': 0.05665228143334389, 'timestamp': '2025-10-02 00:51:35.821934', 'step': 22950, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:51:35.876871', 'step': 22950, 'epoch': 3}
{'type': 'loss', 'content': 0.11869949847459793, 'timestamp': '2025-10-02 00:51:35.879132', 'step': 22951, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:35.933641', 'step': 22951, 'epoch': 3}
{'type': 'loss', 'content': 0.014459307305514812, 'timestamp': '2025-10-02 00:51:35.939784', 'step': 22952, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:35.994181', 'step': 22952, 'epoch': 3}
{'type': 'loss', 'content': 0.017130723223090172, 'timestamp': '2025-10-02 00:51:35.996728', 'step': 22953, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:51:36.050522', 'step': 22953, 'epoch': 3}
{'type': 'loss', 'content': 0.03118985705077648, 'timestamp': '2025-10-02 00:51:36.053523', 'step': 22954, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:36.108243', 'step': 22954, 'epoch': 3}
{'type': 'loss', 'content': 0.05813426896929741, 'timestamp': '2025-10-02 00:51:36.110638', 'step': 22955, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:36.165753', 'step': 22955, 'epoch': 3}
{'type': 'loss', 'content': 0.02508286014199257, 'timestamp': '2025-10-02 00:51:36.171772', 'step': 22956, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:36.225324', 'step': 22956, 'epoch': 3}
{'type': 'loss', 'content': 0.06841857731342316, 'timestamp': '2025-10-02 00:51:36.227568', 'step': 22957, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:51:36.281801', 'step': 22957, 'epoch': 3}
{'type': 'loss', 'content': 0.03637790307402611, 'timestamp': '2025-10-02 00:51:36.288966', 'step': 22958, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:36.344170', 'step': 22958, 'epoch': 3}
{'type': 'loss', 'content': 0.0808434784412384, 'timestamp': '2025-10-02 00:51:36.346607', 'step': 22959, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:51:36.401143', 'step': 22959, 'epoch': 3}
{'type': 'loss', 'content': 0.05315027013421059, 'timestamp': '2025-10-02 00:51:36.408545', 'step': 22960, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:51:36.463673', 'step': 22960, 'epoch': 3}
{'type': 'loss', 'content': 0.05537896603345871, 'timestamp': '2025-10-02 00:51:36.469406', 'step': 22961, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:51:36.526158', 'step': 22961, 'epoch': 3}
{'type': 'loss', 'content': 0.04433140158653259, 'timestamp': '2025-10-02 00:51:36.535026', 'step': 22962, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:51:36.591373', 'step': 22962, 'epoch': 3}
{'type': 'loss', 'content': 0.03953240439295769, 'timestamp': '2025-10-02 00:51:36.594273', 'step': 22963, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:51:36.651048', 'step': 22963, 'epoch': 3}
{'type': 'loss', 'content': 0.036714840680360794, 'timestamp': '2025-10-02 00:51:36.657873', 'step': 22964, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:51:36.716327', 'step': 22964, 'epoch': 3}
{'type': 'loss', 'content': 0.014648455195128918, 'timestamp': '2025-10-02 00:51:36.719056', 'step': 22965, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:51:36.777642', 'step': 22965, 'epoch': 3}
{'type': 'loss', 'content': 0.1909259855747223, 'timestamp': '2025-10-02 00:51:36.780603', 'step': 22966, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:51:36.837308', 'step': 22966, 'epoch': 3}
{'type': 'loss', 'content': 0.014860565774142742, 'timestamp': '2025-10-02 00:51:36.840556', 'step': 22967, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:51:36.896300', 'step': 22967, 'epoch': 3}
{'type': 'loss', 'content': 0.05597913637757301, 'timestamp': '2025-10-02 00:51:36.903494', 'step': 22968, 'epoch': 3}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:52:09.753836', 'step': 22968, 'epoch': 3}
{'type': 'pplx', 'content': 110.77211873359994, 'timestamp': '2025-10-02 00:52:09.768529', 'step': 22968, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:09.841265', 'step': 22968, 'epoch': 3}
{'type': 'loss', 'content': 0.07019833475351334, 'timestamp': '2025-10-02 00:52:09.846576', 'step': 22969, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:09.917042', 'step': 22969, 'epoch': 3}
{'type': 'loss', 'content': 0.029004400596022606, 'timestamp': '2025-10-02 00:52:09.927441', 'step': 22970, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:52:10.019939', 'step': 22970, 'epoch': 3}
{'type': 'loss', 'content': 0.13514994084835052, 'timestamp': '2025-10-02 00:52:10.022600', 'step': 22971, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:52:10.104377', 'step': 22971, 'epoch': 3}
{'type': 'loss', 'content': 0.0017613930394873023, 'timestamp': '2025-10-02 00:52:10.118378', 'step': 22972, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:10.183947', 'step': 22972, 'epoch': 3}
{'type': 'loss', 'content': 0.08973988890647888, 'timestamp': '2025-10-02 00:52:10.187773', 'step': 22973, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:10.255566', 'step': 22973, 'epoch': 3}
{'type': 'loss', 'content': 0.08893986791372299, 'timestamp': '2025-10-02 00:52:10.269281', 'step': 22974, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:10.325831', 'step': 22974, 'epoch': 3}
{'type': 'loss', 'content': 0.05462583526968956, 'timestamp': '2025-10-02 00:52:10.328586', 'step': 22975, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:52:10.405323', 'step': 22975, 'epoch': 3}
{'type': 'loss', 'content': 0.05491937696933746, 'timestamp': '2025-10-02 00:52:10.423201', 'step': 22976, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:10.509667', 'step': 22976, 'epoch': 3}
{'type': 'loss', 'content': 0.08456156402826309, 'timestamp': '2025-10-02 00:52:10.520225', 'step': 22977, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:52:10.605631', 'step': 22977, 'epoch': 3}
{'type': 'loss', 'content': 0.06022096797823906, 'timestamp': '2025-10-02 00:52:10.609316', 'step': 22978, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:10.697181', 'step': 22978, 'epoch': 3}
{'type': 'loss', 'content': 0.053368836641311646, 'timestamp': '2025-10-02 00:52:10.706641', 'step': 22979, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:10.790422', 'step': 22979, 'epoch': 3}
{'type': 'loss', 'content': 0.0476900152862072, 'timestamp': '2025-10-02 00:52:10.797285', 'step': 22980, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:10.876921', 'step': 22980, 'epoch': 3}
{'type': 'loss', 'content': 0.03957340493798256, 'timestamp': '2025-10-02 00:52:10.884275', 'step': 22981, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:52:10.968412', 'step': 22981, 'epoch': 3}
{'type': 'loss', 'content': 0.0639357939362526, 'timestamp': '2025-10-02 00:52:10.971682', 'step': 22982, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:52:11.069052', 'step': 22982, 'epoch': 3}
{'type': 'loss', 'content': 0.027003711089491844, 'timestamp': '2025-10-02 00:52:11.081038', 'step': 22983, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:11.161093', 'step': 22983, 'epoch': 3}
{'type': 'loss', 'content': 0.044053785502910614, 'timestamp': '2025-10-02 00:52:11.173456', 'step': 22984, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:11.250187', 'step': 22984, 'epoch': 3}
{'type': 'loss', 'content': 0.05662116780877113, 'timestamp': '2025-10-02 00:52:11.253327', 'step': 22985, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:11.334340', 'step': 22985, 'epoch': 3}
{'type': 'loss', 'content': 0.039075132459402084, 'timestamp': '2025-10-02 00:52:11.344287', 'step': 22986, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:11.423438', 'step': 22986, 'epoch': 3}
{'type': 'loss', 'content': 0.05458814650774002, 'timestamp': '2025-10-02 00:52:11.426961', 'step': 22987, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:11.502536', 'step': 22987, 'epoch': 3}
{'type': 'loss', 'content': 0.026382718235254288, 'timestamp': '2025-10-02 00:52:11.516253', 'step': 22988, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:11.574600', 'step': 22988, 'epoch': 3}
{'type': 'loss', 'content': 0.013489347882568836, 'timestamp': '2025-10-02 00:52:11.581965', 'step': 22989, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:11.664588', 'step': 22989, 'epoch': 3}
{'type': 'loss', 'content': 0.03289567306637764, 'timestamp': '2025-10-02 00:52:11.668858', 'step': 22990, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:11.751533', 'step': 22990, 'epoch': 3}
{'type': 'loss', 'content': 0.027803219854831696, 'timestamp': '2025-10-02 00:52:11.763381', 'step': 22991, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:11.837717', 'step': 22991, 'epoch': 3}
{'type': 'loss', 'content': 0.011015553958714008, 'timestamp': '2025-10-02 00:52:11.854201', 'step': 22992, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:11.935695', 'step': 22992, 'epoch': 3}
{'type': 'loss', 'content': 0.044877875596284866, 'timestamp': '2025-10-02 00:52:11.947358', 'step': 22993, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:12.031826', 'step': 22993, 'epoch': 3}
{'type': 'loss', 'content': 0.009760348126292229, 'timestamp': '2025-10-02 00:52:12.042740', 'step': 22994, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:12.124859', 'step': 22994, 'epoch': 3}
{'type': 'loss', 'content': 0.1352114975452423, 'timestamp': '2025-10-02 00:52:12.135635', 'step': 22995, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:12.218827', 'step': 22995, 'epoch': 3}
{'type': 'loss', 'content': 0.07306186854839325, 'timestamp': '2025-10-02 00:52:12.233719', 'step': 22996, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:12.298960', 'step': 22996, 'epoch': 3}
{'type': 'loss', 'content': 0.050903744995594025, 'timestamp': '2025-10-02 00:52:12.308121', 'step': 22997, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:52:12.382297', 'step': 22997, 'epoch': 3}
{'type': 'loss', 'content': 0.03984220698475838, 'timestamp': '2025-10-02 00:52:12.393140', 'step': 22998, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:12.462191', 'step': 22998, 'epoch': 3}
{'type': 'loss', 'content': 0.11150404810905457, 'timestamp': '2025-10-02 00:52:12.473030', 'step': 22999, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:12.562308', 'step': 22999, 'epoch': 3}
{'type': 'loss', 'content': 0.017213840037584305, 'timestamp': '2025-10-02 00:52:12.576554', 'step': 23000, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 23000', 'timestamp': '2025-10-02 00:52:13.023778', 'step': 23000, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:13.096727', 'step': 23000, 'epoch': 3}
{'type': 'loss', 'content': 0.03374354913830757, 'timestamp': '2025-10-02 00:52:13.105977', 'step': 23001, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:13.179763', 'step': 23001, 'epoch': 3}
{'type': 'loss', 'content': 0.020806286484003067, 'timestamp': '2025-10-02 00:52:13.188383', 'step': 23002, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:52:13.271135', 'step': 23002, 'epoch': 3}
{'type': 'loss', 'content': 0.03841088339686394, 'timestamp': '2025-10-02 00:52:13.281579', 'step': 23003, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:13.359766', 'step': 23003, 'epoch': 3}
{'type': 'loss', 'content': 0.08232702314853668, 'timestamp': '2025-10-02 00:52:13.375596', 'step': 23004, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:13.449144', 'step': 23004, 'epoch': 3}
{'type': 'loss', 'content': 0.04296529293060303, 'timestamp': '2025-10-02 00:52:13.459899', 'step': 23005, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:13.524981', 'step': 23005, 'epoch': 3}
{'type': 'loss', 'content': 0.02720213681459427, 'timestamp': '2025-10-02 00:52:13.528364', 'step': 23006, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:13.587397', 'step': 23006, 'epoch': 3}
{'type': 'loss', 'content': 0.05385234206914902, 'timestamp': '2025-10-02 00:52:13.597260', 'step': 23007, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:52:13.675849', 'step': 23007, 'epoch': 3}
{'type': 'loss', 'content': 0.009322277270257473, 'timestamp': '2025-10-02 00:52:13.687179', 'step': 23008, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:13.761106', 'step': 23008, 'epoch': 3}
{'type': 'loss', 'content': 0.11303148418664932, 'timestamp': '2025-10-02 00:52:13.771097', 'step': 23009, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:13.829731', 'step': 23009, 'epoch': 3}
{'type': 'loss', 'content': 0.07332798838615417, 'timestamp': '2025-10-02 00:52:13.842381', 'step': 23010, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:13.911273', 'step': 23010, 'epoch': 3}
{'type': 'loss', 'content': 0.10911168903112411, 'timestamp': '2025-10-02 00:52:13.920114', 'step': 23011, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:52:13.995789', 'step': 23011, 'epoch': 3}
{'type': 'loss', 'content': 0.09798616170883179, 'timestamp': '2025-10-02 00:52:14.002627', 'step': 23012, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:14.066283', 'step': 23012, 'epoch': 3}
{'type': 'loss', 'content': 0.11718621104955673, 'timestamp': '2025-10-02 00:52:14.075312', 'step': 23013, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:14.159685', 'step': 23013, 'epoch': 3}
{'type': 'loss', 'content': 0.09454914182424545, 'timestamp': '2025-10-02 00:52:14.162641', 'step': 23014, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:14.220555', 'step': 23014, 'epoch': 3}
{'type': 'loss', 'content': 0.08311943709850311, 'timestamp': '2025-10-02 00:52:14.225163', 'step': 23015, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:52:14.297629', 'step': 23015, 'epoch': 3}
{'type': 'loss', 'content': 0.008507017977535725, 'timestamp': '2025-10-02 00:52:14.310303', 'step': 23016, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:14.387763', 'step': 23016, 'epoch': 3}
{'type': 'loss', 'content': 0.05969780683517456, 'timestamp': '2025-10-02 00:52:14.390942', 'step': 23017, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:14.474784', 'step': 23017, 'epoch': 3}
{'type': 'loss', 'content': 0.07090643793344498, 'timestamp': '2025-10-02 00:52:14.479022', 'step': 23018, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:14.548270', 'step': 23018, 'epoch': 3}
{'type': 'loss', 'content': 0.09824361652135849, 'timestamp': '2025-10-02 00:52:14.558806', 'step': 23019, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:14.624653', 'step': 23019, 'epoch': 3}
{'type': 'loss', 'content': 0.0515783317387104, 'timestamp': '2025-10-02 00:52:14.638241', 'step': 23020, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:14.708355', 'step': 23020, 'epoch': 3}
{'type': 'loss', 'content': 0.015715686604380608, 'timestamp': '2025-10-02 00:52:14.711994', 'step': 23021, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:14.768416', 'step': 23021, 'epoch': 3}
{'type': 'loss', 'content': 0.05230646952986717, 'timestamp': '2025-10-02 00:52:14.772038', 'step': 23022, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:14.853082', 'step': 23022, 'epoch': 3}
{'type': 'loss', 'content': 0.019832909107208252, 'timestamp': '2025-10-02 00:52:14.864397', 'step': 23023, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:14.934383', 'step': 23023, 'epoch': 3}
{'type': 'loss', 'content': 0.05039644613862038, 'timestamp': '2025-10-02 00:52:14.949513', 'step': 23024, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:15.025914', 'step': 23024, 'epoch': 3}
{'type': 'loss', 'content': 0.021714933216571808, 'timestamp': '2025-10-02 00:52:15.037810', 'step': 23025, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:15.128174', 'step': 23025, 'epoch': 3}
{'type': 'loss', 'content': 0.022099962458014488, 'timestamp': '2025-10-02 00:52:15.139423', 'step': 23026, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:15.227285', 'step': 23026, 'epoch': 3}
{'type': 'loss', 'content': 0.06844999641180038, 'timestamp': '2025-10-02 00:52:15.238186', 'step': 23027, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:15.304521', 'step': 23027, 'epoch': 3}
{'type': 'loss', 'content': 0.14368735253810883, 'timestamp': '2025-10-02 00:52:15.318379', 'step': 23028, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:52:15.407361', 'step': 23028, 'epoch': 3}
{'type': 'loss', 'content': 0.004447625949978828, 'timestamp': '2025-10-02 00:52:15.419086', 'step': 23029, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:15.485630', 'step': 23029, 'epoch': 3}
{'type': 'loss', 'content': 0.021914059296250343, 'timestamp': '2025-10-02 00:52:15.498201', 'step': 23030, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:15.588731', 'step': 23030, 'epoch': 3}
{'type': 'loss', 'content': 0.08623791486024857, 'timestamp': '2025-10-02 00:52:15.592323', 'step': 23031, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:15.670575', 'step': 23031, 'epoch': 3}
{'type': 'loss', 'content': 0.009487167000770569, 'timestamp': '2025-10-02 00:52:15.680677', 'step': 23032, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:15.745750', 'step': 23032, 'epoch': 3}
{'type': 'loss', 'content': 0.11974307894706726, 'timestamp': '2025-10-02 00:52:15.749057', 'step': 23033, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:15.809207', 'step': 23033, 'epoch': 3}
{'type': 'loss', 'content': 0.08638103306293488, 'timestamp': '2025-10-02 00:52:15.823612', 'step': 23034, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:15.921199', 'step': 23034, 'epoch': 3}
{'type': 'loss', 'content': 0.09244150668382645, 'timestamp': '2025-10-02 00:52:15.940881', 'step': 23035, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:16.051953', 'step': 23035, 'epoch': 3}
{'type': 'loss', 'content': 0.038880057632923126, 'timestamp': '2025-10-02 00:52:16.060379', 'step': 23036, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:16.146840', 'step': 23036, 'epoch': 3}
{'type': 'loss', 'content': 0.0047873579896986485, 'timestamp': '2025-10-02 00:52:16.157551', 'step': 23037, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:16.238994', 'step': 23037, 'epoch': 3}
{'type': 'loss', 'content': 0.05057503655552864, 'timestamp': '2025-10-02 00:52:16.250922', 'step': 23038, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:16.314690', 'step': 23038, 'epoch': 3}
{'type': 'loss', 'content': 0.09408204257488251, 'timestamp': '2025-10-02 00:52:16.317460', 'step': 23039, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:16.374618', 'step': 23039, 'epoch': 3}
{'type': 'loss', 'content': 0.08053254336118698, 'timestamp': '2025-10-02 00:52:16.388992', 'step': 23040, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:16.460990', 'step': 23040, 'epoch': 3}
{'type': 'loss', 'content': 0.0241882111877203, 'timestamp': '2025-10-02 00:52:16.465908', 'step': 23041, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:16.529576', 'step': 23041, 'epoch': 3}
{'type': 'loss', 'content': 0.056795574724674225, 'timestamp': '2025-10-02 00:52:16.539349', 'step': 23042, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:16.627853', 'step': 23042, 'epoch': 3}
{'type': 'loss', 'content': 0.06785158812999725, 'timestamp': '2025-10-02 00:52:16.645703', 'step': 23043, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:16.749903', 'step': 23043, 'epoch': 3}
{'type': 'loss', 'content': 0.02058752067387104, 'timestamp': '2025-10-02 00:52:16.762543', 'step': 23044, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:16.827498', 'step': 23044, 'epoch': 3}
{'type': 'loss', 'content': 0.1751810610294342, 'timestamp': '2025-10-02 00:52:16.831132', 'step': 23045, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:16.892887', 'step': 23045, 'epoch': 3}
{'type': 'loss', 'content': 0.04465354606509209, 'timestamp': '2025-10-02 00:52:16.895508', 'step': 23046, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:16.963022', 'step': 23046, 'epoch': 3}
{'type': 'loss', 'content': 0.14107318222522736, 'timestamp': '2025-10-02 00:52:16.970624', 'step': 23047, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:17.041100', 'step': 23047, 'epoch': 3}
{'type': 'loss', 'content': 0.06030884012579918, 'timestamp': '2025-10-02 00:52:17.048453', 'step': 23048, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:52:17.157170', 'step': 23048, 'epoch': 3}
{'type': 'loss', 'content': 0.014826212078332901, 'timestamp': '2025-10-02 00:52:17.170491', 'step': 23049, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:17.248515', 'step': 23049, 'epoch': 3}
{'type': 'loss', 'content': 0.02510501816868782, 'timestamp': '2025-10-02 00:52:17.257782', 'step': 23050, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:17.336684', 'step': 23050, 'epoch': 3}
{'type': 'loss', 'content': 0.1454092115163803, 'timestamp': '2025-10-02 00:52:17.347385', 'step': 23051, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:17.426315', 'step': 23051, 'epoch': 3}
{'type': 'loss', 'content': 0.16908565163612366, 'timestamp': '2025-10-02 00:52:17.433626', 'step': 23052, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:17.510815', 'step': 23052, 'epoch': 3}
{'type': 'loss', 'content': 0.03390035405755043, 'timestamp': '2025-10-02 00:52:17.522420', 'step': 23053, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:52:17.613941', 'step': 23053, 'epoch': 3}
{'type': 'loss', 'content': 0.017610857263207436, 'timestamp': '2025-10-02 00:52:17.626531', 'step': 23054, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:17.688111', 'step': 23054, 'epoch': 3}
{'type': 'loss', 'content': 0.05749291554093361, 'timestamp': '2025-10-02 00:52:17.698212', 'step': 23055, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:17.781107', 'step': 23055, 'epoch': 3}
{'type': 'loss', 'content': 0.03840065747499466, 'timestamp': '2025-10-02 00:52:17.789076', 'step': 23056, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:17.853623', 'step': 23056, 'epoch': 3}
{'type': 'loss', 'content': 0.07677963376045227, 'timestamp': '2025-10-02 00:52:17.866834', 'step': 23057, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:17.950534', 'step': 23057, 'epoch': 3}
{'type': 'loss', 'content': 0.05519365146756172, 'timestamp': '2025-10-02 00:52:17.961176', 'step': 23058, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:18.035842', 'step': 23058, 'epoch': 3}
{'type': 'loss', 'content': 0.12186749279499054, 'timestamp': '2025-10-02 00:52:18.047125', 'step': 23059, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:52:18.106048', 'step': 23059, 'epoch': 3}
{'type': 'loss', 'content': 0.058634281158447266, 'timestamp': '2025-10-02 00:52:18.112955', 'step': 23060, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:18.177440', 'step': 23060, 'epoch': 3}
{'type': 'loss', 'content': 0.0013057359028607607, 'timestamp': '2025-10-02 00:52:18.183671', 'step': 23061, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:18.251265', 'step': 23061, 'epoch': 3}
{'type': 'loss', 'content': 0.06829211860895157, 'timestamp': '2025-10-02 00:52:18.255597', 'step': 23062, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:52:18.315494', 'step': 23062, 'epoch': 3}
{'type': 'loss', 'content': 0.09718358516693115, 'timestamp': '2025-10-02 00:52:18.326345', 'step': 23063, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:18.411259', 'step': 23063, 'epoch': 3}
{'type': 'loss', 'content': 0.081975057721138, 'timestamp': '2025-10-02 00:52:18.421547', 'step': 23064, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:18.487820', 'step': 23064, 'epoch': 3}
{'type': 'loss', 'content': 0.042523834854364395, 'timestamp': '2025-10-02 00:52:18.494560', 'step': 23065, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:18.587302', 'step': 23065, 'epoch': 3}
{'type': 'loss', 'content': 0.04443281888961792, 'timestamp': '2025-10-02 00:52:18.602206', 'step': 23066, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:18.664278', 'step': 23066, 'epoch': 3}
{'type': 'loss', 'content': 0.06258496642112732, 'timestamp': '2025-10-02 00:52:18.679348', 'step': 23067, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:18.749376', 'step': 23067, 'epoch': 3}
{'type': 'loss', 'content': 0.09509436041116714, 'timestamp': '2025-10-02 00:52:18.757152', 'step': 23068, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:18.851274', 'step': 23068, 'epoch': 3}
{'type': 'loss', 'content': 0.05441749840974808, 'timestamp': '2025-10-02 00:52:18.861460', 'step': 23069, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:52:18.953766', 'step': 23069, 'epoch': 3}
{'type': 'loss', 'content': 0.08339071273803711, 'timestamp': '2025-10-02 00:52:18.956833', 'step': 23070, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:19.040360', 'step': 23070, 'epoch': 3}
{'type': 'loss', 'content': 0.10672695934772491, 'timestamp': '2025-10-02 00:52:19.052851', 'step': 23071, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:19.134015', 'step': 23071, 'epoch': 3}
{'type': 'loss', 'content': 0.038238540291786194, 'timestamp': '2025-10-02 00:52:19.150689', 'step': 23072, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:19.238866', 'step': 23072, 'epoch': 3}
{'type': 'loss', 'content': 0.10211042314767838, 'timestamp': '2025-10-02 00:52:19.252353', 'step': 23073, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:19.341233', 'step': 23073, 'epoch': 3}
{'type': 'loss', 'content': 0.11407140642404556, 'timestamp': '2025-10-02 00:52:19.344665', 'step': 23074, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:19.436889', 'step': 23074, 'epoch': 3}
{'type': 'loss', 'content': 0.024631649255752563, 'timestamp': '2025-10-02 00:52:19.440153', 'step': 23075, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:52:19.511726', 'step': 23075, 'epoch': 3}
{'type': 'loss', 'content': 0.04167935624718666, 'timestamp': '2025-10-02 00:52:19.520077', 'step': 23076, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:19.603965', 'step': 23076, 'epoch': 3}
{'type': 'loss', 'content': 0.07006961107254028, 'timestamp': '2025-10-02 00:52:19.623042', 'step': 23077, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:52:19.729061', 'step': 23077, 'epoch': 3}
{'type': 'loss', 'content': 0.0025797022972255945, 'timestamp': '2025-10-02 00:52:19.748507', 'step': 23078, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:19.837210', 'step': 23078, 'epoch': 3}
{'type': 'loss', 'content': 0.01734105870127678, 'timestamp': '2025-10-02 00:52:19.853637', 'step': 23079, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:19.920860', 'step': 23079, 'epoch': 3}
{'type': 'loss', 'content': 0.004134685266762972, 'timestamp': '2025-10-02 00:52:19.931813', 'step': 23080, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:20.020951', 'step': 23080, 'epoch': 3}
{'type': 'loss', 'content': 0.12955132126808167, 'timestamp': '2025-10-02 00:52:20.038357', 'step': 23081, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:52:20.137666', 'step': 23081, 'epoch': 3}
{'type': 'loss', 'content': 0.051250725984573364, 'timestamp': '2025-10-02 00:52:20.143556', 'step': 23082, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:20.219155', 'step': 23082, 'epoch': 3}
{'type': 'loss', 'content': 0.0404835008084774, 'timestamp': '2025-10-02 00:52:20.234011', 'step': 23083, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:52:20.344514', 'step': 23083, 'epoch': 3}
{'type': 'loss', 'content': 0.02813008613884449, 'timestamp': '2025-10-02 00:52:20.363483', 'step': 23084, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:20.465888', 'step': 23084, 'epoch': 3}
{'type': 'loss', 'content': 0.004489049781113863, 'timestamp': '2025-10-02 00:52:20.471218', 'step': 23085, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:20.554132', 'step': 23085, 'epoch': 3}
{'type': 'loss', 'content': 0.0035304087214171886, 'timestamp': '2025-10-02 00:52:20.570015', 'step': 23086, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:20.642994', 'step': 23086, 'epoch': 3}
{'type': 'loss', 'content': 0.012671409174799919, 'timestamp': '2025-10-02 00:52:20.652212', 'step': 23087, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:20.714151', 'step': 23087, 'epoch': 3}
{'type': 'loss', 'content': 0.04847788065671921, 'timestamp': '2025-10-02 00:52:20.721526', 'step': 23088, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:20.789894', 'step': 23088, 'epoch': 3}
{'type': 'loss', 'content': 0.05000266805291176, 'timestamp': '2025-10-02 00:52:20.794362', 'step': 23089, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:52:20.869046', 'step': 23089, 'epoch': 3}
{'type': 'loss', 'content': 0.12647393345832825, 'timestamp': '2025-10-02 00:52:20.872220', 'step': 23090, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:20.958526', 'step': 23090, 'epoch': 3}
{'type': 'loss', 'content': 0.0046272738836705685, 'timestamp': '2025-10-02 00:52:20.973616', 'step': 23091, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:21.084031', 'step': 23091, 'epoch': 3}
{'type': 'loss', 'content': 0.012456168420612812, 'timestamp': '2025-10-02 00:52:21.093971', 'step': 23092, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:21.182997', 'step': 23092, 'epoch': 3}
{'type': 'loss', 'content': 0.023166442289948463, 'timestamp': '2025-10-02 00:52:21.190238', 'step': 23093, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:21.253836', 'step': 23093, 'epoch': 3}
{'type': 'loss', 'content': 0.011423285119235516, 'timestamp': '2025-10-02 00:52:21.270776', 'step': 23094, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:21.342331', 'step': 23094, 'epoch': 3}
{'type': 'loss', 'content': 0.023996392264962196, 'timestamp': '2025-10-02 00:52:21.349130', 'step': 23095, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:21.436518', 'step': 23095, 'epoch': 3}
{'type': 'loss', 'content': 0.0801723301410675, 'timestamp': '2025-10-02 00:52:21.444897', 'step': 23096, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:21.544232', 'step': 23096, 'epoch': 3}
{'type': 'loss', 'content': 0.07808279991149902, 'timestamp': '2025-10-02 00:52:21.559132', 'step': 23097, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:52:21.641316', 'step': 23097, 'epoch': 3}
{'type': 'loss', 'content': 0.013813227415084839, 'timestamp': '2025-10-02 00:52:21.659411', 'step': 23098, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:21.757786', 'step': 23098, 'epoch': 3}
{'type': 'loss', 'content': 0.08622624725103378, 'timestamp': '2025-10-02 00:52:21.761986', 'step': 23099, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:52:21.821629', 'step': 23099, 'epoch': 3}
{'type': 'loss', 'content': 0.04385579749941826, 'timestamp': '2025-10-02 00:52:21.829803', 'step': 23100, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:52:21.929261', 'step': 23100, 'epoch': 3}
{'type': 'loss', 'content': 0.03539654612541199, 'timestamp': '2025-10-02 00:52:21.946114', 'step': 23101, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:22.019333', 'step': 23101, 'epoch': 3}
{'type': 'loss', 'content': 0.021662412211298943, 'timestamp': '2025-10-02 00:52:22.034935', 'step': 23102, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:22.132026', 'step': 23102, 'epoch': 3}
{'type': 'loss', 'content': 0.02332845889031887, 'timestamp': '2025-10-02 00:52:22.147294', 'step': 23103, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:22.235777', 'step': 23103, 'epoch': 3}
{'type': 'loss', 'content': 0.021276172250509262, 'timestamp': '2025-10-02 00:52:22.247167', 'step': 23104, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:22.305639', 'step': 23104, 'epoch': 3}
{'type': 'loss', 'content': 0.07013928145170212, 'timestamp': '2025-10-02 00:52:22.309976', 'step': 23105, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:22.370432', 'step': 23105, 'epoch': 3}
{'type': 'loss', 'content': 0.018070222809910774, 'timestamp': '2025-10-02 00:52:22.379975', 'step': 23106, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:22.438644', 'step': 23106, 'epoch': 3}
{'type': 'loss', 'content': 0.06894171237945557, 'timestamp': '2025-10-02 00:52:22.454189', 'step': 23107, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:22.557218', 'step': 23107, 'epoch': 3}
{'type': 'loss', 'content': 0.06803084164857864, 'timestamp': '2025-10-02 00:52:22.567360', 'step': 23108, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:52:22.630792', 'step': 23108, 'epoch': 3}
{'type': 'loss', 'content': 0.038636714220047, 'timestamp': '2025-10-02 00:52:22.645966', 'step': 23109, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:22.706741', 'step': 23109, 'epoch': 3}
{'type': 'loss', 'content': 0.033023733645677567, 'timestamp': '2025-10-02 00:52:22.713781', 'step': 23110, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:22.790103', 'step': 23110, 'epoch': 3}
{'type': 'loss', 'content': 0.10965440422296524, 'timestamp': '2025-10-02 00:52:22.794325', 'step': 23111, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:52:22.881533', 'step': 23111, 'epoch': 3}
{'type': 'loss', 'content': 0.012842997908592224, 'timestamp': '2025-10-02 00:52:22.901672', 'step': 23112, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:22.971138', 'step': 23112, 'epoch': 3}
{'type': 'loss', 'content': 0.041852060705423355, 'timestamp': '2025-10-02 00:52:22.974861', 'step': 23113, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:23.034912', 'step': 23113, 'epoch': 3}
{'type': 'loss', 'content': 0.10204316675662994, 'timestamp': '2025-10-02 00:52:23.044390', 'step': 23114, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:23.137550', 'step': 23114, 'epoch': 3}
{'type': 'loss', 'content': 0.036937881261110306, 'timestamp': '2025-10-02 00:52:23.155510', 'step': 23115, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:52:23.279191', 'step': 23115, 'epoch': 3}
{'type': 'loss', 'content': 0.09630324691534042, 'timestamp': '2025-10-02 00:52:23.287664', 'step': 23116, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:23.363692', 'step': 23116, 'epoch': 3}
{'type': 'loss', 'content': 0.19697162508964539, 'timestamp': '2025-10-02 00:52:23.382043', 'step': 23117, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:23.485848', 'step': 23117, 'epoch': 3}
{'type': 'loss', 'content': 0.06557205319404602, 'timestamp': '2025-10-02 00:52:23.491946', 'step': 23118, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:52:23.556890', 'step': 23118, 'epoch': 3}
{'type': 'loss', 'content': 0.046650610864162445, 'timestamp': '2025-10-02 00:52:23.569771', 'step': 23119, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:23.655900', 'step': 23119, 'epoch': 3}
{'type': 'loss', 'content': 0.02147386036813259, 'timestamp': '2025-10-02 00:52:23.673849', 'step': 23120, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:52:23.766412', 'step': 23120, 'epoch': 3}
{'type': 'loss', 'content': 0.1647665649652481, 'timestamp': '2025-10-02 00:52:23.770590', 'step': 23121, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:23.840646', 'step': 23121, 'epoch': 3}
{'type': 'loss', 'content': 0.08749253302812576, 'timestamp': '2025-10-02 00:52:23.856826', 'step': 23122, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:52:23.950889', 'step': 23122, 'epoch': 3}
{'type': 'loss', 'content': 0.15108166635036469, 'timestamp': '2025-10-02 00:52:23.955435', 'step': 23123, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:24.028425', 'step': 23123, 'epoch': 3}
{'type': 'loss', 'content': 0.0510091632604599, 'timestamp': '2025-10-02 00:52:24.048644', 'step': 23124, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:24.119456', 'step': 23124, 'epoch': 3}
{'type': 'loss', 'content': 0.014926428906619549, 'timestamp': '2025-10-02 00:52:24.128561', 'step': 23125, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:24.222652', 'step': 23125, 'epoch': 3}
{'type': 'loss', 'content': 0.04691820964217186, 'timestamp': '2025-10-02 00:52:24.232100', 'step': 23126, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:24.329687', 'step': 23126, 'epoch': 3}
{'type': 'loss', 'content': 0.06947499513626099, 'timestamp': '2025-10-02 00:52:24.334293', 'step': 23127, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:24.393835', 'step': 23127, 'epoch': 3}
{'type': 'loss', 'content': 0.1194145530462265, 'timestamp': '2025-10-02 00:52:24.401706', 'step': 23128, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:24.460905', 'step': 23128, 'epoch': 3}
{'type': 'loss', 'content': 0.06088555231690407, 'timestamp': '2025-10-02 00:52:24.465540', 'step': 23129, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:24.527467', 'step': 23129, 'epoch': 3}
{'type': 'loss', 'content': 0.02180510386824608, 'timestamp': '2025-10-02 00:52:24.544936', 'step': 23130, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:24.650944', 'step': 23130, 'epoch': 3}
{'type': 'loss', 'content': 0.028541799634695053, 'timestamp': '2025-10-02 00:52:24.668983', 'step': 23131, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:24.766142', 'step': 23131, 'epoch': 3}
{'type': 'loss', 'content': 0.11506754904985428, 'timestamp': '2025-10-02 00:52:24.787572', 'step': 23132, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:24.915907', 'step': 23132, 'epoch': 3}
{'type': 'loss', 'content': 0.03506968170404434, 'timestamp': '2025-10-02 00:52:24.923043', 'step': 23133, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:24.986768', 'step': 23133, 'epoch': 3}
{'type': 'loss', 'content': 0.03644964098930359, 'timestamp': '2025-10-02 00:52:25.002633', 'step': 23134, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:25.109134', 'step': 23134, 'epoch': 3}
{'type': 'loss', 'content': 0.049755923449993134, 'timestamp': '2025-10-02 00:52:25.124858', 'step': 23135, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:25.232052', 'step': 23135, 'epoch': 3}
{'type': 'loss', 'content': 0.02281130850315094, 'timestamp': '2025-10-02 00:52:25.252892', 'step': 23136, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:25.349903', 'step': 23136, 'epoch': 3}
{'type': 'loss', 'content': 0.07964695245027542, 'timestamp': '2025-10-02 00:52:25.371819', 'step': 23137, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:25.448516', 'step': 23137, 'epoch': 3}
{'type': 'loss', 'content': 0.03340456634759903, 'timestamp': '2025-10-02 00:52:25.464004', 'step': 23138, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:25.537902', 'step': 23138, 'epoch': 3}
{'type': 'loss', 'content': 0.19020968675613403, 'timestamp': '2025-10-02 00:52:25.555347', 'step': 23139, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:25.641235', 'step': 23139, 'epoch': 3}
{'type': 'loss', 'content': 0.03649869188666344, 'timestamp': '2025-10-02 00:52:25.649251', 'step': 23140, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:25.758648', 'step': 23140, 'epoch': 3}
{'type': 'loss', 'content': 0.0012475316179916263, 'timestamp': '2025-10-02 00:52:25.764161', 'step': 23141, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:52:25.830855', 'step': 23141, 'epoch': 3}
{'type': 'loss', 'content': 0.034763310104608536, 'timestamp': '2025-10-02 00:52:25.847540', 'step': 23142, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:25.950280', 'step': 23142, 'epoch': 3}
{'type': 'loss', 'content': 0.012277504429221153, 'timestamp': '2025-10-02 00:52:25.966550', 'step': 23143, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:26.064398', 'step': 23143, 'epoch': 3}
{'type': 'loss', 'content': 0.12095329165458679, 'timestamp': '2025-10-02 00:52:26.086988', 'step': 23144, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:26.207604', 'step': 23144, 'epoch': 3}
{'type': 'loss', 'content': 0.05595650523900986, 'timestamp': '2025-10-02 00:52:26.212022', 'step': 23145, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:26.271065', 'step': 23145, 'epoch': 3}
{'type': 'loss', 'content': 0.06264146417379379, 'timestamp': '2025-10-02 00:52:26.290643', 'step': 23146, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:26.352242', 'step': 23146, 'epoch': 3}
{'type': 'loss', 'content': 0.0688432902097702, 'timestamp': '2025-10-02 00:52:26.356340', 'step': 23147, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:52:26.439005', 'step': 23147, 'epoch': 3}
{'type': 'loss', 'content': 0.07700232416391373, 'timestamp': '2025-10-02 00:52:26.458273', 'step': 23148, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:26.517691', 'step': 23148, 'epoch': 3}
{'type': 'loss', 'content': 0.04639562591910362, 'timestamp': '2025-10-02 00:52:26.523332', 'step': 23149, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:26.617190', 'step': 23149, 'epoch': 3}
{'type': 'loss', 'content': 0.07415693998336792, 'timestamp': '2025-10-02 00:52:26.625671', 'step': 23150, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:26.723301', 'step': 23150, 'epoch': 3}
{'type': 'loss', 'content': 0.029277313500642776, 'timestamp': '2025-10-02 00:52:26.727769', 'step': 23151, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:26.826728', 'step': 23151, 'epoch': 3}
{'type': 'loss', 'content': 0.1350233107805252, 'timestamp': '2025-10-02 00:52:26.835481', 'step': 23152, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:26.936825', 'step': 23152, 'epoch': 3}
{'type': 'loss', 'content': 0.03014000505208969, 'timestamp': '2025-10-02 00:52:26.952956', 'step': 23153, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:27.051598', 'step': 23153, 'epoch': 3}
{'type': 'loss', 'content': 0.08130977302789688, 'timestamp': '2025-10-02 00:52:27.067514', 'step': 23154, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:27.169422', 'step': 23154, 'epoch': 3}
{'type': 'loss', 'content': 0.01405204739421606, 'timestamp': '2025-10-02 00:52:27.174963', 'step': 23155, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:27.233718', 'step': 23155, 'epoch': 3}
{'type': 'loss', 'content': 0.022943807765841484, 'timestamp': '2025-10-02 00:52:27.254401', 'step': 23156, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:52:27.358410', 'step': 23156, 'epoch': 3}
{'type': 'loss', 'content': 0.181552454829216, 'timestamp': '2025-10-02 00:52:27.379551', 'step': 23157, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:27.440300', 'step': 23157, 'epoch': 3}
{'type': 'loss', 'content': 0.02346009388566017, 'timestamp': '2025-10-02 00:52:27.465059', 'step': 23158, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:27.599699', 'step': 23158, 'epoch': 3}
{'type': 'loss', 'content': 0.027275942265987396, 'timestamp': '2025-10-02 00:52:27.604411', 'step': 23159, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:27.681828', 'step': 23159, 'epoch': 3}
{'type': 'loss', 'content': 0.10458365082740784, 'timestamp': '2025-10-02 00:52:27.706425', 'step': 23160, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:27.813915', 'step': 23160, 'epoch': 3}
{'type': 'loss', 'content': 0.08653315156698227, 'timestamp': '2025-10-02 00:52:27.819155', 'step': 23161, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:27.894045', 'step': 23161, 'epoch': 3}
{'type': 'loss', 'content': 0.014829130843281746, 'timestamp': '2025-10-02 00:52:27.901588', 'step': 23162, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:52:27.964223', 'step': 23162, 'epoch': 3}
{'type': 'loss', 'content': 0.11298470199108124, 'timestamp': '2025-10-02 00:52:27.980415', 'step': 23163, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:52:28.048078', 'step': 23163, 'epoch': 3}
{'type': 'loss', 'content': 0.009565863758325577, 'timestamp': '2025-10-02 00:52:28.071670', 'step': 23164, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:52:28.183769', 'step': 23164, 'epoch': 3}
{'type': 'loss', 'content': 0.04723489284515381, 'timestamp': '2025-10-02 00:52:28.201253', 'step': 23165, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:28.324253', 'step': 23165, 'epoch': 3}
{'type': 'loss', 'content': 0.11226530373096466, 'timestamp': '2025-10-02 00:52:28.330389', 'step': 23166, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:28.423849', 'step': 23166, 'epoch': 3}
{'type': 'loss', 'content': 0.03801970183849335, 'timestamp': '2025-10-02 00:52:28.446489', 'step': 23167, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:28.549931', 'step': 23167, 'epoch': 3}
{'type': 'loss', 'content': 0.08190904557704926, 'timestamp': '2025-10-02 00:52:28.560839', 'step': 23168, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:28.646264', 'step': 23168, 'epoch': 3}
{'type': 'loss', 'content': 0.04807518795132637, 'timestamp': '2025-10-02 00:52:28.663794', 'step': 23169, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:28.725275', 'step': 23169, 'epoch': 3}
{'type': 'loss', 'content': 0.08201433718204498, 'timestamp': '2025-10-02 00:52:28.730259', 'step': 23170, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:28.819257', 'step': 23170, 'epoch': 3}
{'type': 'loss', 'content': 0.049503859132528305, 'timestamp': '2025-10-02 00:52:28.846760', 'step': 23171, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:28.946661', 'step': 23171, 'epoch': 3}
{'type': 'loss', 'content': 0.030870189890265465, 'timestamp': '2025-10-02 00:52:28.954164', 'step': 23172, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:29.026904', 'step': 23172, 'epoch': 3}
{'type': 'loss', 'content': 0.11801417171955109, 'timestamp': '2025-10-02 00:52:29.034272', 'step': 23173, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:29.125632', 'step': 23173, 'epoch': 3}
{'type': 'loss', 'content': 0.05861850827932358, 'timestamp': '2025-10-02 00:52:29.134985', 'step': 23174, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:29.233027', 'step': 23174, 'epoch': 3}
{'type': 'loss', 'content': 0.05282876640558243, 'timestamp': '2025-10-02 00:52:29.253089', 'step': 23175, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:29.355912', 'step': 23175, 'epoch': 3}
{'type': 'loss', 'content': 0.058995164930820465, 'timestamp': '2025-10-02 00:52:29.378801', 'step': 23176, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:29.450686', 'step': 23176, 'epoch': 3}
{'type': 'loss', 'content': 0.037970419973134995, 'timestamp': '2025-10-02 00:52:29.456426', 'step': 23177, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:52:29.557837', 'step': 23177, 'epoch': 3}
{'type': 'loss', 'content': 0.062316883355379105, 'timestamp': '2025-10-02 00:52:29.563053', 'step': 23178, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:29.624825', 'step': 23178, 'epoch': 3}
{'type': 'loss', 'content': 0.038924675434827805, 'timestamp': '2025-10-02 00:52:29.648463', 'step': 23179, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:29.743659', 'step': 23179, 'epoch': 3}
{'type': 'loss', 'content': 0.09340540319681168, 'timestamp': '2025-10-02 00:52:29.762950', 'step': 23180, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:29.856716', 'step': 23180, 'epoch': 3}
{'type': 'loss', 'content': 0.004502648953348398, 'timestamp': '2025-10-02 00:52:29.873818', 'step': 23181, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:52:29.965016', 'step': 23181, 'epoch': 3}
{'type': 'loss', 'content': 0.04552019387483597, 'timestamp': '2025-10-02 00:52:29.986642', 'step': 23182, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:52:30.046180', 'step': 23182, 'epoch': 3}
{'type': 'loss', 'content': 0.05745355039834976, 'timestamp': '2025-10-02 00:52:30.050116', 'step': 23183, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:30.118019', 'step': 23183, 'epoch': 3}
{'type': 'loss', 'content': 0.013650014996528625, 'timestamp': '2025-10-02 00:52:30.141464', 'step': 23184, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:30.201637', 'step': 23184, 'epoch': 3}
{'type': 'loss', 'content': 0.09885542839765549, 'timestamp': '2025-10-02 00:52:30.205125', 'step': 23185, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:30.276391', 'step': 23185, 'epoch': 3}
{'type': 'loss', 'content': 0.040776561945676804, 'timestamp': '2025-10-02 00:52:30.281529', 'step': 23186, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:52:30.352567', 'step': 23186, 'epoch': 3}
{'type': 'loss', 'content': 0.10909882932901382, 'timestamp': '2025-10-02 00:52:30.369340', 'step': 23187, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:52:30.454748', 'step': 23187, 'epoch': 3}
{'type': 'loss', 'content': 0.006868021097034216, 'timestamp': '2025-10-02 00:52:30.473801', 'step': 23188, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:30.569361', 'step': 23188, 'epoch': 3}
{'type': 'loss', 'content': 0.0167807899415493, 'timestamp': '2025-10-02 00:52:30.587051', 'step': 23189, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:30.674200', 'step': 23189, 'epoch': 3}
{'type': 'loss', 'content': 0.02709345705807209, 'timestamp': '2025-10-02 00:52:30.683528', 'step': 23190, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:30.743745', 'step': 23190, 'epoch': 3}
{'type': 'loss', 'content': 0.04422352835536003, 'timestamp': '2025-10-02 00:52:30.753279', 'step': 23191, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:30.835552', 'step': 23191, 'epoch': 3}
{'type': 'loss', 'content': 0.023497262969613075, 'timestamp': '2025-10-02 00:52:30.851619', 'step': 23192, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:30.941891', 'step': 23192, 'epoch': 3}
{'type': 'loss', 'content': 0.03573014587163925, 'timestamp': '2025-10-02 00:52:30.950997', 'step': 23193, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:31.038095', 'step': 23193, 'epoch': 3}
{'type': 'loss', 'content': 0.04810523986816406, 'timestamp': '2025-10-02 00:52:31.042515', 'step': 23194, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:31.111733', 'step': 23194, 'epoch': 3}
{'type': 'loss', 'content': 0.03205921873450279, 'timestamp': '2025-10-02 00:52:31.126649', 'step': 23195, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:31.207335', 'step': 23195, 'epoch': 3}
{'type': 'loss', 'content': 0.0679377093911171, 'timestamp': '2025-10-02 00:52:31.215324', 'step': 23196, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:31.287255', 'step': 23196, 'epoch': 3}
{'type': 'loss', 'content': 0.16957561671733856, 'timestamp': '2025-10-02 00:52:31.290381', 'step': 23197, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:31.372270', 'step': 23197, 'epoch': 3}
{'type': 'loss', 'content': 0.0541844442486763, 'timestamp': '2025-10-02 00:52:31.388646', 'step': 23198, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:31.502689', 'step': 23198, 'epoch': 3}
{'type': 'loss', 'content': 0.015826111659407616, 'timestamp': '2025-10-02 00:52:31.507010', 'step': 23199, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:31.602309', 'step': 23199, 'epoch': 3}
{'type': 'loss', 'content': 0.045306913554668427, 'timestamp': '2025-10-02 00:52:31.622239', 'step': 23200, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:31.708922', 'step': 23200, 'epoch': 3}
{'type': 'loss', 'content': 0.11406998336315155, 'timestamp': '2025-10-02 00:52:31.712709', 'step': 23201, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:31.772614', 'step': 23201, 'epoch': 3}
{'type': 'loss', 'content': 0.03491408750414848, 'timestamp': '2025-10-02 00:52:31.779639', 'step': 23202, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:31.878686', 'step': 23202, 'epoch': 3}
{'type': 'loss', 'content': 0.06441809982061386, 'timestamp': '2025-10-02 00:52:31.892839', 'step': 23203, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:31.976513', 'step': 23203, 'epoch': 3}
{'type': 'loss', 'content': 0.04026506468653679, 'timestamp': '2025-10-02 00:52:31.991769', 'step': 23204, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:32.080913', 'step': 23204, 'epoch': 3}
{'type': 'loss', 'content': 0.05084632337093353, 'timestamp': '2025-10-02 00:52:32.093041', 'step': 23205, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:32.183425', 'step': 23205, 'epoch': 3}
{'type': 'loss', 'content': 0.056790243834257126, 'timestamp': '2025-10-02 00:52:32.198964', 'step': 23206, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:32.307293', 'step': 23206, 'epoch': 3}
{'type': 'loss', 'content': 0.02707992121577263, 'timestamp': '2025-10-02 00:52:32.311857', 'step': 23207, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:32.396403', 'step': 23207, 'epoch': 3}
{'type': 'loss', 'content': 0.04872084781527519, 'timestamp': '2025-10-02 00:52:32.405173', 'step': 23208, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:32.467384', 'step': 23208, 'epoch': 3}
{'type': 'loss', 'content': 0.0668095126748085, 'timestamp': '2025-10-02 00:52:32.473050', 'step': 23209, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:32.562253', 'step': 23209, 'epoch': 3}
{'type': 'loss', 'content': 0.056220218539237976, 'timestamp': '2025-10-02 00:52:32.566720', 'step': 23210, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:32.666736', 'step': 23210, 'epoch': 3}
{'type': 'loss', 'content': 0.019309552386403084, 'timestamp': '2025-10-02 00:52:32.682007', 'step': 23211, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:32.780682', 'step': 23211, 'epoch': 3}
{'type': 'loss', 'content': 0.02537313848733902, 'timestamp': '2025-10-02 00:52:32.790913', 'step': 23212, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:32.852011', 'step': 23212, 'epoch': 3}
{'type': 'loss', 'content': 0.00452072499319911, 'timestamp': '2025-10-02 00:52:32.864981', 'step': 23213, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:32.934591', 'step': 23213, 'epoch': 3}
{'type': 'loss', 'content': 0.012700319290161133, 'timestamp': '2025-10-02 00:52:32.940173', 'step': 23214, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:33.026511', 'step': 23214, 'epoch': 3}
{'type': 'loss', 'content': 0.022985296323895454, 'timestamp': '2025-10-02 00:52:33.036702', 'step': 23215, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:33.119031', 'step': 23215, 'epoch': 3}
{'type': 'loss', 'content': 0.038752928376197815, 'timestamp': '2025-10-02 00:52:33.125767', 'step': 23216, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:52:33.203724', 'step': 23216, 'epoch': 3}
{'type': 'loss', 'content': 0.013213695958256721, 'timestamp': '2025-10-02 00:52:33.217176', 'step': 23217, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:33.287310', 'step': 23217, 'epoch': 3}
{'type': 'loss', 'content': 0.08660344034433365, 'timestamp': '2025-10-02 00:52:33.302926', 'step': 23218, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:33.361788', 'step': 23218, 'epoch': 3}
{'type': 'loss', 'content': 0.09742813557386398, 'timestamp': '2025-10-02 00:52:33.376590', 'step': 23219, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:33.475819', 'step': 23219, 'epoch': 3}
{'type': 'loss', 'content': 0.0006095742573961616, 'timestamp': '2025-10-02 00:52:33.493991', 'step': 23220, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:52:33.558650', 'step': 23220, 'epoch': 3}
{'type': 'loss', 'content': 0.028485475108027458, 'timestamp': '2025-10-02 00:52:33.571043', 'step': 23221, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:33.662325', 'step': 23221, 'epoch': 3}
{'type': 'loss', 'content': 0.04431075602769852, 'timestamp': '2025-10-02 00:52:33.677678', 'step': 23222, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:33.759217', 'step': 23222, 'epoch': 3}
{'type': 'loss', 'content': 0.08930964767932892, 'timestamp': '2025-10-02 00:52:33.762493', 'step': 23223, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:33.845353', 'step': 23223, 'epoch': 3}
{'type': 'loss', 'content': 0.1439782977104187, 'timestamp': '2025-10-02 00:52:33.861662', 'step': 23224, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:33.935049', 'step': 23224, 'epoch': 3}
{'type': 'loss', 'content': 0.0514327734708786, 'timestamp': '2025-10-02 00:52:33.946468', 'step': 23225, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:34.028133', 'step': 23225, 'epoch': 3}
{'type': 'loss', 'content': 0.05835402011871338, 'timestamp': '2025-10-02 00:52:34.032639', 'step': 23226, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:52:34.102913', 'step': 23226, 'epoch': 3}
{'type': 'loss', 'content': 0.0416216216981411, 'timestamp': '2025-10-02 00:52:34.113747', 'step': 23227, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:34.194848', 'step': 23227, 'epoch': 3}
{'type': 'loss', 'content': 0.034928593784570694, 'timestamp': '2025-10-02 00:52:34.211594', 'step': 23228, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:34.299871', 'step': 23228, 'epoch': 3}
{'type': 'loss', 'content': 0.07141334563493729, 'timestamp': '2025-10-02 00:52:34.311522', 'step': 23229, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:34.409166', 'step': 23229, 'epoch': 3}
{'type': 'loss', 'content': 0.026941128075122833, 'timestamp': '2025-10-02 00:52:34.415471', 'step': 23230, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:34.476433', 'step': 23230, 'epoch': 3}
{'type': 'loss', 'content': 0.0656488686800003, 'timestamp': '2025-10-02 00:52:34.488191', 'step': 23231, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:34.569038', 'step': 23231, 'epoch': 3}
{'type': 'loss', 'content': 0.005298370495438576, 'timestamp': '2025-10-02 00:52:34.575904', 'step': 23232, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-10-02 00:52:34.679935', 'step': 23232, 'epoch': 3}
{'type': 'loss', 'content': 0.022541847079992294, 'timestamp': '2025-10-02 00:52:34.696225', 'step': 23233, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:34.762982', 'step': 23233, 'epoch': 3}
{'type': 'loss', 'content': 0.047035276889801025, 'timestamp': '2025-10-02 00:52:34.772552', 'step': 23234, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:52:34.849750', 'step': 23234, 'epoch': 3}
{'type': 'loss', 'content': 0.06022031977772713, 'timestamp': '2025-10-02 00:52:34.852585', 'step': 23235, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:34.924428', 'step': 23235, 'epoch': 3}
{'type': 'loss', 'content': 0.07293403893709183, 'timestamp': '2025-10-02 00:52:34.935389', 'step': 23236, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:35.001058', 'step': 23236, 'epoch': 3}
{'type': 'loss', 'content': 0.06959255784749985, 'timestamp': '2025-10-02 00:52:35.004563', 'step': 23237, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:35.078737', 'step': 23237, 'epoch': 3}
{'type': 'loss', 'content': 0.08598580956459045, 'timestamp': '2025-10-02 00:52:35.082522', 'step': 23238, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:35.146686', 'step': 23238, 'epoch': 3}
{'type': 'loss', 'content': 0.049136847257614136, 'timestamp': '2025-10-02 00:52:35.150420', 'step': 23239, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:35.225700', 'step': 23239, 'epoch': 3}
{'type': 'loss', 'content': 0.019991589710116386, 'timestamp': '2025-10-02 00:52:35.234345', 'step': 23240, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:52:35.290347', 'step': 23240, 'epoch': 3}
{'type': 'loss', 'content': 0.08361966162919998, 'timestamp': '2025-10-02 00:52:35.294010', 'step': 23241, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:35.355881', 'step': 23241, 'epoch': 3}
{'type': 'loss', 'content': 0.1037256047129631, 'timestamp': '2025-10-02 00:52:35.369464', 'step': 23242, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:35.456908', 'step': 23242, 'epoch': 3}
{'type': 'loss', 'content': 0.03823193162679672, 'timestamp': '2025-10-02 00:52:35.464022', 'step': 23243, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:35.533555', 'step': 23243, 'epoch': 3}
{'type': 'loss', 'content': 0.04322081431746483, 'timestamp': '2025-10-02 00:52:35.541757', 'step': 23244, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:35.630376', 'step': 23244, 'epoch': 3}
{'type': 'loss', 'content': 0.02882300689816475, 'timestamp': '2025-10-02 00:52:35.640136', 'step': 23245, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:35.723041', 'step': 23245, 'epoch': 3}
{'type': 'loss', 'content': 0.005476824473589659, 'timestamp': '2025-10-02 00:52:35.727115', 'step': 23246, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:52:35.832152', 'step': 23246, 'epoch': 3}
{'type': 'loss', 'content': 0.01121760904788971, 'timestamp': '2025-10-02 00:52:35.842743', 'step': 23247, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:35.916007', 'step': 23247, 'epoch': 3}
{'type': 'loss', 'content': 0.02364179864525795, 'timestamp': '2025-10-02 00:52:35.922720', 'step': 23248, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:35.999832', 'step': 23248, 'epoch': 3}
{'type': 'loss', 'content': 0.021366441622376442, 'timestamp': '2025-10-02 00:52:36.010668', 'step': 23249, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:36.096109', 'step': 23249, 'epoch': 3}
{'type': 'loss', 'content': 0.056267958134412766, 'timestamp': '2025-10-02 00:52:36.106300', 'step': 23250, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:52:36.205622', 'step': 23250, 'epoch': 3}
{'type': 'loss', 'content': 0.01112002320587635, 'timestamp': '2025-10-02 00:52:36.218273', 'step': 23251, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:36.308757', 'step': 23251, 'epoch': 3}
{'type': 'loss', 'content': 0.07060320675373077, 'timestamp': '2025-10-02 00:52:36.323110', 'step': 23252, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:52:36.387928', 'step': 23252, 'epoch': 3}
{'type': 'loss', 'content': 0.011257474310696125, 'timestamp': '2025-10-02 00:52:36.399412', 'step': 23253, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:36.466120', 'step': 23253, 'epoch': 3}
{'type': 'loss', 'content': 0.0070726824924349785, 'timestamp': '2025-10-02 00:52:36.475851', 'step': 23254, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:36.562084', 'step': 23254, 'epoch': 3}
{'type': 'loss', 'content': 0.05727463215589523, 'timestamp': '2025-10-02 00:52:36.573222', 'step': 23255, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:36.649905', 'step': 23255, 'epoch': 3}
{'type': 'loss', 'content': 0.030275512486696243, 'timestamp': '2025-10-02 00:52:36.657067', 'step': 23256, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:36.739955', 'step': 23256, 'epoch': 3}
{'type': 'loss', 'content': 0.01724873296916485, 'timestamp': '2025-10-02 00:52:36.743327', 'step': 23257, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:36.817100', 'step': 23257, 'epoch': 3}
{'type': 'loss', 'content': 0.0699416995048523, 'timestamp': '2025-10-02 00:52:36.821253', 'step': 23258, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:36.889810', 'step': 23258, 'epoch': 3}
{'type': 'loss', 'content': 0.0290475245565176, 'timestamp': '2025-10-02 00:52:36.900502', 'step': 23259, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:36.974118', 'step': 23259, 'epoch': 3}
{'type': 'loss', 'content': 0.04460940137505531, 'timestamp': '2025-10-02 00:52:36.989271', 'step': 23260, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:37.081173', 'step': 23260, 'epoch': 3}
{'type': 'loss', 'content': 0.01695086620748043, 'timestamp': '2025-10-02 00:52:37.086882', 'step': 23261, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:37.163202', 'step': 23261, 'epoch': 3}
{'type': 'loss', 'content': 0.04432012513279915, 'timestamp': '2025-10-02 00:52:37.168927', 'step': 23262, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:37.235012', 'step': 23262, 'epoch': 3}
{'type': 'loss', 'content': 0.13113249838352203, 'timestamp': '2025-10-02 00:52:37.251327', 'step': 23263, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:37.315491', 'step': 23263, 'epoch': 3}
{'type': 'loss', 'content': 0.017118915915489197, 'timestamp': '2025-10-02 00:52:37.326461', 'step': 23264, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:37.416136', 'step': 23264, 'epoch': 3}
{'type': 'loss', 'content': 0.037647806107997894, 'timestamp': '2025-10-02 00:52:37.427091', 'step': 23265, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:37.532444', 'step': 23265, 'epoch': 3}
{'type': 'loss', 'content': 0.02905283495783806, 'timestamp': '2025-10-02 00:52:37.537873', 'step': 23266, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:37.620570', 'step': 23266, 'epoch': 3}
{'type': 'loss', 'content': 0.029513182118535042, 'timestamp': '2025-10-02 00:52:37.624223', 'step': 23267, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:37.694678', 'step': 23267, 'epoch': 3}
{'type': 'loss', 'content': 0.00564236007630825, 'timestamp': '2025-10-02 00:52:37.711221', 'step': 23268, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:37.768657', 'step': 23268, 'epoch': 3}
{'type': 'loss', 'content': 0.02832813374698162, 'timestamp': '2025-10-02 00:52:37.783169', 'step': 23269, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:37.875270', 'step': 23269, 'epoch': 3}
{'type': 'loss', 'content': 0.06802696734666824, 'timestamp': '2025-10-02 00:52:37.889229', 'step': 23270, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:37.975670', 'step': 23270, 'epoch': 3}
{'type': 'loss', 'content': 0.08081989735364914, 'timestamp': '2025-10-02 00:52:37.980017', 'step': 23271, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:38.055402', 'step': 23271, 'epoch': 3}
{'type': 'loss', 'content': 0.1127711832523346, 'timestamp': '2025-10-02 00:52:38.063105', 'step': 23272, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:38.132966', 'step': 23272, 'epoch': 3}
{'type': 'loss', 'content': 0.027708975598216057, 'timestamp': '2025-10-02 00:52:38.145296', 'step': 23273, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:38.219123', 'step': 23273, 'epoch': 3}
{'type': 'loss', 'content': 0.0061898804269731045, 'timestamp': '2025-10-02 00:52:38.226728', 'step': 23274, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:38.316801', 'step': 23274, 'epoch': 3}
{'type': 'loss', 'content': 0.04061392694711685, 'timestamp': '2025-10-02 00:52:38.326321', 'step': 23275, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:38.390466', 'step': 23275, 'epoch': 3}
{'type': 'loss', 'content': 0.024433620274066925, 'timestamp': '2025-10-02 00:52:38.402067', 'step': 23276, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:38.473671', 'step': 23276, 'epoch': 3}
{'type': 'loss', 'content': 0.11672768741846085, 'timestamp': '2025-10-02 00:52:38.477995', 'step': 23277, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:52:38.546919', 'step': 23277, 'epoch': 3}
{'type': 'loss', 'content': 0.01792561449110508, 'timestamp': '2025-10-02 00:52:38.557334', 'step': 23278, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:38.626920', 'step': 23278, 'epoch': 3}
{'type': 'loss', 'content': 0.10386944562196732, 'timestamp': '2025-10-02 00:52:38.631018', 'step': 23279, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:38.695518', 'step': 23279, 'epoch': 3}
{'type': 'loss', 'content': 0.02224011905491352, 'timestamp': '2025-10-02 00:52:38.704280', 'step': 23280, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:38.794653', 'step': 23280, 'epoch': 3}
{'type': 'loss', 'content': 0.01979401521384716, 'timestamp': '2025-10-02 00:52:38.808238', 'step': 23281, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:52:38.889270', 'step': 23281, 'epoch': 3}
{'type': 'loss', 'content': 0.008271342143416405, 'timestamp': '2025-10-02 00:52:38.899714', 'step': 23282, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:38.958642', 'step': 23282, 'epoch': 3}
{'type': 'loss', 'content': 0.0741768628358841, 'timestamp': '2025-10-02 00:52:38.962764', 'step': 23283, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:39.054965', 'step': 23283, 'epoch': 3}
{'type': 'loss', 'content': 0.06654220074415207, 'timestamp': '2025-10-02 00:52:39.062238', 'step': 23284, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:39.146591', 'step': 23284, 'epoch': 3}
{'type': 'loss', 'content': 0.01739264465868473, 'timestamp': '2025-10-02 00:52:39.155837', 'step': 23285, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:39.252170', 'step': 23285, 'epoch': 3}
{'type': 'loss', 'content': 0.021469656378030777, 'timestamp': '2025-10-02 00:52:39.257661', 'step': 23286, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:39.343501', 'step': 23286, 'epoch': 3}
{'type': 'loss', 'content': 0.11032401770353317, 'timestamp': '2025-10-02 00:52:39.359261', 'step': 23287, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:39.441448', 'step': 23287, 'epoch': 3}
{'type': 'loss', 'content': 0.0043401289731264114, 'timestamp': '2025-10-02 00:52:39.451721', 'step': 23288, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:39.509739', 'step': 23288, 'epoch': 3}
{'type': 'loss', 'content': 0.010898539796471596, 'timestamp': '2025-10-02 00:52:39.514245', 'step': 23289, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:39.586860', 'step': 23289, 'epoch': 3}
{'type': 'loss', 'content': 0.08411533385515213, 'timestamp': '2025-10-02 00:52:39.591424', 'step': 23290, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:39.669825', 'step': 23290, 'epoch': 3}
{'type': 'loss', 'content': 0.06511294096708298, 'timestamp': '2025-10-02 00:52:39.679392', 'step': 23291, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:39.745693', 'step': 23291, 'epoch': 3}
{'type': 'loss', 'content': 0.16451554000377655, 'timestamp': '2025-10-02 00:52:39.752618', 'step': 23292, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:39.814286', 'step': 23292, 'epoch': 3}
{'type': 'loss', 'content': 0.011350657790899277, 'timestamp': '2025-10-02 00:52:39.824042', 'step': 23293, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:39.884617', 'step': 23293, 'epoch': 3}
{'type': 'loss', 'content': 0.0013514063321053982, 'timestamp': '2025-10-02 00:52:39.894003', 'step': 23294, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:52:39.965115', 'step': 23294, 'epoch': 3}
{'type': 'loss', 'content': 0.03346413001418114, 'timestamp': '2025-10-02 00:52:39.978050', 'step': 23295, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:40.059342', 'step': 23295, 'epoch': 3}
{'type': 'loss', 'content': 0.04826883599162102, 'timestamp': '2025-10-02 00:52:40.073107', 'step': 23296, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:40.130661', 'step': 23296, 'epoch': 3}
{'type': 'loss', 'content': 0.011707060039043427, 'timestamp': '2025-10-02 00:52:40.133597', 'step': 23297, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:52:40.192401', 'step': 23297, 'epoch': 3}
{'type': 'loss', 'content': 0.09133385866880417, 'timestamp': '2025-10-02 00:52:40.205331', 'step': 23298, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:40.273506', 'step': 23298, 'epoch': 3}
{'type': 'loss', 'content': 0.057995881885290146, 'timestamp': '2025-10-02 00:52:40.280876', 'step': 23299, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:40.357165', 'step': 23299, 'epoch': 3}
{'type': 'loss', 'content': 0.11907363682985306, 'timestamp': '2025-10-02 00:52:40.364143', 'step': 23300, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:40.421584', 'step': 23300, 'epoch': 3}
{'type': 'loss', 'content': 0.07960455119609833, 'timestamp': '2025-10-02 00:52:40.431848', 'step': 23301, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:40.498450', 'step': 23301, 'epoch': 3}
{'type': 'loss', 'content': 0.04059113934636116, 'timestamp': '2025-10-02 00:52:40.504439', 'step': 23302, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:40.574181', 'step': 23302, 'epoch': 3}
{'type': 'loss', 'content': 0.04563301056623459, 'timestamp': '2025-10-02 00:52:40.580334', 'step': 23303, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:40.644144', 'step': 23303, 'epoch': 3}
{'type': 'loss', 'content': 0.0852532833814621, 'timestamp': '2025-10-02 00:52:40.655093', 'step': 23304, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:40.714497', 'step': 23304, 'epoch': 3}
{'type': 'loss', 'content': 0.033931054174900055, 'timestamp': '2025-10-02 00:52:40.724687', 'step': 23305, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:40.785404', 'step': 23305, 'epoch': 3}
{'type': 'loss', 'content': 0.04358426481485367, 'timestamp': '2025-10-02 00:52:40.792859', 'step': 23306, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:40.852611', 'step': 23306, 'epoch': 3}
{'type': 'loss', 'content': 0.054577115923166275, 'timestamp': '2025-10-02 00:52:40.856977', 'step': 23307, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:40.937510', 'step': 23307, 'epoch': 3}
{'type': 'loss', 'content': 0.022168181836605072, 'timestamp': '2025-10-02 00:52:40.953526', 'step': 23308, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:41.013088', 'step': 23308, 'epoch': 3}
{'type': 'loss', 'content': 0.05378924682736397, 'timestamp': '2025-10-02 00:52:41.022696', 'step': 23309, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:41.095618', 'step': 23309, 'epoch': 3}
{'type': 'loss', 'content': 0.02776900678873062, 'timestamp': '2025-10-02 00:52:41.099672', 'step': 23310, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:52:41.165554', 'step': 23310, 'epoch': 3}
{'type': 'loss', 'content': 0.05227171257138252, 'timestamp': '2025-10-02 00:52:41.168116', 'step': 23311, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:41.224506', 'step': 23311, 'epoch': 3}
{'type': 'loss', 'content': 0.10231244564056396, 'timestamp': '2025-10-02 00:52:41.231296', 'step': 23312, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:41.293930', 'step': 23312, 'epoch': 3}
{'type': 'loss', 'content': 0.024308666586875916, 'timestamp': '2025-10-02 00:52:41.304174', 'step': 23313, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:41.380341', 'step': 23313, 'epoch': 3}
{'type': 'loss', 'content': 0.06584400683641434, 'timestamp': '2025-10-02 00:52:41.384042', 'step': 23314, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:41.458064', 'step': 23314, 'epoch': 3}
{'type': 'loss', 'content': 0.0673370435833931, 'timestamp': '2025-10-02 00:52:41.461344', 'step': 23315, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:52:41.566831', 'step': 23315, 'epoch': 3}
{'type': 'loss', 'content': 0.03358162194490433, 'timestamp': '2025-10-02 00:52:41.580231', 'step': 23316, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:41.650213', 'step': 23316, 'epoch': 3}
{'type': 'loss', 'content': 0.056165654212236404, 'timestamp': '2025-10-02 00:52:41.654017', 'step': 23317, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:41.725684', 'step': 23317, 'epoch': 3}
{'type': 'loss', 'content': 0.03217286989092827, 'timestamp': '2025-10-02 00:52:41.735219', 'step': 23318, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:41.795947', 'step': 23318, 'epoch': 3}
{'type': 'loss', 'content': 0.10456749051809311, 'timestamp': '2025-10-02 00:52:41.800833', 'step': 23319, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:41.872178', 'step': 23319, 'epoch': 3}
{'type': 'loss', 'content': 0.041140586137771606, 'timestamp': '2025-10-02 00:52:41.888252', 'step': 23320, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:52:41.967635', 'step': 23320, 'epoch': 3}
{'type': 'loss', 'content': 0.058569952845573425, 'timestamp': '2025-10-02 00:52:41.981865', 'step': 23321, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:42.044651', 'step': 23321, 'epoch': 3}
{'type': 'loss', 'content': 0.03958238288760185, 'timestamp': '2025-10-02 00:52:42.051721', 'step': 23322, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:52:42.110988', 'step': 23322, 'epoch': 3}
{'type': 'loss', 'content': 0.049511779099702835, 'timestamp': '2025-10-02 00:52:42.117583', 'step': 23323, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:42.191059', 'step': 23323, 'epoch': 3}
{'type': 'loss', 'content': 0.08519629389047623, 'timestamp': '2025-10-02 00:52:42.208859', 'step': 23324, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:42.276452', 'step': 23324, 'epoch': 3}
{'type': 'loss', 'content': 0.0789625495672226, 'timestamp': '2025-10-02 00:52:42.288890', 'step': 23325, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:42.348792', 'step': 23325, 'epoch': 3}
{'type': 'loss', 'content': 0.07047677785158157, 'timestamp': '2025-10-02 00:52:42.355980', 'step': 23326, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:42.424214', 'step': 23326, 'epoch': 3}
{'type': 'loss', 'content': 0.08635260909795761, 'timestamp': '2025-10-02 00:52:42.434867', 'step': 23327, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:42.518676', 'step': 23327, 'epoch': 3}
{'type': 'loss', 'content': 0.046591971069574356, 'timestamp': '2025-10-02 00:52:42.529040', 'step': 23328, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:42.621591', 'step': 23328, 'epoch': 3}
{'type': 'loss', 'content': 0.06880635023117065, 'timestamp': '2025-10-02 00:52:42.625523', 'step': 23329, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:42.696290', 'step': 23329, 'epoch': 3}
{'type': 'loss', 'content': 0.006673423107713461, 'timestamp': '2025-10-02 00:52:42.699665', 'step': 23330, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:52:42.777301', 'step': 23330, 'epoch': 3}
{'type': 'loss', 'content': 0.07886427640914917, 'timestamp': '2025-10-02 00:52:42.781435', 'step': 23331, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:42.851104', 'step': 23331, 'epoch': 3}
{'type': 'loss', 'content': 0.03761960193514824, 'timestamp': '2025-10-02 00:52:42.860544', 'step': 23332, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:52:42.935513', 'step': 23332, 'epoch': 3}
{'type': 'loss', 'content': 0.0010263739386573434, 'timestamp': '2025-10-02 00:52:42.947042', 'step': 23333, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:43.004255', 'step': 23333, 'epoch': 3}
{'type': 'loss', 'content': 0.11524581909179688, 'timestamp': '2025-10-02 00:52:43.007713', 'step': 23334, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:43.086023', 'step': 23334, 'epoch': 3}
{'type': 'loss', 'content': 0.0419880636036396, 'timestamp': '2025-10-02 00:52:43.099468', 'step': 23335, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:43.168178', 'step': 23335, 'epoch': 3}
{'type': 'loss', 'content': 0.06661289185285568, 'timestamp': '2025-10-02 00:52:43.181882', 'step': 23336, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:43.249625', 'step': 23336, 'epoch': 3}
{'type': 'loss', 'content': 0.05399605631828308, 'timestamp': '2025-10-02 00:52:43.252590', 'step': 23337, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:43.312369', 'step': 23337, 'epoch': 3}
{'type': 'loss', 'content': 0.06295005232095718, 'timestamp': '2025-10-02 00:52:43.316498', 'step': 23338, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:43.390086', 'step': 23338, 'epoch': 3}
{'type': 'loss', 'content': 0.09001541137695312, 'timestamp': '2025-10-02 00:52:43.400404', 'step': 23339, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:43.470170', 'step': 23339, 'epoch': 3}
{'type': 'loss', 'content': 0.11422467976808548, 'timestamp': '2025-10-02 00:52:43.480478', 'step': 23340, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:43.554889', 'step': 23340, 'epoch': 3}
{'type': 'loss', 'content': 0.13663066923618317, 'timestamp': '2025-10-02 00:52:43.564450', 'step': 23341, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:43.621769', 'step': 23341, 'epoch': 3}
{'type': 'loss', 'content': 0.05417110398411751, 'timestamp': '2025-10-02 00:52:43.624623', 'step': 23342, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:43.681674', 'step': 23342, 'epoch': 3}
{'type': 'loss', 'content': 0.03840953856706619, 'timestamp': '2025-10-02 00:52:43.684293', 'step': 23343, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:43.741803', 'step': 23343, 'epoch': 3}
{'type': 'loss', 'content': 0.030826345086097717, 'timestamp': '2025-10-02 00:52:43.751901', 'step': 23344, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:43.826027', 'step': 23344, 'epoch': 3}
{'type': 'loss', 'content': 0.09317311644554138, 'timestamp': '2025-10-02 00:52:43.833379', 'step': 23345, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:43.915222', 'step': 23345, 'epoch': 3}
{'type': 'loss', 'content': 0.03439393267035484, 'timestamp': '2025-10-02 00:52:43.919784', 'step': 23346, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:43.997470', 'step': 23346, 'epoch': 3}
{'type': 'loss', 'content': 0.031593505293130875, 'timestamp': '2025-10-02 00:52:44.006466', 'step': 23347, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:44.068725', 'step': 23347, 'epoch': 3}
{'type': 'loss', 'content': 0.032177723944187164, 'timestamp': '2025-10-02 00:52:44.077175', 'step': 23348, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:52:44.173085', 'step': 23348, 'epoch': 3}
{'type': 'loss', 'content': 0.10625330358743668, 'timestamp': '2025-10-02 00:52:44.175665', 'step': 23349, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:44.233845', 'step': 23349, 'epoch': 3}
{'type': 'loss', 'content': 0.057182516902685165, 'timestamp': '2025-10-02 00:52:44.237516', 'step': 23350, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:44.315112', 'step': 23350, 'epoch': 3}
{'type': 'loss', 'content': 0.16066083312034607, 'timestamp': '2025-10-02 00:52:44.319706', 'step': 23351, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:44.387006', 'step': 23351, 'epoch': 3}
{'type': 'loss', 'content': 0.018730521202087402, 'timestamp': '2025-10-02 00:52:44.397927', 'step': 23352, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:44.469310', 'step': 23352, 'epoch': 3}
{'type': 'loss', 'content': 0.03889317810535431, 'timestamp': '2025-10-02 00:52:44.480307', 'step': 23353, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:44.559986', 'step': 23353, 'epoch': 3}
{'type': 'loss', 'content': 0.055511996150016785, 'timestamp': '2025-10-02 00:52:44.569541', 'step': 23354, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:44.636136', 'step': 23354, 'epoch': 3}
{'type': 'loss', 'content': 0.03787699341773987, 'timestamp': '2025-10-02 00:52:44.638996', 'step': 23355, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:44.697130', 'step': 23355, 'epoch': 3}
{'type': 'loss', 'content': 0.06768270581960678, 'timestamp': '2025-10-02 00:52:44.703881', 'step': 23356, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:44.778393', 'step': 23356, 'epoch': 3}
{'type': 'loss', 'content': 0.07566597312688828, 'timestamp': '2025-10-02 00:52:44.787718', 'step': 23357, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:44.861001', 'step': 23357, 'epoch': 3}
{'type': 'loss', 'content': 0.09010834246873856, 'timestamp': '2025-10-02 00:52:44.870366', 'step': 23358, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:44.938979', 'step': 23358, 'epoch': 3}
{'type': 'loss', 'content': 0.02918844483792782, 'timestamp': '2025-10-02 00:52:44.945460', 'step': 23359, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:45.015140', 'step': 23359, 'epoch': 3}
{'type': 'loss', 'content': 0.016144203022122383, 'timestamp': '2025-10-02 00:52:45.022521', 'step': 23360, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:52:45.080043', 'step': 23360, 'epoch': 3}
{'type': 'loss', 'content': 0.035001400858163834, 'timestamp': '2025-10-02 00:52:45.085041', 'step': 23361, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:45.159249', 'step': 23361, 'epoch': 3}
{'type': 'loss', 'content': 0.03710813820362091, 'timestamp': '2025-10-02 00:52:45.162440', 'step': 23362, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:45.219686', 'step': 23362, 'epoch': 3}
{'type': 'loss', 'content': 0.04935815930366516, 'timestamp': '2025-10-02 00:52:45.225942', 'step': 23363, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:45.281542', 'step': 23363, 'epoch': 3}
{'type': 'loss', 'content': 0.05332178249955177, 'timestamp': '2025-10-02 00:52:45.288357', 'step': 23364, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:52:45.351871', 'step': 23364, 'epoch': 3}
{'type': 'loss', 'content': 0.039172932505607605, 'timestamp': '2025-10-02 00:52:45.363342', 'step': 23365, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:45.421694', 'step': 23365, 'epoch': 3}
{'type': 'loss', 'content': 0.0486818365752697, 'timestamp': '2025-10-02 00:52:45.427333', 'step': 23366, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:45.493580', 'step': 23366, 'epoch': 3}
{'type': 'loss', 'content': 0.08630333840847015, 'timestamp': '2025-10-02 00:52:45.499503', 'step': 23367, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:45.563210', 'step': 23367, 'epoch': 3}
{'type': 'loss', 'content': 0.10631924867630005, 'timestamp': '2025-10-02 00:52:45.571441', 'step': 23368, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:45.630639', 'step': 23368, 'epoch': 3}
{'type': 'loss', 'content': 0.08437620103359222, 'timestamp': '2025-10-02 00:52:45.633960', 'step': 23369, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:45.699149', 'step': 23369, 'epoch': 3}
{'type': 'loss', 'content': 0.0025699136313050985, 'timestamp': '2025-10-02 00:52:45.706472', 'step': 23370, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:45.764302', 'step': 23370, 'epoch': 3}
{'type': 'loss', 'content': 0.06894471496343613, 'timestamp': '2025-10-02 00:52:45.767490', 'step': 23371, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:45.830836', 'step': 23371, 'epoch': 3}
{'type': 'loss', 'content': 0.01326809637248516, 'timestamp': '2025-10-02 00:52:45.837554', 'step': 23372, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:45.920035', 'step': 23372, 'epoch': 3}
{'type': 'loss', 'content': 0.08150319755077362, 'timestamp': '2025-10-02 00:52:45.924383', 'step': 23373, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:52:46.009770', 'step': 23373, 'epoch': 3}
{'type': 'loss', 'content': 0.04420235753059387, 'timestamp': '2025-10-02 00:52:46.020172', 'step': 23374, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:52:46.077650', 'step': 23374, 'epoch': 3}
{'type': 'loss', 'content': 0.04420381411910057, 'timestamp': '2025-10-02 00:52:46.080246', 'step': 23375, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:52:46.158683', 'step': 23375, 'epoch': 3}
{'type': 'loss', 'content': 0.034653156995773315, 'timestamp': '2025-10-02 00:52:46.169814', 'step': 23376, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:46.226672', 'step': 23376, 'epoch': 3}
{'type': 'loss', 'content': 0.009267564862966537, 'timestamp': '2025-10-02 00:52:46.235890', 'step': 23377, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:46.317223', 'step': 23377, 'epoch': 3}
{'type': 'loss', 'content': 0.01571687124669552, 'timestamp': '2025-10-02 00:52:46.324428', 'step': 23378, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:52:46.389791', 'step': 23378, 'epoch': 3}
{'type': 'loss', 'content': 0.03724875673651695, 'timestamp': '2025-10-02 00:52:46.400250', 'step': 23379, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:52:46.458839', 'step': 23379, 'epoch': 3}
{'type': 'loss', 'content': 0.05530015006661415, 'timestamp': '2025-10-02 00:52:46.472934', 'step': 23380, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:46.542416', 'step': 23380, 'epoch': 3}
{'type': 'loss', 'content': 0.0381331741809845, 'timestamp': '2025-10-02 00:52:46.546829', 'step': 23381, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:46.634010', 'step': 23381, 'epoch': 3}
{'type': 'loss', 'content': 0.08415982127189636, 'timestamp': '2025-10-02 00:52:46.638255', 'step': 23382, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:46.701613', 'step': 23382, 'epoch': 3}
{'type': 'loss', 'content': 0.015165158547461033, 'timestamp': '2025-10-02 00:52:46.710865', 'step': 23383, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:46.790468', 'step': 23383, 'epoch': 3}
{'type': 'loss', 'content': 0.011125952005386353, 'timestamp': '2025-10-02 00:52:46.806443', 'step': 23384, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:46.878130', 'step': 23384, 'epoch': 3}
{'type': 'loss', 'content': 0.05436613783240318, 'timestamp': '2025-10-02 00:52:46.881331', 'step': 23385, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:46.939257', 'step': 23385, 'epoch': 3}
{'type': 'loss', 'content': 0.10549680888652802, 'timestamp': '2025-10-02 00:52:46.942409', 'step': 23386, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:52:47.024076', 'step': 23386, 'epoch': 3}
{'type': 'loss', 'content': 0.001065024291165173, 'timestamp': '2025-10-02 00:52:47.034757', 'step': 23387, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:52:47.106986', 'step': 23387, 'epoch': 3}
{'type': 'loss', 'content': 0.04962223395705223, 'timestamp': '2025-10-02 00:52:47.115998', 'step': 23388, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:52:47.187288', 'step': 23388, 'epoch': 3}
{'type': 'loss', 'content': 0.05286719650030136, 'timestamp': '2025-10-02 00:52:47.193825', 'step': 23389, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:47.263608', 'step': 23389, 'epoch': 3}
{'type': 'loss', 'content': 0.042606525123119354, 'timestamp': '2025-10-02 00:52:47.272468', 'step': 23390, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:47.330323', 'step': 23390, 'epoch': 3}
{'type': 'loss', 'content': 0.14364194869995117, 'timestamp': '2025-10-02 00:52:47.333979', 'step': 23391, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:47.396897', 'step': 23391, 'epoch': 3}
{'type': 'loss', 'content': 0.009504226967692375, 'timestamp': '2025-10-02 00:52:47.408997', 'step': 23392, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:47.475693', 'step': 23392, 'epoch': 3}
{'type': 'loss', 'content': 0.06283341348171234, 'timestamp': '2025-10-02 00:52:47.481471', 'step': 23393, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:47.548339', 'step': 23393, 'epoch': 3}
{'type': 'loss', 'content': 0.025301672518253326, 'timestamp': '2025-10-02 00:52:47.551936', 'step': 23394, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:47.621279', 'step': 23394, 'epoch': 3}
{'type': 'loss', 'content': 0.10686443746089935, 'timestamp': '2025-10-02 00:52:47.629837', 'step': 23395, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:47.690322', 'step': 23395, 'epoch': 3}
{'type': 'loss', 'content': 0.03888557851314545, 'timestamp': '2025-10-02 00:52:47.700066', 'step': 23396, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:47.764133', 'step': 23396, 'epoch': 3}
{'type': 'loss', 'content': 0.047990698367357254, 'timestamp': '2025-10-02 00:52:47.767543', 'step': 23397, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:47.831350', 'step': 23397, 'epoch': 3}
{'type': 'loss', 'content': 0.07046463340520859, 'timestamp': '2025-10-02 00:52:47.838133', 'step': 23398, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:47.912955', 'step': 23398, 'epoch': 3}
{'type': 'loss', 'content': 0.0741451159119606, 'timestamp': '2025-10-02 00:52:47.915905', 'step': 23399, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:47.989529', 'step': 23399, 'epoch': 3}
{'type': 'loss', 'content': 0.050937965512275696, 'timestamp': '2025-10-02 00:52:47.998755', 'step': 23400, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:48.072044', 'step': 23400, 'epoch': 3}
{'type': 'loss', 'content': 0.09124021232128143, 'timestamp': '2025-10-02 00:52:48.079519', 'step': 23401, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:48.151673', 'step': 23401, 'epoch': 3}
{'type': 'loss', 'content': 0.07395558804273605, 'timestamp': '2025-10-02 00:52:48.154359', 'step': 23402, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:52:48.247060', 'step': 23402, 'epoch': 3}
{'type': 'loss', 'content': 0.0002594268589746207, 'timestamp': '2025-10-02 00:52:48.260906', 'step': 23403, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:48.332219', 'step': 23403, 'epoch': 3}
{'type': 'loss', 'content': 0.062366921454668045, 'timestamp': '2025-10-02 00:52:48.339024', 'step': 23404, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:48.397328', 'step': 23404, 'epoch': 3}
{'type': 'loss', 'content': 0.033948857337236404, 'timestamp': '2025-10-02 00:52:48.404925', 'step': 23405, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:48.468702', 'step': 23405, 'epoch': 3}
{'type': 'loss', 'content': 0.018498050048947334, 'timestamp': '2025-10-02 00:52:48.476007', 'step': 23406, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:52:48.554368', 'step': 23406, 'epoch': 3}
{'type': 'loss', 'content': 0.07657913863658905, 'timestamp': '2025-10-02 00:52:48.566280', 'step': 23407, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:48.626837', 'step': 23407, 'epoch': 3}
{'type': 'loss', 'content': 0.09598632156848907, 'timestamp': '2025-10-02 00:52:48.637384', 'step': 23408, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:48.699051', 'step': 23408, 'epoch': 3}
{'type': 'loss', 'content': 0.10673161596059799, 'timestamp': '2025-10-02 00:52:48.706656', 'step': 23409, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:48.779960', 'step': 23409, 'epoch': 3}
{'type': 'loss', 'content': 0.005750917363911867, 'timestamp': '2025-10-02 00:52:48.786052', 'step': 23410, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:48.857553', 'step': 23410, 'epoch': 3}
{'type': 'loss', 'content': 0.08582382649183273, 'timestamp': '2025-10-02 00:52:48.860034', 'step': 23411, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:48.919102', 'step': 23411, 'epoch': 3}
{'type': 'loss', 'content': 0.04040270298719406, 'timestamp': '2025-10-02 00:52:48.929165', 'step': 23412, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:48.984980', 'step': 23412, 'epoch': 3}
{'type': 'loss', 'content': 0.040085431188344955, 'timestamp': '2025-10-02 00:52:48.991648', 'step': 23413, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:49.055189', 'step': 23413, 'epoch': 3}
{'type': 'loss', 'content': 0.004535915795713663, 'timestamp': '2025-10-02 00:52:49.058451', 'step': 23414, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:49.121748', 'step': 23414, 'epoch': 3}
{'type': 'loss', 'content': 0.06762415170669556, 'timestamp': '2025-10-02 00:52:49.131286', 'step': 23415, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:49.197312', 'step': 23415, 'epoch': 3}
{'type': 'loss', 'content': 0.012682254426181316, 'timestamp': '2025-10-02 00:52:49.203951', 'step': 23416, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:52:49.263410', 'step': 23416, 'epoch': 3}
{'type': 'loss', 'content': 0.036138370633125305, 'timestamp': '2025-10-02 00:52:49.267401', 'step': 23417, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:49.331182', 'step': 23417, 'epoch': 3}
{'type': 'loss', 'content': 0.056375857442617416, 'timestamp': '2025-10-02 00:52:49.338615', 'step': 23418, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:49.408125', 'step': 23418, 'epoch': 3}
{'type': 'loss', 'content': 0.012539573945105076, 'timestamp': '2025-10-02 00:52:49.419588', 'step': 23419, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:49.500604', 'step': 23419, 'epoch': 3}
{'type': 'loss', 'content': 0.08503123372793198, 'timestamp': '2025-10-02 00:52:49.514043', 'step': 23420, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:52:49.598478', 'step': 23420, 'epoch': 3}
{'type': 'loss', 'content': 0.014778018929064274, 'timestamp': '2025-10-02 00:52:49.611477', 'step': 23421, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:52:49.689690', 'step': 23421, 'epoch': 3}
{'type': 'loss', 'content': 0.02734348736703396, 'timestamp': '2025-10-02 00:52:49.700189', 'step': 23422, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:49.759650', 'step': 23422, 'epoch': 3}
{'type': 'loss', 'content': 0.02190636284649372, 'timestamp': '2025-10-02 00:52:49.769146', 'step': 23423, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:52:49.838614', 'step': 23423, 'epoch': 3}
{'type': 'loss', 'content': 0.0442335344851017, 'timestamp': '2025-10-02 00:52:49.854794', 'step': 23424, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:52:49.940181', 'step': 23424, 'epoch': 3}
{'type': 'loss', 'content': 0.03427729383111, 'timestamp': '2025-10-02 00:52:49.953723', 'step': 23425, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:50.019126', 'step': 23425, 'epoch': 3}
{'type': 'loss', 'content': 0.07650068402290344, 'timestamp': '2025-10-02 00:52:50.027560', 'step': 23426, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:50.114668', 'step': 23426, 'epoch': 3}
{'type': 'loss', 'content': 0.021153569221496582, 'timestamp': '2025-10-02 00:52:50.124795', 'step': 23427, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:50.198098', 'step': 23427, 'epoch': 3}
{'type': 'loss', 'content': 0.10053656995296478, 'timestamp': '2025-10-02 00:52:50.209964', 'step': 23428, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:52:50.271806', 'step': 23428, 'epoch': 3}
{'type': 'loss', 'content': 0.022253582254052162, 'timestamp': '2025-10-02 00:52:50.283124', 'step': 23429, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:52:50.383523', 'step': 23429, 'epoch': 3}
{'type': 'loss', 'content': 0.007242173887789249, 'timestamp': '2025-10-02 00:52:50.395982', 'step': 23430, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:50.485469', 'step': 23430, 'epoch': 3}
{'type': 'loss', 'content': 0.030785977840423584, 'timestamp': '2025-10-02 00:52:50.495660', 'step': 23431, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:52:50.570824', 'step': 23431, 'epoch': 3}
{'type': 'loss', 'content': 0.04358121007680893, 'timestamp': '2025-10-02 00:52:50.583139', 'step': 23432, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:50.640829', 'step': 23432, 'epoch': 3}
{'type': 'loss', 'content': 0.10860077291727066, 'timestamp': '2025-10-02 00:52:50.648028', 'step': 23433, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:50.724357', 'step': 23433, 'epoch': 3}
{'type': 'loss', 'content': 0.0687827318906784, 'timestamp': '2025-10-02 00:52:50.734057', 'step': 23434, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:50.800501', 'step': 23434, 'epoch': 3}
{'type': 'loss', 'content': 0.12341134250164032, 'timestamp': '2025-10-02 00:52:50.804458', 'step': 23435, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:50.866890', 'step': 23435, 'epoch': 3}
{'type': 'loss', 'content': 0.03270772844552994, 'timestamp': '2025-10-02 00:52:50.878680', 'step': 23436, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:50.950169', 'step': 23436, 'epoch': 3}
{'type': 'loss', 'content': 0.041490472853183746, 'timestamp': '2025-10-02 00:52:50.953457', 'step': 23437, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:51.030271', 'step': 23437, 'epoch': 3}
{'type': 'loss', 'content': 0.024295242503285408, 'timestamp': '2025-10-02 00:52:51.039062', 'step': 23438, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:51.103854', 'step': 23438, 'epoch': 3}
{'type': 'loss', 'content': 0.06295828521251678, 'timestamp': '2025-10-02 00:52:51.115170', 'step': 23439, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:51.179322', 'step': 23439, 'epoch': 3}
{'type': 'loss', 'content': 0.07724518328905106, 'timestamp': '2025-10-02 00:52:51.187308', 'step': 23440, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:51.266988', 'step': 23440, 'epoch': 3}
{'type': 'loss', 'content': 0.12283998727798462, 'timestamp': '2025-10-02 00:52:51.270822', 'step': 23441, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:52:51.391732', 'step': 23441, 'epoch': 3}
{'type': 'loss', 'content': 0.04066409915685654, 'timestamp': '2025-10-02 00:52:51.411135', 'step': 23442, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:51.486533', 'step': 23442, 'epoch': 3}
{'type': 'loss', 'content': 0.04126960039138794, 'timestamp': '2025-10-02 00:52:51.494352', 'step': 23443, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:51.562860', 'step': 23443, 'epoch': 3}
{'type': 'loss', 'content': 0.03899829834699631, 'timestamp': '2025-10-02 00:52:51.570861', 'step': 23444, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:51.631388', 'step': 23444, 'epoch': 3}
{'type': 'loss', 'content': 0.04415630176663399, 'timestamp': '2025-10-02 00:52:51.637256', 'step': 23445, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:52:51.709452', 'step': 23445, 'epoch': 3}
{'type': 'loss', 'content': 0.02827189303934574, 'timestamp': '2025-10-02 00:52:51.720123', 'step': 23446, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:51.788981', 'step': 23446, 'epoch': 3}
{'type': 'loss', 'content': 0.022570351138710976, 'timestamp': '2025-10-02 00:52:51.791405', 'step': 23447, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:51.850613', 'step': 23447, 'epoch': 3}
{'type': 'loss', 'content': 0.03574560582637787, 'timestamp': '2025-10-02 00:52:51.856965', 'step': 23448, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:51.911602', 'step': 23448, 'epoch': 3}
{'type': 'loss', 'content': 0.022981148213148117, 'timestamp': '2025-10-02 00:52:51.921189', 'step': 23449, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:51.986918', 'step': 23449, 'epoch': 3}
{'type': 'loss', 'content': 0.014076540246605873, 'timestamp': '2025-10-02 00:52:51.990215', 'step': 23450, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:52.055987', 'step': 23450, 'epoch': 3}
{'type': 'loss', 'content': 0.005300021730363369, 'timestamp': '2025-10-02 00:52:52.058538', 'step': 23451, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:52.113199', 'step': 23451, 'epoch': 3}
{'type': 'loss', 'content': 0.050679538398981094, 'timestamp': '2025-10-02 00:52:52.119619', 'step': 23452, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:52.176564', 'step': 23452, 'epoch': 3}
{'type': 'loss', 'content': 0.02938299998641014, 'timestamp': '2025-10-02 00:52:52.185474', 'step': 23453, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:52.258834', 'step': 23453, 'epoch': 3}
{'type': 'loss', 'content': 0.04306967929005623, 'timestamp': '2025-10-02 00:52:52.264856', 'step': 23454, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:52:52.356940', 'step': 23454, 'epoch': 3}
{'type': 'loss', 'content': 0.03314971178770065, 'timestamp': '2025-10-02 00:52:52.367612', 'step': 23455, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:52:52.427918', 'step': 23455, 'epoch': 3}
{'type': 'loss', 'content': 0.08095409721136093, 'timestamp': '2025-10-02 00:52:52.435234', 'step': 23456, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:52.535702', 'step': 23456, 'epoch': 3}
{'type': 'loss', 'content': 0.1252971589565277, 'timestamp': '2025-10-02 00:52:52.542126', 'step': 23457, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:52.629610', 'step': 23457, 'epoch': 3}
{'type': 'loss', 'content': 0.04559654742479324, 'timestamp': '2025-10-02 00:52:52.643838', 'step': 23458, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:52.747099', 'step': 23458, 'epoch': 3}
{'type': 'loss', 'content': 0.0888802707195282, 'timestamp': '2025-10-02 00:52:52.754247', 'step': 23459, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:52:52.834663', 'step': 23459, 'epoch': 3}
{'type': 'loss', 'content': 0.13542762398719788, 'timestamp': '2025-10-02 00:52:52.849972', 'step': 23460, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:52.936025', 'step': 23460, 'epoch': 3}
{'type': 'loss', 'content': 0.048775192350149155, 'timestamp': '2025-10-02 00:52:52.944941', 'step': 23461, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:53.020510', 'step': 23461, 'epoch': 3}
{'type': 'loss', 'content': 0.07647456973791122, 'timestamp': '2025-10-02 00:52:53.027777', 'step': 23462, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:52:53.088526', 'step': 23462, 'epoch': 3}
{'type': 'loss', 'content': 0.05379442498087883, 'timestamp': '2025-10-02 00:52:53.093975', 'step': 23463, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:53.159090', 'step': 23463, 'epoch': 3}
{'type': 'loss', 'content': 0.00020869958098046482, 'timestamp': '2025-10-02 00:52:53.173865', 'step': 23464, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:53.249488', 'step': 23464, 'epoch': 3}
{'type': 'loss', 'content': 0.03062988445162773, 'timestamp': '2025-10-02 00:52:53.258709', 'step': 23465, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:52:53.318502', 'step': 23465, 'epoch': 3}
{'type': 'loss', 'content': 0.11957203596830368, 'timestamp': '2025-10-02 00:52:53.325976', 'step': 23466, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:52:53.413339', 'step': 23466, 'epoch': 3}
{'type': 'loss', 'content': 0.01884389854967594, 'timestamp': '2025-10-02 00:52:53.425290', 'step': 23467, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:52:53.507897', 'step': 23467, 'epoch': 3}
{'type': 'loss', 'content': 0.03879549726843834, 'timestamp': '2025-10-02 00:52:53.520644', 'step': 23468, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:53.594779', 'step': 23468, 'epoch': 3}
{'type': 'loss', 'content': 0.15049894154071808, 'timestamp': '2025-10-02 00:52:53.600390', 'step': 23469, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:53.661488', 'step': 23469, 'epoch': 3}
{'type': 'loss', 'content': 0.025707382708787918, 'timestamp': '2025-10-02 00:52:53.668547', 'step': 23470, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:53.731738', 'step': 23470, 'epoch': 3}
{'type': 'loss', 'content': 0.015194362960755825, 'timestamp': '2025-10-02 00:52:53.738778', 'step': 23471, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:53.807059', 'step': 23471, 'epoch': 3}
{'type': 'loss', 'content': 0.042671654373407364, 'timestamp': '2025-10-02 00:52:53.813828', 'step': 23472, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:52:53.896075', 'step': 23472, 'epoch': 3}
{'type': 'loss', 'content': 0.005634161178022623, 'timestamp': '2025-10-02 00:52:53.909424', 'step': 23473, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:53.982948', 'step': 23473, 'epoch': 3}
{'type': 'loss', 'content': 0.03907686471939087, 'timestamp': '2025-10-02 00:52:53.990770', 'step': 23474, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:52:54.072710', 'step': 23474, 'epoch': 3}
{'type': 'loss', 'content': 0.008319268934428692, 'timestamp': '2025-10-02 00:52:54.085184', 'step': 23475, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:54.149788', 'step': 23475, 'epoch': 3}
{'type': 'loss', 'content': 0.03983593359589577, 'timestamp': '2025-10-02 00:52:54.156646', 'step': 23476, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:54.212123', 'step': 23476, 'epoch': 3}
{'type': 'loss', 'content': 0.0527118481695652, 'timestamp': '2025-10-02 00:52:54.221361', 'step': 23477, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:54.296393', 'step': 23477, 'epoch': 3}
{'type': 'loss', 'content': 0.02765345200896263, 'timestamp': '2025-10-02 00:52:54.308690', 'step': 23478, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:54.394363', 'step': 23478, 'epoch': 3}
{'type': 'loss', 'content': 0.04201402887701988, 'timestamp': '2025-10-02 00:52:54.404586', 'step': 23479, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:52:54.468493', 'step': 23479, 'epoch': 3}
{'type': 'loss', 'content': 0.011112885549664497, 'timestamp': '2025-10-02 00:52:54.479695', 'step': 23480, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:54.547749', 'step': 23480, 'epoch': 3}
{'type': 'loss', 'content': 0.05153285712003708, 'timestamp': '2025-10-02 00:52:54.551243', 'step': 23481, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:54.629410', 'step': 23481, 'epoch': 3}
{'type': 'loss', 'content': 0.0635218545794487, 'timestamp': '2025-10-02 00:52:54.631777', 'step': 23482, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:54.687592', 'step': 23482, 'epoch': 3}
{'type': 'loss', 'content': 0.017308533191680908, 'timestamp': '2025-10-02 00:52:54.690387', 'step': 23483, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:52:54.755780', 'step': 23483, 'epoch': 3}
{'type': 'loss', 'content': 0.012681580148637295, 'timestamp': '2025-10-02 00:52:54.766963', 'step': 23484, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:54.861283', 'step': 23484, 'epoch': 3}
{'type': 'loss', 'content': 0.02016468718647957, 'timestamp': '2025-10-02 00:52:54.870463', 'step': 23485, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:54.970836', 'step': 23485, 'epoch': 3}
{'type': 'loss', 'content': 0.19735507667064667, 'timestamp': '2025-10-02 00:52:54.976128', 'step': 23486, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:55.082151', 'step': 23486, 'epoch': 3}
{'type': 'loss', 'content': 0.026975447311997414, 'timestamp': '2025-10-02 00:52:55.097797', 'step': 23487, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:55.202797', 'step': 23487, 'epoch': 3}
{'type': 'loss', 'content': 0.06790770590305328, 'timestamp': '2025-10-02 00:52:55.210171', 'step': 23488, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:55.279226', 'step': 23488, 'epoch': 3}
{'type': 'loss', 'content': 0.08380303531885147, 'timestamp': '2025-10-02 00:52:55.282274', 'step': 23489, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:55.352936', 'step': 23489, 'epoch': 3}
{'type': 'loss', 'content': 0.038632337003946304, 'timestamp': '2025-10-02 00:52:55.358523', 'step': 23490, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:55.420130', 'step': 23490, 'epoch': 3}
{'type': 'loss', 'content': 0.026410534977912903, 'timestamp': '2025-10-02 00:52:55.427787', 'step': 23491, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:55.495198', 'step': 23491, 'epoch': 3}
{'type': 'loss', 'content': 0.0754694864153862, 'timestamp': '2025-10-02 00:52:55.503171', 'step': 23492, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:55.573746', 'step': 23492, 'epoch': 3}
{'type': 'loss', 'content': 0.12185797840356827, 'timestamp': '2025-10-02 00:52:55.579730', 'step': 23493, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:55.652636', 'step': 23493, 'epoch': 3}
{'type': 'loss', 'content': 0.044070590287446976, 'timestamp': '2025-10-02 00:52:55.656926', 'step': 23494, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:55.722475', 'step': 23494, 'epoch': 3}
{'type': 'loss', 'content': 0.029635349288582802, 'timestamp': '2025-10-02 00:52:55.729883', 'step': 23495, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:55.791833', 'step': 23495, 'epoch': 3}
{'type': 'loss', 'content': 0.032005425542593, 'timestamp': '2025-10-02 00:52:55.804157', 'step': 23496, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:52:55.877247', 'step': 23496, 'epoch': 3}
{'type': 'loss', 'content': 0.0705874115228653, 'timestamp': '2025-10-02 00:52:55.888498', 'step': 23497, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:55.952994', 'step': 23497, 'epoch': 3}
{'type': 'loss', 'content': 0.010337447747588158, 'timestamp': '2025-10-02 00:52:55.958990', 'step': 23498, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:56.027777', 'step': 23498, 'epoch': 3}
{'type': 'loss', 'content': 0.1082058921456337, 'timestamp': '2025-10-02 00:52:56.035834', 'step': 23499, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:56.112589', 'step': 23499, 'epoch': 3}
{'type': 'loss', 'content': 0.05454622581601143, 'timestamp': '2025-10-02 00:52:56.119236', 'step': 23500, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 23500', 'timestamp': '2025-10-02 00:52:56.546553', 'step': 23500, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:56.612995', 'step': 23500, 'epoch': 3}
{'type': 'loss', 'content': 0.09541460126638412, 'timestamp': '2025-10-02 00:52:56.618042', 'step': 23501, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:56.680723', 'step': 23501, 'epoch': 3}
{'type': 'loss', 'content': 0.04832519218325615, 'timestamp': '2025-10-02 00:52:56.688150', 'step': 23502, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:56.758665', 'step': 23502, 'epoch': 3}
{'type': 'loss', 'content': 0.04756299406290054, 'timestamp': '2025-10-02 00:52:56.767130', 'step': 23503, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:56.839108', 'step': 23503, 'epoch': 3}
{'type': 'loss', 'content': 0.0949547067284584, 'timestamp': '2025-10-02 00:52:56.846060', 'step': 23504, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:56.934454', 'step': 23504, 'epoch': 3}
{'type': 'loss', 'content': 0.05445695295929909, 'timestamp': '2025-10-02 00:52:56.947936', 'step': 23505, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:57.005460', 'step': 23505, 'epoch': 3}
{'type': 'loss', 'content': 0.03451668098568916, 'timestamp': '2025-10-02 00:52:57.015000', 'step': 23506, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:52:57.094844', 'step': 23506, 'epoch': 3}
{'type': 'loss', 'content': 0.023137370124459267, 'timestamp': '2025-10-02 00:52:57.105247', 'step': 23507, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:57.184929', 'step': 23507, 'epoch': 3}
{'type': 'loss', 'content': 0.06121349707245827, 'timestamp': '2025-10-02 00:52:57.192987', 'step': 23508, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:57.261324', 'step': 23508, 'epoch': 3}
{'type': 'loss', 'content': 0.11164645850658417, 'timestamp': '2025-10-02 00:52:57.270050', 'step': 23509, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:57.332311', 'step': 23509, 'epoch': 3}
{'type': 'loss', 'content': 0.028117891401052475, 'timestamp': '2025-10-02 00:52:57.341368', 'step': 23510, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:57.414368', 'step': 23510, 'epoch': 3}
{'type': 'loss', 'content': 0.15062914788722992, 'timestamp': '2025-10-02 00:52:57.421903', 'step': 23511, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:57.498712', 'step': 23511, 'epoch': 3}
{'type': 'loss', 'content': 0.05670388415455818, 'timestamp': '2025-10-02 00:52:57.510691', 'step': 23512, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:57.567383', 'step': 23512, 'epoch': 3}
{'type': 'loss', 'content': 0.14692331850528717, 'timestamp': '2025-10-02 00:52:57.577391', 'step': 23513, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:57.658630', 'step': 23513, 'epoch': 3}
{'type': 'loss', 'content': 0.0570734441280365, 'timestamp': '2025-10-02 00:52:57.664779', 'step': 23514, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:57.737803', 'step': 23514, 'epoch': 3}
{'type': 'loss', 'content': 0.06319276988506317, 'timestamp': '2025-10-02 00:52:57.741542', 'step': 23515, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:57.808692', 'step': 23515, 'epoch': 3}
{'type': 'loss', 'content': 0.030000094324350357, 'timestamp': '2025-10-02 00:52:57.822310', 'step': 23516, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:57.906069', 'step': 23516, 'epoch': 3}
{'type': 'loss', 'content': 0.06865382939577103, 'timestamp': '2025-10-02 00:52:57.913323', 'step': 23517, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:52:58.000908', 'step': 23517, 'epoch': 3}
{'type': 'loss', 'content': 0.039556991308927536, 'timestamp': '2025-10-02 00:52:58.005173', 'step': 23518, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:58.082611', 'step': 23518, 'epoch': 3}
{'type': 'loss', 'content': 0.18667477369308472, 'timestamp': '2025-10-02 00:52:58.094651', 'step': 23519, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:58.174808', 'step': 23519, 'epoch': 3}
{'type': 'loss', 'content': 0.0980648398399353, 'timestamp': '2025-10-02 00:52:58.182314', 'step': 23520, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:58.273218', 'step': 23520, 'epoch': 3}
{'type': 'loss', 'content': 0.03399800881743431, 'timestamp': '2025-10-02 00:52:58.278475', 'step': 23521, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:58.355993', 'step': 23521, 'epoch': 3}
{'type': 'loss', 'content': 0.1301751732826233, 'timestamp': '2025-10-02 00:52:58.359997', 'step': 23522, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:58.435450', 'step': 23522, 'epoch': 3}
{'type': 'loss', 'content': 0.13308663666248322, 'timestamp': '2025-10-02 00:52:58.445652', 'step': 23523, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:58.505691', 'step': 23523, 'epoch': 3}
{'type': 'loss', 'content': 0.08403365314006805, 'timestamp': '2025-10-02 00:52:58.516931', 'step': 23524, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:58.580794', 'step': 23524, 'epoch': 3}
{'type': 'loss', 'content': 0.026041867211461067, 'timestamp': '2025-10-02 00:52:58.590175', 'step': 23525, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:58.649759', 'step': 23525, 'epoch': 3}
{'type': 'loss', 'content': 0.015440335497260094, 'timestamp': '2025-10-02 00:52:58.653337', 'step': 23526, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:52:58.717071', 'step': 23526, 'epoch': 3}
{'type': 'loss', 'content': 0.04854893684387207, 'timestamp': '2025-10-02 00:52:58.719720', 'step': 23527, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:58.788081', 'step': 23527, 'epoch': 3}
{'type': 'loss', 'content': 0.07862202078104019, 'timestamp': '2025-10-02 00:52:58.799418', 'step': 23528, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:58.854487', 'step': 23528, 'epoch': 3}
{'type': 'loss', 'content': 0.024046996608376503, 'timestamp': '2025-10-02 00:52:58.861452', 'step': 23529, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:52:58.925925', 'step': 23529, 'epoch': 3}
{'type': 'loss', 'content': 0.019470859318971634, 'timestamp': '2025-10-02 00:52:58.933641', 'step': 23530, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:52:58.995677', 'step': 23530, 'epoch': 3}
{'type': 'loss', 'content': 0.038181155920028687, 'timestamp': '2025-10-02 00:52:59.004774', 'step': 23531, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:52:59.062992', 'step': 23531, 'epoch': 3}
{'type': 'loss', 'content': 0.03671547770500183, 'timestamp': '2025-10-02 00:52:59.070598', 'step': 23532, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:52:59.135652', 'step': 23532, 'epoch': 3}
{'type': 'loss', 'content': 0.0756039097905159, 'timestamp': '2025-10-02 00:52:59.137875', 'step': 23533, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:52:59.204606', 'step': 23533, 'epoch': 3}
{'type': 'loss', 'content': 0.15879228711128235, 'timestamp': '2025-10-02 00:52:59.207552', 'step': 23534, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:52:59.266178', 'step': 23534, 'epoch': 3}
{'type': 'loss', 'content': 0.006793591659516096, 'timestamp': '2025-10-02 00:52:59.272140', 'step': 23535, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:52:59.345522', 'step': 23535, 'epoch': 3}
{'type': 'loss', 'content': 0.07906507700681686, 'timestamp': '2025-10-02 00:52:59.352342', 'step': 23536, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:52:59.411762', 'step': 23536, 'epoch': 3}
{'type': 'loss', 'content': 0.02440393716096878, 'timestamp': '2025-10-02 00:52:59.415351', 'step': 23537, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:52:59.482850', 'step': 23537, 'epoch': 3}
{'type': 'loss', 'content': 0.06417477875947952, 'timestamp': '2025-10-02 00:52:59.485843', 'step': 23538, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:52:59.555949', 'step': 23538, 'epoch': 3}
{'type': 'loss', 'content': 0.0036867675371468067, 'timestamp': '2025-10-02 00:52:59.565508', 'step': 23539, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:52:59.625951', 'step': 23539, 'epoch': 3}
{'type': 'loss', 'content': 0.06334353983402252, 'timestamp': '2025-10-02 00:52:59.632134', 'step': 23540, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:52:59.703889', 'step': 23540, 'epoch': 3}
{'type': 'loss', 'content': 0.026163239032030106, 'timestamp': '2025-10-02 00:52:59.712346', 'step': 23541, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:59.772932', 'step': 23541, 'epoch': 3}
{'type': 'loss', 'content': 0.03184249997138977, 'timestamp': '2025-10-02 00:52:59.783134', 'step': 23542, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:52:59.840391', 'step': 23542, 'epoch': 3}
{'type': 'loss', 'content': 0.10164450854063034, 'timestamp': '2025-10-02 00:52:59.850675', 'step': 23543, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:52:59.937869', 'step': 23543, 'epoch': 3}
{'type': 'loss', 'content': 0.009956800378859043, 'timestamp': '2025-10-02 00:52:59.948850', 'step': 23544, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:00.015152', 'step': 23544, 'epoch': 3}
{'type': 'loss', 'content': 0.1457509845495224, 'timestamp': '2025-10-02 00:53:00.018049', 'step': 23545, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:00.074854', 'step': 23545, 'epoch': 3}
{'type': 'loss', 'content': 0.07228181511163712, 'timestamp': '2025-10-02 00:53:00.078465', 'step': 23546, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:53:00.142306', 'step': 23546, 'epoch': 3}
{'type': 'loss', 'content': 0.06938427686691284, 'timestamp': '2025-10-02 00:53:00.145385', 'step': 23547, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:00.216074', 'step': 23547, 'epoch': 3}
{'type': 'loss', 'content': 0.02327694743871689, 'timestamp': '2025-10-02 00:53:00.227361', 'step': 23548, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:00.283897', 'step': 23548, 'epoch': 3}
{'type': 'loss', 'content': 0.008869731798768044, 'timestamp': '2025-10-02 00:53:00.293017', 'step': 23549, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:53:00.356185', 'step': 23549, 'epoch': 3}
{'type': 'loss', 'content': 0.08338866382837296, 'timestamp': '2025-10-02 00:53:00.361319', 'step': 23550, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:00.419101', 'step': 23550, 'epoch': 3}
{'type': 'loss', 'content': 0.029420899227261543, 'timestamp': '2025-10-02 00:53:00.422794', 'step': 23551, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:00.482860', 'step': 23551, 'epoch': 3}
{'type': 'loss', 'content': 0.00019123558013234288, 'timestamp': '2025-10-02 00:53:00.492799', 'step': 23552, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:00.559877', 'step': 23552, 'epoch': 3}
{'type': 'loss', 'content': 0.060677286237478256, 'timestamp': '2025-10-02 00:53:00.570157', 'step': 23553, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:00.626312', 'step': 23553, 'epoch': 3}
{'type': 'loss', 'content': 0.04725874960422516, 'timestamp': '2025-10-02 00:53:00.629394', 'step': 23554, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:00.691847', 'step': 23554, 'epoch': 3}
{'type': 'loss', 'content': 0.1552305966615677, 'timestamp': '2025-10-02 00:53:00.694647', 'step': 23555, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:53:00.752340', 'step': 23555, 'epoch': 3}
{'type': 'loss', 'content': 0.18795830011367798, 'timestamp': '2025-10-02 00:53:00.759321', 'step': 23556, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:00.815175', 'step': 23556, 'epoch': 3}
{'type': 'loss', 'content': 0.03414525091648102, 'timestamp': '2025-10-02 00:53:00.822891', 'step': 23557, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:00.897391', 'step': 23557, 'epoch': 3}
{'type': 'loss', 'content': 0.021128999069333076, 'timestamp': '2025-10-02 00:53:00.907292', 'step': 23558, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:53:00.979645', 'step': 23558, 'epoch': 3}
{'type': 'loss', 'content': 0.06088995933532715, 'timestamp': '2025-10-02 00:53:00.988204', 'step': 23559, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:01.060669', 'step': 23559, 'epoch': 3}
{'type': 'loss', 'content': 0.06517957895994186, 'timestamp': '2025-10-02 00:53:01.067565', 'step': 23560, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:01.139185', 'step': 23560, 'epoch': 3}
{'type': 'loss', 'content': 0.037386100739240646, 'timestamp': '2025-10-02 00:53:01.145342', 'step': 23561, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:01.206196', 'step': 23561, 'epoch': 3}
{'type': 'loss', 'content': 0.09921115636825562, 'timestamp': '2025-10-02 00:53:01.215775', 'step': 23562, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:01.278750', 'step': 23562, 'epoch': 3}
{'type': 'loss', 'content': 0.036029886454343796, 'timestamp': '2025-10-02 00:53:01.285791', 'step': 23563, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:01.356221', 'step': 23563, 'epoch': 3}
{'type': 'loss', 'content': 0.03773829713463783, 'timestamp': '2025-10-02 00:53:01.366486', 'step': 23564, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:01.441077', 'step': 23564, 'epoch': 3}
{'type': 'loss', 'content': 0.06814339756965637, 'timestamp': '2025-10-02 00:53:01.448728', 'step': 23565, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:01.513135', 'step': 23565, 'epoch': 3}
{'type': 'loss', 'content': 0.008089433424174786, 'timestamp': '2025-10-02 00:53:01.522676', 'step': 23566, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:01.587357', 'step': 23566, 'epoch': 3}
{'type': 'loss', 'content': 0.0692509114742279, 'timestamp': '2025-10-02 00:53:01.592279', 'step': 23567, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:01.658123', 'step': 23567, 'epoch': 3}
{'type': 'loss', 'content': 0.040533628314733505, 'timestamp': '2025-10-02 00:53:01.668241', 'step': 23568, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:53:01.732066', 'step': 23568, 'epoch': 3}
{'type': 'loss', 'content': 0.05428240820765495, 'timestamp': '2025-10-02 00:53:01.737758', 'step': 23569, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:01.797534', 'step': 23569, 'epoch': 3}
{'type': 'loss', 'content': 0.008413570933043957, 'timestamp': '2025-10-02 00:53:01.815125', 'step': 23570, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:01.899328', 'step': 23570, 'epoch': 3}
{'type': 'loss', 'content': 0.05920999124646187, 'timestamp': '2025-10-02 00:53:01.902138', 'step': 23571, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:53:01.975463', 'step': 23571, 'epoch': 3}
{'type': 'loss', 'content': 0.02495494857430458, 'timestamp': '2025-10-02 00:53:01.988166', 'step': 23572, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:02.044917', 'step': 23572, 'epoch': 3}
{'type': 'loss', 'content': 0.016755877062678337, 'timestamp': '2025-10-02 00:53:02.048536', 'step': 23573, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:02.111028', 'step': 23573, 'epoch': 3}
{'type': 'loss', 'content': 0.15361209213733673, 'timestamp': '2025-10-02 00:53:02.118363', 'step': 23574, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:02.179392', 'step': 23574, 'epoch': 3}
{'type': 'loss', 'content': 0.044717539101839066, 'timestamp': '2025-10-02 00:53:02.184960', 'step': 23575, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:02.244055', 'step': 23575, 'epoch': 3}
{'type': 'loss', 'content': 0.01044465508311987, 'timestamp': '2025-10-02 00:53:02.249939', 'step': 23576, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:02.306614', 'step': 23576, 'epoch': 3}
{'type': 'loss', 'content': 0.007707288488745689, 'timestamp': '2025-10-02 00:53:02.309657', 'step': 23577, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:02.375359', 'step': 23577, 'epoch': 3}
{'type': 'loss', 'content': 0.001211774768307805, 'timestamp': '2025-10-02 00:53:02.385988', 'step': 23578, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:02.455279', 'step': 23578, 'epoch': 3}
{'type': 'loss', 'content': 0.011978874914348125, 'timestamp': '2025-10-02 00:53:02.462726', 'step': 23579, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:02.534124', 'step': 23579, 'epoch': 3}
{'type': 'loss', 'content': 0.07572385668754578, 'timestamp': '2025-10-02 00:53:02.544341', 'step': 23580, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:02.605199', 'step': 23580, 'epoch': 3}
{'type': 'loss', 'content': 0.0931062325835228, 'timestamp': '2025-10-02 00:53:02.611661', 'step': 23581, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:02.673239', 'step': 23581, 'epoch': 3}
{'type': 'loss', 'content': 0.014684394933283329, 'timestamp': '2025-10-02 00:53:02.680001', 'step': 23582, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:02.750249', 'step': 23582, 'epoch': 3}
{'type': 'loss', 'content': 0.06567785888910294, 'timestamp': '2025-10-02 00:53:02.759717', 'step': 23583, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:53:02.816323', 'step': 23583, 'epoch': 3}
{'type': 'loss', 'content': 0.03318500891327858, 'timestamp': '2025-10-02 00:53:02.823371', 'step': 23584, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:02.882199', 'step': 23584, 'epoch': 3}
{'type': 'loss', 'content': 0.07635769993066788, 'timestamp': '2025-10-02 00:53:02.885284', 'step': 23585, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:02.939741', 'step': 23585, 'epoch': 3}
{'type': 'loss', 'content': 0.03389711678028107, 'timestamp': '2025-10-02 00:53:02.943265', 'step': 23586, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:02.999518', 'step': 23586, 'epoch': 3}
{'type': 'loss', 'content': 0.06713385879993439, 'timestamp': '2025-10-02 00:53:03.002765', 'step': 23587, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:03.063691', 'step': 23587, 'epoch': 3}
{'type': 'loss', 'content': 0.013150162063539028, 'timestamp': '2025-10-02 00:53:03.070447', 'step': 23588, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:03.130383', 'step': 23588, 'epoch': 3}
{'type': 'loss', 'content': 0.07105563580989838, 'timestamp': '2025-10-02 00:53:03.133191', 'step': 23589, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:53:03.202593', 'step': 23589, 'epoch': 3}
{'type': 'loss', 'content': 0.03222014755010605, 'timestamp': '2025-10-02 00:53:03.212828', 'step': 23590, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:03.269702', 'step': 23590, 'epoch': 3}
{'type': 'loss', 'content': 0.025522196665406227, 'timestamp': '2025-10-02 00:53:03.277589', 'step': 23591, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:53:03.342614', 'step': 23591, 'epoch': 3}
{'type': 'loss', 'content': 0.09566491842269897, 'timestamp': '2025-10-02 00:53:03.349746', 'step': 23592, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:53:03.442705', 'step': 23592, 'epoch': 3}
{'type': 'loss', 'content': 0.06928706169128418, 'timestamp': '2025-10-02 00:53:03.445311', 'step': 23593, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:03.507529', 'step': 23593, 'epoch': 3}
{'type': 'loss', 'content': 0.09553102403879166, 'timestamp': '2025-10-02 00:53:03.512484', 'step': 23594, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:03.576794', 'step': 23594, 'epoch': 3}
{'type': 'loss', 'content': 0.05812903121113777, 'timestamp': '2025-10-02 00:53:03.580742', 'step': 23595, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:03.656307', 'step': 23595, 'epoch': 3}
{'type': 'loss', 'content': 0.03027278371155262, 'timestamp': '2025-10-02 00:53:03.663792', 'step': 23596, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:03.719294', 'step': 23596, 'epoch': 3}
{'type': 'loss', 'content': 0.07779207825660706, 'timestamp': '2025-10-02 00:53:03.722640', 'step': 23597, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:03.814513', 'step': 23597, 'epoch': 3}
{'type': 'loss', 'content': 0.002583554945886135, 'timestamp': '2025-10-02 00:53:03.819809', 'step': 23598, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:03.880795', 'step': 23598, 'epoch': 3}
{'type': 'loss', 'content': 0.05667196959257126, 'timestamp': '2025-10-02 00:53:03.883700', 'step': 23599, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:03.941048', 'step': 23599, 'epoch': 3}
{'type': 'loss', 'content': 0.06080031022429466, 'timestamp': '2025-10-02 00:53:03.947314', 'step': 23600, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:53:04.010122', 'step': 23600, 'epoch': 3}
{'type': 'loss', 'content': 0.07112742960453033, 'timestamp': '2025-10-02 00:53:04.013123', 'step': 23601, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:04.071530', 'step': 23601, 'epoch': 3}
{'type': 'loss', 'content': 0.05015997588634491, 'timestamp': '2025-10-02 00:53:04.074557', 'step': 23602, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:53:04.134531', 'step': 23602, 'epoch': 3}
{'type': 'loss', 'content': 0.12255381792783737, 'timestamp': '2025-10-02 00:53:04.141574', 'step': 23603, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:04.202751', 'step': 23603, 'epoch': 3}
{'type': 'loss', 'content': 0.009056846611201763, 'timestamp': '2025-10-02 00:53:04.209911', 'step': 23604, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:53:04.276391', 'step': 23604, 'epoch': 3}
{'type': 'loss', 'content': 0.00805695354938507, 'timestamp': '2025-10-02 00:53:04.287268', 'step': 23605, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:04.346725', 'step': 23605, 'epoch': 3}
{'type': 'loss', 'content': 0.05312158539891243, 'timestamp': '2025-10-02 00:53:04.350227', 'step': 23606, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:04.414553', 'step': 23606, 'epoch': 3}
{'type': 'loss', 'content': 0.05567312613129616, 'timestamp': '2025-10-02 00:53:04.417477', 'step': 23607, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:04.475045', 'step': 23607, 'epoch': 3}
{'type': 'loss', 'content': 0.06629041582345963, 'timestamp': '2025-10-02 00:53:04.483137', 'step': 23608, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:04.552110', 'step': 23608, 'epoch': 3}
{'type': 'loss', 'content': 0.07757290452718735, 'timestamp': '2025-10-02 00:53:04.559457', 'step': 23609, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:04.622144', 'step': 23609, 'epoch': 3}
{'type': 'loss', 'content': 0.009477213956415653, 'timestamp': '2025-10-02 00:53:04.631101', 'step': 23610, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:04.697546', 'step': 23610, 'epoch': 3}
{'type': 'loss', 'content': 0.02134004607796669, 'timestamp': '2025-10-02 00:53:04.700275', 'step': 23611, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:04.761963', 'step': 23611, 'epoch': 3}
{'type': 'loss', 'content': 0.1700737029314041, 'timestamp': '2025-10-02 00:53:04.768466', 'step': 23612, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:04.823927', 'step': 23612, 'epoch': 3}
{'type': 'loss', 'content': 0.010631240904331207, 'timestamp': '2025-10-02 00:53:04.834185', 'step': 23613, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:04.892020', 'step': 23613, 'epoch': 3}
{'type': 'loss', 'content': 0.012243970297276974, 'timestamp': '2025-10-02 00:53:04.900725', 'step': 23614, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:04.961931', 'step': 23614, 'epoch': 3}
{'type': 'loss', 'content': 0.040320202708244324, 'timestamp': '2025-10-02 00:53:04.964933', 'step': 23615, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:53:05.040032', 'step': 23615, 'epoch': 3}
{'type': 'loss', 'content': 0.05224253609776497, 'timestamp': '2025-10-02 00:53:05.053156', 'step': 23616, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:05.110508', 'step': 23616, 'epoch': 3}
{'type': 'loss', 'content': 0.06689061969518661, 'timestamp': '2025-10-02 00:53:05.113525', 'step': 23617, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:05.172977', 'step': 23617, 'epoch': 3}
{'type': 'loss', 'content': 0.10703717917203903, 'timestamp': '2025-10-02 00:53:05.175819', 'step': 23618, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:05.233320', 'step': 23618, 'epoch': 3}
{'type': 'loss', 'content': 0.14906825125217438, 'timestamp': '2025-10-02 00:53:05.237461', 'step': 23619, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:05.299306', 'step': 23619, 'epoch': 3}
{'type': 'loss', 'content': 0.08194329589605331, 'timestamp': '2025-10-02 00:53:05.308948', 'step': 23620, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:05.372732', 'step': 23620, 'epoch': 3}
{'type': 'loss', 'content': 0.09467292577028275, 'timestamp': '2025-10-02 00:53:05.376451', 'step': 23621, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:05.441650', 'step': 23621, 'epoch': 3}
{'type': 'loss', 'content': 0.0500318743288517, 'timestamp': '2025-10-02 00:53:05.444259', 'step': 23622, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:05.509298', 'step': 23622, 'epoch': 3}
{'type': 'loss', 'content': 0.031207822263240814, 'timestamp': '2025-10-02 00:53:05.516141', 'step': 23623, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:05.574105', 'step': 23623, 'epoch': 3}
{'type': 'loss', 'content': 0.02286483161151409, 'timestamp': '2025-10-02 00:53:05.580965', 'step': 23624, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:05.644789', 'step': 23624, 'epoch': 3}
{'type': 'loss', 'content': 0.050573885440826416, 'timestamp': '2025-10-02 00:53:05.651427', 'step': 23625, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:05.720673', 'step': 23625, 'epoch': 3}
{'type': 'loss', 'content': 0.08869625627994537, 'timestamp': '2025-10-02 00:53:05.726153', 'step': 23626, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:05.790383', 'step': 23626, 'epoch': 3}
{'type': 'loss', 'content': 0.05958934500813484, 'timestamp': '2025-10-02 00:53:05.797470', 'step': 23627, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:05.862712', 'step': 23627, 'epoch': 3}
{'type': 'loss', 'content': 0.05518995225429535, 'timestamp': '2025-10-02 00:53:05.870576', 'step': 23628, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:05.932036', 'step': 23628, 'epoch': 3}
{'type': 'loss', 'content': 0.0647687017917633, 'timestamp': '2025-10-02 00:53:05.934817', 'step': 23629, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:05.992833', 'step': 23629, 'epoch': 3}
{'type': 'loss', 'content': 0.07231735438108444, 'timestamp': '2025-10-02 00:53:05.999644', 'step': 23630, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:06.064162', 'step': 23630, 'epoch': 3}
{'type': 'loss', 'content': 0.019990967586636543, 'timestamp': '2025-10-02 00:53:06.066961', 'step': 23631, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:06.124837', 'step': 23631, 'epoch': 3}
{'type': 'loss', 'content': 0.08331380039453506, 'timestamp': '2025-10-02 00:53:06.132009', 'step': 23632, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:06.197509', 'step': 23632, 'epoch': 3}
{'type': 'loss', 'content': 0.015232332050800323, 'timestamp': '2025-10-02 00:53:06.206626', 'step': 23633, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:53:06.272858', 'step': 23633, 'epoch': 3}
{'type': 'loss', 'content': 0.004094745498150587, 'timestamp': '2025-10-02 00:53:06.283346', 'step': 23634, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-10-02 00:53:06.361459', 'step': 23634, 'epoch': 3}
{'type': 'loss', 'content': 0.006185475736856461, 'timestamp': '2025-10-02 00:53:06.375113', 'step': 23635, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:06.432012', 'step': 23635, 'epoch': 3}
{'type': 'loss', 'content': 0.1271173059940338, 'timestamp': '2025-10-02 00:53:06.438310', 'step': 23636, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:06.492890', 'step': 23636, 'epoch': 3}
{'type': 'loss', 'content': 0.05543094128370285, 'timestamp': '2025-10-02 00:53:06.495585', 'step': 23637, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:06.550316', 'step': 23637, 'epoch': 3}
{'type': 'loss', 'content': 0.02239227294921875, 'timestamp': '2025-10-02 00:53:06.555914', 'step': 23638, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:06.612011', 'step': 23638, 'epoch': 3}
{'type': 'loss', 'content': 0.075928695499897, 'timestamp': '2025-10-02 00:53:06.614522', 'step': 23639, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:06.669720', 'step': 23639, 'epoch': 3}
{'type': 'loss', 'content': 0.06323492527008057, 'timestamp': '2025-10-02 00:53:06.678790', 'step': 23640, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:06.734054', 'step': 23640, 'epoch': 3}
{'type': 'loss', 'content': 0.04071343317627907, 'timestamp': '2025-10-02 00:53:06.736932', 'step': 23641, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:53:06.792798', 'step': 23641, 'epoch': 3}
{'type': 'loss', 'content': 0.07861293852329254, 'timestamp': '2025-10-02 00:53:06.795840', 'step': 23642, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:06.859821', 'step': 23642, 'epoch': 3}
{'type': 'loss', 'content': 0.06748364120721817, 'timestamp': '2025-10-02 00:53:06.862783', 'step': 23643, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:06.920420', 'step': 23643, 'epoch': 3}
{'type': 'loss', 'content': 0.07916370034217834, 'timestamp': '2025-10-02 00:53:06.926407', 'step': 23644, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:53:06.980380', 'step': 23644, 'epoch': 3}
{'type': 'loss', 'content': 0.10463487356901169, 'timestamp': '2025-10-02 00:53:06.983747', 'step': 23645, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:53:07.037957', 'step': 23645, 'epoch': 3}
{'type': 'loss', 'content': 0.022744862362742424, 'timestamp': '2025-10-02 00:53:07.040531', 'step': 23646, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:07.095721', 'step': 23646, 'epoch': 3}
{'type': 'loss', 'content': 0.0459417924284935, 'timestamp': '2025-10-02 00:53:07.101410', 'step': 23647, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:53:07.157076', 'step': 23647, 'epoch': 3}
{'type': 'loss', 'content': 0.08457934111356735, 'timestamp': '2025-10-02 00:53:07.163618', 'step': 23648, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:53:07.218309', 'step': 23648, 'epoch': 3}
{'type': 'loss', 'content': 0.02391698956489563, 'timestamp': '2025-10-02 00:53:07.220617', 'step': 23649, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:07.275372', 'step': 23649, 'epoch': 3}
{'type': 'loss', 'content': 0.019462786614894867, 'timestamp': '2025-10-02 00:53:07.279285', 'step': 23650, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:07.333984', 'step': 23650, 'epoch': 3}
{'type': 'loss', 'content': 0.028852438554167747, 'timestamp': '2025-10-02 00:53:07.336852', 'step': 23651, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:07.392110', 'step': 23651, 'epoch': 3}
{'type': 'loss', 'content': 0.07734973728656769, 'timestamp': '2025-10-02 00:53:07.398469', 'step': 23652, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:07.452425', 'step': 23652, 'epoch': 3}
{'type': 'loss', 'content': 0.10192541033029556, 'timestamp': '2025-10-02 00:53:07.454889', 'step': 23653, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:07.510280', 'step': 23653, 'epoch': 3}
{'type': 'loss', 'content': 0.0371384359896183, 'timestamp': '2025-10-02 00:53:07.512842', 'step': 23654, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:07.569078', 'step': 23654, 'epoch': 3}
{'type': 'loss', 'content': 0.05462566390633583, 'timestamp': '2025-10-02 00:53:07.572039', 'step': 23655, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:07.627630', 'step': 23655, 'epoch': 3}
{'type': 'loss', 'content': 0.016930676996707916, 'timestamp': '2025-10-02 00:53:07.633597', 'step': 23656, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:07.688689', 'step': 23656, 'epoch': 3}
{'type': 'loss', 'content': 0.02741886116564274, 'timestamp': '2025-10-02 00:53:07.696006', 'step': 23657, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:53:07.751401', 'step': 23657, 'epoch': 3}
{'type': 'loss', 'content': 0.1895393580198288, 'timestamp': '2025-10-02 00:53:07.755488', 'step': 23658, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:53:07.815883', 'step': 23658, 'epoch': 3}
{'type': 'loss', 'content': 0.004851314704865217, 'timestamp': '2025-10-02 00:53:07.826009', 'step': 23659, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:07.881285', 'step': 23659, 'epoch': 3}
{'type': 'loss', 'content': 0.048467379063367844, 'timestamp': '2025-10-02 00:53:07.887468', 'step': 23660, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:07.943183', 'step': 23660, 'epoch': 3}
{'type': 'loss', 'content': 0.07506916671991348, 'timestamp': '2025-10-02 00:53:07.950408', 'step': 23661, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:08.007528', 'step': 23661, 'epoch': 3}
{'type': 'loss', 'content': 0.1691615730524063, 'timestamp': '2025-10-02 00:53:08.010220', 'step': 23662, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:08.066995', 'step': 23662, 'epoch': 3}
{'type': 'loss', 'content': 0.031707148998975754, 'timestamp': '2025-10-02 00:53:08.074011', 'step': 23663, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:53:08.128882', 'step': 23663, 'epoch': 3}
{'type': 'loss', 'content': 0.05491221696138382, 'timestamp': '2025-10-02 00:53:08.135335', 'step': 23664, 'epoch': 3}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:53:38.807373', 'step': 23664, 'epoch': 3}
{'type': 'pplx', 'content': 113.50052135412113, 'timestamp': '2025-10-02 00:53:38.812690', 'step': 23664, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:53:38.880283', 'step': 23664, 'epoch': 3}
{'type': 'loss', 'content': 0.032196398824453354, 'timestamp': '2025-10-02 00:53:38.886548', 'step': 23665, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:38.951505', 'step': 23665, 'epoch': 3}
{'type': 'loss', 'content': 0.007756850216537714, 'timestamp': '2025-10-02 00:53:38.959704', 'step': 23666, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:53:39.051504', 'step': 23666, 'epoch': 3}
{'type': 'loss', 'content': 0.04180970415472984, 'timestamp': '2025-10-02 00:53:39.063926', 'step': 23667, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:39.140427', 'step': 23667, 'epoch': 3}
{'type': 'loss', 'content': 0.061102334409952164, 'timestamp': '2025-10-02 00:53:39.148010', 'step': 23668, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:53:39.226793', 'step': 23668, 'epoch': 3}
{'type': 'loss', 'content': 0.006364610977470875, 'timestamp': '2025-10-02 00:53:39.240428', 'step': 23669, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:39.307859', 'step': 23669, 'epoch': 3}
{'type': 'loss', 'content': 0.0010674289660528302, 'timestamp': '2025-10-02 00:53:39.310870', 'step': 23670, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:39.373769', 'step': 23670, 'epoch': 3}
{'type': 'loss', 'content': 0.03832213953137398, 'timestamp': '2025-10-02 00:53:39.382425', 'step': 23671, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:39.458510', 'step': 23671, 'epoch': 3}
{'type': 'loss', 'content': 0.07267071306705475, 'timestamp': '2025-10-02 00:53:39.468819', 'step': 23672, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:39.526342', 'step': 23672, 'epoch': 3}
{'type': 'loss', 'content': 0.023496773093938828, 'timestamp': '2025-10-02 00:53:39.535946', 'step': 23673, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:39.609328', 'step': 23673, 'epoch': 3}
{'type': 'loss', 'content': 0.05313792824745178, 'timestamp': '2025-10-02 00:53:39.618483', 'step': 23674, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:39.674925', 'step': 23674, 'epoch': 3}
{'type': 'loss', 'content': 0.07952455431222916, 'timestamp': '2025-10-02 00:53:39.681028', 'step': 23675, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:53:39.742081', 'step': 23675, 'epoch': 3}
{'type': 'loss', 'content': 0.0765882059931755, 'timestamp': '2025-10-02 00:53:39.748022', 'step': 23676, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:53:39.814593', 'step': 23676, 'epoch': 3}
{'type': 'loss', 'content': 0.032644763588905334, 'timestamp': '2025-10-02 00:53:39.825585', 'step': 23677, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:53:39.909438', 'step': 23677, 'epoch': 3}
{'type': 'loss', 'content': 0.0659816712141037, 'timestamp': '2025-10-02 00:53:39.920947', 'step': 23678, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:39.979020', 'step': 23678, 'epoch': 3}
{'type': 'loss', 'content': 0.02533656358718872, 'timestamp': '2025-10-02 00:53:39.988394', 'step': 23679, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:40.079205', 'step': 23679, 'epoch': 3}
{'type': 'loss', 'content': 0.09687171876430511, 'timestamp': '2025-10-02 00:53:40.087438', 'step': 23680, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:40.144491', 'step': 23680, 'epoch': 3}
{'type': 'loss', 'content': 0.058206673711538315, 'timestamp': '2025-10-02 00:53:40.147557', 'step': 23681, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:40.214354', 'step': 23681, 'epoch': 3}
{'type': 'loss', 'content': 0.021520882844924927, 'timestamp': '2025-10-02 00:53:40.223717', 'step': 23682, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:40.283296', 'step': 23682, 'epoch': 3}
{'type': 'loss', 'content': 0.014717933721840382, 'timestamp': '2025-10-02 00:53:40.286540', 'step': 23683, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:40.352912', 'step': 23683, 'epoch': 3}
{'type': 'loss', 'content': 0.10255628079175949, 'timestamp': '2025-10-02 00:53:40.362830', 'step': 23684, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:40.436840', 'step': 23684, 'epoch': 3}
{'type': 'loss', 'content': 0.04331270977854729, 'timestamp': '2025-10-02 00:53:40.448956', 'step': 23685, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:40.509207', 'step': 23685, 'epoch': 3}
{'type': 'loss', 'content': 0.035209640860557556, 'timestamp': '2025-10-02 00:53:40.518765', 'step': 23686, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:40.576117', 'step': 23686, 'epoch': 3}
{'type': 'loss', 'content': 0.09003031998872757, 'timestamp': '2025-10-02 00:53:40.578851', 'step': 23687, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:40.635733', 'step': 23687, 'epoch': 3}
{'type': 'loss', 'content': 0.043474674224853516, 'timestamp': '2025-10-02 00:53:40.643211', 'step': 23688, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:40.698413', 'step': 23688, 'epoch': 3}
{'type': 'loss', 'content': 0.04644932225346565, 'timestamp': '2025-10-02 00:53:40.708656', 'step': 23689, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:40.769939', 'step': 23689, 'epoch': 3}
{'type': 'loss', 'content': 0.19571688771247864, 'timestamp': '2025-10-02 00:53:40.780071', 'step': 23690, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:53:40.851913', 'step': 23690, 'epoch': 3}
{'type': 'loss', 'content': 0.07738327980041504, 'timestamp': '2025-10-02 00:53:40.863337', 'step': 23691, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:40.922653', 'step': 23691, 'epoch': 3}
{'type': 'loss', 'content': 0.018951382488012314, 'timestamp': '2025-10-02 00:53:40.929912', 'step': 23692, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:53:41.005601', 'step': 23692, 'epoch': 3}
{'type': 'loss', 'content': 0.022805724292993546, 'timestamp': '2025-10-02 00:53:41.016582', 'step': 23693, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:41.073670', 'step': 23693, 'epoch': 3}
{'type': 'loss', 'content': 0.06569307297468185, 'timestamp': '2025-10-02 00:53:41.082134', 'step': 23694, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:41.164307', 'step': 23694, 'epoch': 3}
{'type': 'loss', 'content': 0.03529614582657814, 'timestamp': '2025-10-02 00:53:41.173664', 'step': 23695, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:41.230618', 'step': 23695, 'epoch': 3}
{'type': 'loss', 'content': 0.15938474237918854, 'timestamp': '2025-10-02 00:53:41.245395', 'step': 23696, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:41.304397', 'step': 23696, 'epoch': 3}
{'type': 'loss', 'content': 0.03897273167967796, 'timestamp': '2025-10-02 00:53:41.307139', 'step': 23697, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:41.363138', 'step': 23697, 'epoch': 3}
{'type': 'loss', 'content': 0.0009784629801288247, 'timestamp': '2025-10-02 00:53:41.369173', 'step': 23698, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:41.428817', 'step': 23698, 'epoch': 3}
{'type': 'loss', 'content': 0.060332272201776505, 'timestamp': '2025-10-02 00:53:41.438180', 'step': 23699, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:41.502694', 'step': 23699, 'epoch': 3}
{'type': 'loss', 'content': 0.03121342882514, 'timestamp': '2025-10-02 00:53:41.511055', 'step': 23700, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:53:41.568379', 'step': 23700, 'epoch': 3}
{'type': 'loss', 'content': 0.08498020470142365, 'timestamp': '2025-10-02 00:53:41.581848', 'step': 23701, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:41.641675', 'step': 23701, 'epoch': 3}
{'type': 'loss', 'content': 0.10789734125137329, 'timestamp': '2025-10-02 00:53:41.653216', 'step': 23702, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:41.717638', 'step': 23702, 'epoch': 3}
{'type': 'loss', 'content': 0.010780837386846542, 'timestamp': '2025-10-02 00:53:41.721365', 'step': 23703, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:41.785728', 'step': 23703, 'epoch': 3}
{'type': 'loss', 'content': 0.022875357419252396, 'timestamp': '2025-10-02 00:53:41.793361', 'step': 23704, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:53:41.864348', 'step': 23704, 'epoch': 3}
{'type': 'loss', 'content': 0.03412830829620361, 'timestamp': '2025-10-02 00:53:41.875316', 'step': 23705, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:41.932510', 'step': 23705, 'epoch': 3}
{'type': 'loss', 'content': 0.03582720831036568, 'timestamp': '2025-10-02 00:53:41.940496', 'step': 23706, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:42.017502', 'step': 23706, 'epoch': 3}
{'type': 'loss', 'content': 0.01847093179821968, 'timestamp': '2025-10-02 00:53:42.025439', 'step': 23707, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:42.097720', 'step': 23707, 'epoch': 3}
{'type': 'loss', 'content': 0.06366689503192902, 'timestamp': '2025-10-02 00:53:42.108944', 'step': 23708, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:53:42.164101', 'step': 23708, 'epoch': 3}
{'type': 'loss', 'content': 0.04559335112571716, 'timestamp': '2025-10-02 00:53:42.167054', 'step': 23709, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:53:42.237693', 'step': 23709, 'epoch': 3}
{'type': 'loss', 'content': 0.09189651906490326, 'timestamp': '2025-10-02 00:53:42.247868', 'step': 23710, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:42.307692', 'step': 23710, 'epoch': 3}
{'type': 'loss', 'content': 0.08612450212240219, 'timestamp': '2025-10-02 00:53:42.317179', 'step': 23711, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:42.374597', 'step': 23711, 'epoch': 3}
{'type': 'loss', 'content': 0.027126312255859375, 'timestamp': '2025-10-02 00:53:42.381453', 'step': 23712, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:42.445078', 'step': 23712, 'epoch': 3}
{'type': 'loss', 'content': 0.006047495640814304, 'timestamp': '2025-10-02 00:53:42.448019', 'step': 23713, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:53:42.504427', 'step': 23713, 'epoch': 3}
{'type': 'loss', 'content': 0.12727025151252747, 'timestamp': '2025-10-02 00:53:42.506829', 'step': 23714, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:42.571912', 'step': 23714, 'epoch': 3}
{'type': 'loss', 'content': 0.013025017455220222, 'timestamp': '2025-10-02 00:53:42.581281', 'step': 23715, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:42.648246', 'step': 23715, 'epoch': 3}
{'type': 'loss', 'content': 0.0935933068394661, 'timestamp': '2025-10-02 00:53:42.655609', 'step': 23716, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:53:42.718354', 'step': 23716, 'epoch': 3}
{'type': 'loss', 'content': 0.015324988402426243, 'timestamp': '2025-10-02 00:53:42.729364', 'step': 23717, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:42.794542', 'step': 23717, 'epoch': 3}
{'type': 'loss', 'content': 0.05222957581281662, 'timestamp': '2025-10-02 00:53:42.797687', 'step': 23718, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:42.858317', 'step': 23718, 'epoch': 3}
{'type': 'loss', 'content': 0.016010859981179237, 'timestamp': '2025-10-02 00:53:42.861273', 'step': 23719, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:42.933940', 'step': 23719, 'epoch': 3}
{'type': 'loss', 'content': 0.0750652477145195, 'timestamp': '2025-10-02 00:53:42.949671', 'step': 23720, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:53:43.006193', 'step': 23720, 'epoch': 3}
{'type': 'loss', 'content': 0.07455787807703018, 'timestamp': '2025-10-02 00:53:43.017275', 'step': 23721, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:43.077963', 'step': 23721, 'epoch': 3}
{'type': 'loss', 'content': 0.013397657312452793, 'timestamp': '2025-10-02 00:53:43.093710', 'step': 23722, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:43.179152', 'step': 23722, 'epoch': 3}
{'type': 'loss', 'content': 0.02238779328763485, 'timestamp': '2025-10-02 00:53:43.190122', 'step': 23723, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:43.273774', 'step': 23723, 'epoch': 3}
{'type': 'loss', 'content': 0.07475990802049637, 'timestamp': '2025-10-02 00:53:43.282126', 'step': 23724, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:43.363784', 'step': 23724, 'epoch': 3}
{'type': 'loss', 'content': 0.042404502630233765, 'timestamp': '2025-10-02 00:53:43.373469', 'step': 23725, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:43.439702', 'step': 23725, 'epoch': 3}
{'type': 'loss', 'content': 0.0943550243973732, 'timestamp': '2025-10-02 00:53:43.443138', 'step': 23726, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:43.524141', 'step': 23726, 'epoch': 3}
{'type': 'loss', 'content': 0.07325112074613571, 'timestamp': '2025-10-02 00:53:43.528801', 'step': 23727, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:53:43.594074', 'step': 23727, 'epoch': 3}
{'type': 'loss', 'content': 0.02291545830667019, 'timestamp': '2025-10-02 00:53:43.606785', 'step': 23728, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:43.671992', 'step': 23728, 'epoch': 3}
{'type': 'loss', 'content': 0.017188314348459244, 'timestamp': '2025-10-02 00:53:43.676172', 'step': 23729, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:53:43.756079', 'step': 23729, 'epoch': 3}
{'type': 'loss', 'content': 0.07625401020050049, 'timestamp': '2025-10-02 00:53:43.768335', 'step': 23730, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:53:43.839813', 'step': 23730, 'epoch': 3}
{'type': 'loss', 'content': 0.036945048719644547, 'timestamp': '2025-10-02 00:53:43.849997', 'step': 23731, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:43.926883', 'step': 23731, 'epoch': 3}
{'type': 'loss', 'content': 0.022845571860671043, 'timestamp': '2025-10-02 00:53:43.934336', 'step': 23732, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:44.016660', 'step': 23732, 'epoch': 3}
{'type': 'loss', 'content': 0.018848344683647156, 'timestamp': '2025-10-02 00:53:44.021121', 'step': 23733, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:44.088847', 'step': 23733, 'epoch': 3}
{'type': 'loss', 'content': 0.046000197529792786, 'timestamp': '2025-10-02 00:53:44.092629', 'step': 23734, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:44.170595', 'step': 23734, 'epoch': 3}
{'type': 'loss', 'content': 0.04404172673821449, 'timestamp': '2025-10-02 00:53:44.176542', 'step': 23735, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:44.243626', 'step': 23735, 'epoch': 3}
{'type': 'loss', 'content': 0.061544228345155716, 'timestamp': '2025-10-02 00:53:44.251907', 'step': 23736, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:44.316834', 'step': 23736, 'epoch': 3}
{'type': 'loss', 'content': 0.06003227457404137, 'timestamp': '2025-10-02 00:53:44.321087', 'step': 23737, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:44.378833', 'step': 23737, 'epoch': 3}
{'type': 'loss', 'content': 0.021378785371780396, 'timestamp': '2025-10-02 00:53:44.386512', 'step': 23738, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:53:44.452260', 'step': 23738, 'epoch': 3}
{'type': 'loss', 'content': 0.036012060940265656, 'timestamp': '2025-10-02 00:53:44.462697', 'step': 23739, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:53:44.575942', 'step': 23739, 'epoch': 3}
{'type': 'loss', 'content': 0.0013398423325270414, 'timestamp': '2025-10-02 00:53:44.590186', 'step': 23740, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:53:44.689333', 'step': 23740, 'epoch': 3}
{'type': 'loss', 'content': 0.10363149642944336, 'timestamp': '2025-10-02 00:53:44.694895', 'step': 23741, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:44.767519', 'step': 23741, 'epoch': 3}
{'type': 'loss', 'content': 0.0291031152009964, 'timestamp': '2025-10-02 00:53:44.771644', 'step': 23742, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:44.855921', 'step': 23742, 'epoch': 3}
{'type': 'loss', 'content': 0.024007627740502357, 'timestamp': '2025-10-02 00:53:44.869800', 'step': 23743, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:53:44.936394', 'step': 23743, 'epoch': 3}
{'type': 'loss', 'content': 0.02438417449593544, 'timestamp': '2025-10-02 00:53:44.947646', 'step': 23744, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:53:45.013454', 'step': 23744, 'epoch': 3}
{'type': 'loss', 'content': 0.03508973866701126, 'timestamp': '2025-10-02 00:53:45.025251', 'step': 23745, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:53:45.087499', 'step': 23745, 'epoch': 3}
{'type': 'loss', 'content': 0.05108099803328514, 'timestamp': '2025-10-02 00:53:45.098552', 'step': 23746, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:53:45.185810', 'step': 23746, 'epoch': 3}
{'type': 'loss', 'content': 0.010445110499858856, 'timestamp': '2025-10-02 00:53:45.200871', 'step': 23747, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:45.266450', 'step': 23747, 'epoch': 3}
{'type': 'loss', 'content': 0.07260057330131531, 'timestamp': '2025-10-02 00:53:45.274598', 'step': 23748, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:45.344750', 'step': 23748, 'epoch': 3}
{'type': 'loss', 'content': 0.050961922854185104, 'timestamp': '2025-10-02 00:53:45.349217', 'step': 23749, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:45.419165', 'step': 23749, 'epoch': 3}
{'type': 'loss', 'content': 0.05873088911175728, 'timestamp': '2025-10-02 00:53:45.428508', 'step': 23750, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:45.497793', 'step': 23750, 'epoch': 3}
{'type': 'loss', 'content': 0.07196090370416641, 'timestamp': '2025-10-02 00:53:45.506451', 'step': 23751, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:45.569375', 'step': 23751, 'epoch': 3}
{'type': 'loss', 'content': 0.05163197219371796, 'timestamp': '2025-10-02 00:53:45.578565', 'step': 23752, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:53:45.643945', 'step': 23752, 'epoch': 3}
{'type': 'loss', 'content': 0.03187070041894913, 'timestamp': '2025-10-02 00:53:45.656115', 'step': 23753, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:45.730845', 'step': 23753, 'epoch': 3}
{'type': 'loss', 'content': 0.033821746706962585, 'timestamp': '2025-10-02 00:53:45.733951', 'step': 23754, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:45.811374', 'step': 23754, 'epoch': 3}
{'type': 'loss', 'content': 0.027703475207090378, 'timestamp': '2025-10-02 00:53:45.815916', 'step': 23755, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:53:45.891523', 'step': 23755, 'epoch': 3}
{'type': 'loss', 'content': 0.017039034515619278, 'timestamp': '2025-10-02 00:53:45.908505', 'step': 23756, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:53:45.977508', 'step': 23756, 'epoch': 3}
{'type': 'loss', 'content': 0.013247687369585037, 'timestamp': '2025-10-02 00:53:45.988924', 'step': 23757, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:53:46.049926', 'step': 23757, 'epoch': 3}
{'type': 'loss', 'content': 0.043163787573575974, 'timestamp': '2025-10-02 00:53:46.060146', 'step': 23758, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:46.121008', 'step': 23758, 'epoch': 3}
{'type': 'loss', 'content': 0.08400923758745193, 'timestamp': '2025-10-02 00:53:46.126947', 'step': 23759, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:46.182751', 'step': 23759, 'epoch': 3}
{'type': 'loss', 'content': 0.052456945180892944, 'timestamp': '2025-10-02 00:53:46.196027', 'step': 23760, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:46.252714', 'step': 23760, 'epoch': 3}
{'type': 'loss', 'content': 0.020364876836538315, 'timestamp': '2025-10-02 00:53:46.257756', 'step': 23761, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:46.325683', 'step': 23761, 'epoch': 3}
{'type': 'loss', 'content': 0.029580790549516678, 'timestamp': '2025-10-02 00:53:46.328878', 'step': 23762, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:53:46.391869', 'step': 23762, 'epoch': 3}
{'type': 'loss', 'content': 0.10061015188694, 'timestamp': '2025-10-02 00:53:46.402213', 'step': 23763, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:46.473148', 'step': 23763, 'epoch': 3}
{'type': 'loss', 'content': 0.00871377531439066, 'timestamp': '2025-10-02 00:53:46.480568', 'step': 23764, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:46.537600', 'step': 23764, 'epoch': 3}
{'type': 'loss', 'content': 0.04142910614609718, 'timestamp': '2025-10-02 00:53:46.541524', 'step': 23765, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:46.615989', 'step': 23765, 'epoch': 3}
{'type': 'loss', 'content': 0.018499936908483505, 'timestamp': '2025-10-02 00:53:46.623460', 'step': 23766, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:46.685136', 'step': 23766, 'epoch': 3}
{'type': 'loss', 'content': 0.0333380363881588, 'timestamp': '2025-10-02 00:53:46.694405', 'step': 23767, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:46.750176', 'step': 23767, 'epoch': 3}
{'type': 'loss', 'content': 0.030929183587431908, 'timestamp': '2025-10-02 00:53:46.756205', 'step': 23768, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:53:46.816650', 'step': 23768, 'epoch': 3}
{'type': 'loss', 'content': 0.1488150805234909, 'timestamp': '2025-10-02 00:53:46.819466', 'step': 23769, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:53:46.884192', 'step': 23769, 'epoch': 3}
{'type': 'loss', 'content': 0.01758146658539772, 'timestamp': '2025-10-02 00:53:46.894396', 'step': 23770, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:46.959949', 'step': 23770, 'epoch': 3}
{'type': 'loss', 'content': 0.02729353867471218, 'timestamp': '2025-10-02 00:53:46.969296', 'step': 23771, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:47.032591', 'step': 23771, 'epoch': 3}
{'type': 'loss', 'content': 0.05607049539685249, 'timestamp': '2025-10-02 00:53:47.042635', 'step': 23772, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:47.100332', 'step': 23772, 'epoch': 3}
{'type': 'loss', 'content': 0.10982802510261536, 'timestamp': '2025-10-02 00:53:47.102562', 'step': 23773, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:47.161811', 'step': 23773, 'epoch': 3}
{'type': 'loss', 'content': 0.02750314027070999, 'timestamp': '2025-10-02 00:53:47.164730', 'step': 23774, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:47.236177', 'step': 23774, 'epoch': 3}
{'type': 'loss', 'content': 0.037319861352443695, 'timestamp': '2025-10-02 00:53:47.239335', 'step': 23775, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:47.296458', 'step': 23775, 'epoch': 3}
{'type': 'loss', 'content': 0.040579985827207565, 'timestamp': '2025-10-02 00:53:47.306609', 'step': 23776, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:47.363241', 'step': 23776, 'epoch': 3}
{'type': 'loss', 'content': 0.08012542128562927, 'timestamp': '2025-10-02 00:53:47.366126', 'step': 23777, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:47.422399', 'step': 23777, 'epoch': 3}
{'type': 'loss', 'content': 0.012028797529637814, 'timestamp': '2025-10-02 00:53:47.425509', 'step': 23778, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:47.482679', 'step': 23778, 'epoch': 3}
{'type': 'loss', 'content': 0.024085329845547676, 'timestamp': '2025-10-02 00:53:47.491177', 'step': 23779, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:47.563623', 'step': 23779, 'epoch': 3}
{'type': 'loss', 'content': 0.007799339015036821, 'timestamp': '2025-10-02 00:53:47.574717', 'step': 23780, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:53:47.645742', 'step': 23780, 'epoch': 3}
{'type': 'loss', 'content': 0.03181171789765358, 'timestamp': '2025-10-02 00:53:47.649201', 'step': 23781, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:47.704871', 'step': 23781, 'epoch': 3}
{'type': 'loss', 'content': 0.07314077019691467, 'timestamp': '2025-10-02 00:53:47.708465', 'step': 23782, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:47.775979', 'step': 23782, 'epoch': 3}
{'type': 'loss', 'content': 0.0010423014173284173, 'timestamp': '2025-10-02 00:53:47.779091', 'step': 23783, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:53:47.836819', 'step': 23783, 'epoch': 3}
{'type': 'loss', 'content': 0.030269617214798927, 'timestamp': '2025-10-02 00:53:47.843059', 'step': 23784, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:53:47.901859', 'step': 23784, 'epoch': 3}
{'type': 'loss', 'content': 0.10735628753900528, 'timestamp': '2025-10-02 00:53:47.904529', 'step': 23785, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:47.961499', 'step': 23785, 'epoch': 3}
{'type': 'loss', 'content': 0.09758514165878296, 'timestamp': '2025-10-02 00:53:47.971096', 'step': 23786, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:53:48.041676', 'step': 23786, 'epoch': 3}
{'type': 'loss', 'content': 0.024530040100216866, 'timestamp': '2025-10-02 00:53:48.045387', 'step': 23787, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:48.112142', 'step': 23787, 'epoch': 3}
{'type': 'loss', 'content': 0.07397828996181488, 'timestamp': '2025-10-02 00:53:48.128919', 'step': 23788, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:48.195537', 'step': 23788, 'epoch': 3}
{'type': 'loss', 'content': 0.06561867147684097, 'timestamp': '2025-10-02 00:53:48.205384', 'step': 23789, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:48.275622', 'step': 23789, 'epoch': 3}
{'type': 'loss', 'content': 0.016814451664686203, 'timestamp': '2025-10-02 00:53:48.282762', 'step': 23790, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:48.360627', 'step': 23790, 'epoch': 3}
{'type': 'loss', 'content': 0.10001359134912491, 'timestamp': '2025-10-02 00:53:48.368862', 'step': 23791, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:48.426847', 'step': 23791, 'epoch': 3}
{'type': 'loss', 'content': 0.07908975332975388, 'timestamp': '2025-10-02 00:53:48.433742', 'step': 23792, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:48.496204', 'step': 23792, 'epoch': 3}
{'type': 'loss', 'content': 0.02839478850364685, 'timestamp': '2025-10-02 00:53:48.505748', 'step': 23793, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:53:48.578780', 'step': 23793, 'epoch': 3}
{'type': 'loss', 'content': 0.023513108491897583, 'timestamp': '2025-10-02 00:53:48.589446', 'step': 23794, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:48.652382', 'step': 23794, 'epoch': 3}
{'type': 'loss', 'content': 0.020249731838703156, 'timestamp': '2025-10-02 00:53:48.655700', 'step': 23795, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:48.717280', 'step': 23795, 'epoch': 3}
{'type': 'loss', 'content': 0.10779532045125961, 'timestamp': '2025-10-02 00:53:48.730809', 'step': 23796, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:48.788714', 'step': 23796, 'epoch': 3}
{'type': 'loss', 'content': 0.035366810858249664, 'timestamp': '2025-10-02 00:53:48.798954', 'step': 23797, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:48.854433', 'step': 23797, 'epoch': 3}
{'type': 'loss', 'content': 0.018873607739806175, 'timestamp': '2025-10-02 00:53:48.857445', 'step': 23798, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:48.912786', 'step': 23798, 'epoch': 3}
{'type': 'loss', 'content': 0.05699392408132553, 'timestamp': '2025-10-02 00:53:48.918726', 'step': 23799, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:48.976355', 'step': 23799, 'epoch': 3}
{'type': 'loss', 'content': 0.04961702972650528, 'timestamp': '2025-10-02 00:53:48.982527', 'step': 23800, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:49.037876', 'step': 23800, 'epoch': 3}
{'type': 'loss', 'content': 0.08413311094045639, 'timestamp': '2025-10-02 00:53:49.043760', 'step': 23801, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:49.100687', 'step': 23801, 'epoch': 3}
{'type': 'loss', 'content': 0.10690922290086746, 'timestamp': '2025-10-02 00:53:49.103216', 'step': 23802, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:53:49.166784', 'step': 23802, 'epoch': 3}
{'type': 'loss', 'content': 0.014292209409177303, 'timestamp': '2025-10-02 00:53:49.176985', 'step': 23803, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:49.242578', 'step': 23803, 'epoch': 3}
{'type': 'loss', 'content': 0.02686254121363163, 'timestamp': '2025-10-02 00:53:49.248512', 'step': 23804, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:49.305493', 'step': 23804, 'epoch': 3}
{'type': 'loss', 'content': 0.05181979760527611, 'timestamp': '2025-10-02 00:53:49.314115', 'step': 23805, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:53:49.385746', 'step': 23805, 'epoch': 3}
{'type': 'loss', 'content': 0.10085256397724152, 'timestamp': '2025-10-02 00:53:49.388365', 'step': 23806, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:49.446623', 'step': 23806, 'epoch': 3}
{'type': 'loss', 'content': 0.10522076487541199, 'timestamp': '2025-10-02 00:53:49.449169', 'step': 23807, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:49.517366', 'step': 23807, 'epoch': 3}
{'type': 'loss', 'content': 0.07758526504039764, 'timestamp': '2025-10-02 00:53:49.524014', 'step': 23808, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:49.582242', 'step': 23808, 'epoch': 3}
{'type': 'loss', 'content': 0.060274139046669006, 'timestamp': '2025-10-02 00:53:49.587826', 'step': 23809, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:49.655751', 'step': 23809, 'epoch': 3}
{'type': 'loss', 'content': 0.056272659450769424, 'timestamp': '2025-10-02 00:53:49.658384', 'step': 23810, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:49.728033', 'step': 23810, 'epoch': 3}
{'type': 'loss', 'content': 0.10012388974428177, 'timestamp': '2025-10-02 00:53:49.737371', 'step': 23811, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:53:49.806046', 'step': 23811, 'epoch': 3}
{'type': 'loss', 'content': 0.01567898876965046, 'timestamp': '2025-10-02 00:53:49.818763', 'step': 23812, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:53:49.876482', 'step': 23812, 'epoch': 3}
{'type': 'loss', 'content': 0.010854209773242474, 'timestamp': '2025-10-02 00:53:49.887448', 'step': 23813, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:49.942295', 'step': 23813, 'epoch': 3}
{'type': 'loss', 'content': 0.05191561207175255, 'timestamp': '2025-10-02 00:53:49.944657', 'step': 23814, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:53:50.006157', 'step': 23814, 'epoch': 3}
{'type': 'loss', 'content': 0.022577689960598946, 'timestamp': '2025-10-02 00:53:50.016624', 'step': 23815, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:53:50.071019', 'step': 23815, 'epoch': 3}
{'type': 'loss', 'content': 0.14472264051437378, 'timestamp': '2025-10-02 00:53:50.076703', 'step': 23816, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:50.129883', 'step': 23816, 'epoch': 3}
{'type': 'loss', 'content': 0.060298629105091095, 'timestamp': '2025-10-02 00:53:50.131998', 'step': 23817, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:50.186229', 'step': 23817, 'epoch': 3}
{'type': 'loss', 'content': 0.031725939363241196, 'timestamp': '2025-10-02 00:53:50.188749', 'step': 23818, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:50.242480', 'step': 23818, 'epoch': 3}
{'type': 'loss', 'content': 0.05964118614792824, 'timestamp': '2025-10-02 00:53:50.245391', 'step': 23819, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:50.300114', 'step': 23819, 'epoch': 3}
{'type': 'loss', 'content': 0.0024192985147237778, 'timestamp': '2025-10-02 00:53:50.305906', 'step': 23820, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:50.359045', 'step': 23820, 'epoch': 3}
{'type': 'loss', 'content': 0.09513033181428909, 'timestamp': '2025-10-02 00:53:50.361444', 'step': 23821, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:50.416170', 'step': 23821, 'epoch': 3}
{'type': 'loss', 'content': 0.049773216247558594, 'timestamp': '2025-10-02 00:53:50.421150', 'step': 23822, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:50.484996', 'step': 23822, 'epoch': 3}
{'type': 'loss', 'content': 0.08310787379741669, 'timestamp': '2025-10-02 00:53:50.494258', 'step': 23823, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:50.554545', 'step': 23823, 'epoch': 3}
{'type': 'loss', 'content': 0.06268643587827682, 'timestamp': '2025-10-02 00:53:50.560605', 'step': 23824, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:50.613612', 'step': 23824, 'epoch': 3}
{'type': 'loss', 'content': 0.1298486739397049, 'timestamp': '2025-10-02 00:53:50.616974', 'step': 23825, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:53:50.673765', 'step': 23825, 'epoch': 3}
{'type': 'loss', 'content': 0.13980107009410858, 'timestamp': '2025-10-02 00:53:50.676776', 'step': 23826, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:50.733011', 'step': 23826, 'epoch': 3}
{'type': 'loss', 'content': 0.05333864316344261, 'timestamp': '2025-10-02 00:53:50.737845', 'step': 23827, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:50.794715', 'step': 23827, 'epoch': 3}
{'type': 'loss', 'content': 0.026898667216300964, 'timestamp': '2025-10-02 00:53:50.802979', 'step': 23828, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:50.858686', 'step': 23828, 'epoch': 3}
{'type': 'loss', 'content': 0.03636682406067848, 'timestamp': '2025-10-02 00:53:50.860840', 'step': 23829, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:50.918313', 'step': 23829, 'epoch': 3}
{'type': 'loss', 'content': 0.024181336164474487, 'timestamp': '2025-10-02 00:53:50.927649', 'step': 23830, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:50.983780', 'step': 23830, 'epoch': 3}
{'type': 'loss', 'content': 0.062266889959573746, 'timestamp': '2025-10-02 00:53:50.991342', 'step': 23831, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:51.047618', 'step': 23831, 'epoch': 3}
{'type': 'loss', 'content': 0.021416641771793365, 'timestamp': '2025-10-02 00:53:51.054033', 'step': 23832, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:51.111337', 'step': 23832, 'epoch': 3}
{'type': 'loss', 'content': 0.08823346346616745, 'timestamp': '2025-10-02 00:53:51.114458', 'step': 23833, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:51.170402', 'step': 23833, 'epoch': 3}
{'type': 'loss', 'content': 0.06887467950582504, 'timestamp': '2025-10-02 00:53:51.172692', 'step': 23834, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:51.231617', 'step': 23834, 'epoch': 3}
{'type': 'loss', 'content': 0.11827239394187927, 'timestamp': '2025-10-02 00:53:51.234866', 'step': 23835, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:51.290742', 'step': 23835, 'epoch': 3}
{'type': 'loss', 'content': 0.04930659383535385, 'timestamp': '2025-10-02 00:53:51.297939', 'step': 23836, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:51.354487', 'step': 23836, 'epoch': 3}
{'type': 'loss', 'content': 0.06319271773099899, 'timestamp': '2025-10-02 00:53:51.357289', 'step': 23837, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:51.413426', 'step': 23837, 'epoch': 3}
{'type': 'loss', 'content': 0.05975574254989624, 'timestamp': '2025-10-02 00:53:51.416073', 'step': 23838, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:53:51.480516', 'step': 23838, 'epoch': 3}
{'type': 'loss', 'content': 0.02309621125459671, 'timestamp': '2025-10-02 00:53:51.490979', 'step': 23839, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:51.549694', 'step': 23839, 'epoch': 3}
{'type': 'loss', 'content': 0.01242175605148077, 'timestamp': '2025-10-02 00:53:51.560359', 'step': 23840, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:51.615683', 'step': 23840, 'epoch': 3}
{'type': 'loss', 'content': 0.015403611585497856, 'timestamp': '2025-10-02 00:53:51.623375', 'step': 23841, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:51.689000', 'step': 23841, 'epoch': 3}
{'type': 'loss', 'content': 0.011060990393161774, 'timestamp': '2025-10-02 00:53:51.695727', 'step': 23842, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:51.756257', 'step': 23842, 'epoch': 3}
{'type': 'loss', 'content': 0.10282052308320999, 'timestamp': '2025-10-02 00:53:51.759248', 'step': 23843, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:51.814116', 'step': 23843, 'epoch': 3}
{'type': 'loss', 'content': 0.08414527028799057, 'timestamp': '2025-10-02 00:53:51.821956', 'step': 23844, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:53:51.888180', 'step': 23844, 'epoch': 3}
{'type': 'loss', 'content': 0.012521284632384777, 'timestamp': '2025-10-02 00:53:51.901149', 'step': 23845, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:53:51.962940', 'step': 23845, 'epoch': 3}
{'type': 'loss', 'content': 0.011627922765910625, 'timestamp': '2025-10-02 00:53:51.973586', 'step': 23846, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:52.028454', 'step': 23846, 'epoch': 3}
{'type': 'loss', 'content': 0.010674512013792992, 'timestamp': '2025-10-02 00:53:52.031659', 'step': 23847, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:53:52.096135', 'step': 23847, 'epoch': 3}
{'type': 'loss', 'content': 0.012216635048389435, 'timestamp': '2025-10-02 00:53:52.107546', 'step': 23848, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:53:52.175232', 'step': 23848, 'epoch': 3}
{'type': 'loss', 'content': 0.1302737593650818, 'timestamp': '2025-10-02 00:53:52.177866', 'step': 23849, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:53:52.246034', 'step': 23849, 'epoch': 3}
{'type': 'loss', 'content': 0.017802149057388306, 'timestamp': '2025-10-02 00:53:52.248998', 'step': 23850, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:52.304771', 'step': 23850, 'epoch': 3}
{'type': 'loss', 'content': 0.0728965550661087, 'timestamp': '2025-10-02 00:53:52.307144', 'step': 23851, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:52.362207', 'step': 23851, 'epoch': 3}
{'type': 'loss', 'content': 0.013031103648245335, 'timestamp': '2025-10-02 00:53:52.368711', 'step': 23852, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:53:52.422028', 'step': 23852, 'epoch': 3}
{'type': 'loss', 'content': 0.039462365210056305, 'timestamp': '2025-10-02 00:53:52.424280', 'step': 23853, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:52.478643', 'step': 23853, 'epoch': 3}
{'type': 'loss', 'content': 0.011527417227625847, 'timestamp': '2025-10-02 00:53:52.481242', 'step': 23854, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:52.535484', 'step': 23854, 'epoch': 3}
{'type': 'loss', 'content': 0.03173502907156944, 'timestamp': '2025-10-02 00:53:52.538125', 'step': 23855, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:52.592545', 'step': 23855, 'epoch': 3}
{'type': 'loss', 'content': 0.10398810356855392, 'timestamp': '2025-10-02 00:53:52.600703', 'step': 23856, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:53:52.656852', 'step': 23856, 'epoch': 3}
{'type': 'loss', 'content': 0.03456948325037956, 'timestamp': '2025-10-02 00:53:52.659289', 'step': 23857, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:52.715174', 'step': 23857, 'epoch': 3}
{'type': 'loss', 'content': 0.020150460302829742, 'timestamp': '2025-10-02 00:53:52.724734', 'step': 23858, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:53:52.780722', 'step': 23858, 'epoch': 3}
{'type': 'loss', 'content': 0.07204775512218475, 'timestamp': '2025-10-02 00:53:52.783233', 'step': 23859, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:52.842573', 'step': 23859, 'epoch': 3}
{'type': 'loss', 'content': 0.03829804062843323, 'timestamp': '2025-10-02 00:53:52.849788', 'step': 23860, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:52.905206', 'step': 23860, 'epoch': 3}
{'type': 'loss', 'content': 0.032817739993333817, 'timestamp': '2025-10-02 00:53:52.914873', 'step': 23861, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:52.972797', 'step': 23861, 'epoch': 3}
{'type': 'loss', 'content': 0.02027631551027298, 'timestamp': '2025-10-02 00:53:52.982297', 'step': 23862, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:53:53.040815', 'step': 23862, 'epoch': 3}
{'type': 'loss', 'content': 0.05177047476172447, 'timestamp': '2025-10-02 00:53:53.050966', 'step': 23863, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:53.106839', 'step': 23863, 'epoch': 3}
{'type': 'loss', 'content': 0.04572314769029617, 'timestamp': '2025-10-02 00:53:53.112872', 'step': 23864, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:53.166594', 'step': 23864, 'epoch': 3}
{'type': 'loss', 'content': 0.03162117674946785, 'timestamp': '2025-10-02 00:53:53.169496', 'step': 23865, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:53.223809', 'step': 23865, 'epoch': 3}
{'type': 'loss', 'content': 0.05939671769738197, 'timestamp': '2025-10-02 00:53:53.226382', 'step': 23866, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:53:53.287989', 'step': 23866, 'epoch': 3}
{'type': 'loss', 'content': 0.030018040910363197, 'timestamp': '2025-10-02 00:53:53.298407', 'step': 23867, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:53.354302', 'step': 23867, 'epoch': 3}
{'type': 'loss', 'content': 0.03712006285786629, 'timestamp': '2025-10-02 00:53:53.361157', 'step': 23868, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:53.415951', 'step': 23868, 'epoch': 3}
{'type': 'loss', 'content': 0.10419793426990509, 'timestamp': '2025-10-02 00:53:53.418297', 'step': 23869, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:53.473224', 'step': 23869, 'epoch': 3}
{'type': 'loss', 'content': 0.08474624902009964, 'timestamp': '2025-10-02 00:53:53.479240', 'step': 23870, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:53.534093', 'step': 23870, 'epoch': 3}
{'type': 'loss', 'content': 0.11065462231636047, 'timestamp': '2025-10-02 00:53:53.536437', 'step': 23871, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:53.591384', 'step': 23871, 'epoch': 3}
{'type': 'loss', 'content': 0.17484302818775177, 'timestamp': '2025-10-02 00:53:53.597110', 'step': 23872, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:53.651054', 'step': 23872, 'epoch': 3}
{'type': 'loss', 'content': 0.035339999943971634, 'timestamp': '2025-10-02 00:53:53.661312', 'step': 23873, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:53.716040', 'step': 23873, 'epoch': 3}
{'type': 'loss', 'content': 0.05820470675826073, 'timestamp': '2025-10-02 00:53:53.718353', 'step': 23874, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:53.772754', 'step': 23874, 'epoch': 3}
{'type': 'loss', 'content': 0.09846769273281097, 'timestamp': '2025-10-02 00:53:53.775215', 'step': 23875, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:53.829874', 'step': 23875, 'epoch': 3}
{'type': 'loss', 'content': 0.024897180497646332, 'timestamp': '2025-10-02 00:53:53.840000', 'step': 23876, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:53.895656', 'step': 23876, 'epoch': 3}
{'type': 'loss', 'content': 0.03900761157274246, 'timestamp': '2025-10-02 00:53:53.901475', 'step': 23877, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:53:53.961074', 'step': 23877, 'epoch': 3}
{'type': 'loss', 'content': 0.07136514782905579, 'timestamp': '2025-10-02 00:53:53.964979', 'step': 23878, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:54.024547', 'step': 23878, 'epoch': 3}
{'type': 'loss', 'content': 0.04355199635028839, 'timestamp': '2025-10-02 00:53:54.033846', 'step': 23879, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:54.088610', 'step': 23879, 'epoch': 3}
{'type': 'loss', 'content': 0.018932411447167397, 'timestamp': '2025-10-02 00:53:54.098717', 'step': 23880, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:54.153159', 'step': 23880, 'epoch': 3}
{'type': 'loss', 'content': 0.00031999003840610385, 'timestamp': '2025-10-02 00:53:54.159178', 'step': 23881, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:54.213396', 'step': 23881, 'epoch': 3}
{'type': 'loss', 'content': 0.10343046486377716, 'timestamp': '2025-10-02 00:53:54.215462', 'step': 23882, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:53:54.269728', 'step': 23882, 'epoch': 3}
{'type': 'loss', 'content': 0.028250310570001602, 'timestamp': '2025-10-02 00:53:54.273162', 'step': 23883, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:54.328817', 'step': 23883, 'epoch': 3}
{'type': 'loss', 'content': 0.01607573963701725, 'timestamp': '2025-10-02 00:53:54.334934', 'step': 23884, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:54.388590', 'step': 23884, 'epoch': 3}
{'type': 'loss', 'content': 0.062179096043109894, 'timestamp': '2025-10-02 00:53:54.391028', 'step': 23885, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:54.445508', 'step': 23885, 'epoch': 3}
{'type': 'loss', 'content': 0.036545176059007645, 'timestamp': '2025-10-02 00:53:54.447559', 'step': 23886, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:54.501218', 'step': 23886, 'epoch': 3}
{'type': 'loss', 'content': 0.01138819195330143, 'timestamp': '2025-10-02 00:53:54.503332', 'step': 23887, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:54.557233', 'step': 23887, 'epoch': 3}
{'type': 'loss', 'content': 0.03300846368074417, 'timestamp': '2025-10-02 00:53:54.563373', 'step': 23888, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:54.617310', 'step': 23888, 'epoch': 3}
{'type': 'loss', 'content': 0.015025551430881023, 'timestamp': '2025-10-02 00:53:54.619849', 'step': 23889, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:54.674604', 'step': 23889, 'epoch': 3}
{'type': 'loss', 'content': 0.010430323891341686, 'timestamp': '2025-10-02 00:53:54.676989', 'step': 23890, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:53:54.730358', 'step': 23890, 'epoch': 3}
{'type': 'loss', 'content': 0.13735903799533844, 'timestamp': '2025-10-02 00:53:54.732709', 'step': 23891, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:54.787368', 'step': 23891, 'epoch': 3}
{'type': 'loss', 'content': 0.11478061228990555, 'timestamp': '2025-10-02 00:53:54.794225', 'step': 23892, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:54.848777', 'step': 23892, 'epoch': 3}
{'type': 'loss', 'content': 0.07077853381633759, 'timestamp': '2025-10-02 00:53:54.851374', 'step': 23893, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:54.905927', 'step': 23893, 'epoch': 3}
{'type': 'loss', 'content': 0.03853480517864227, 'timestamp': '2025-10-02 00:53:54.911970', 'step': 23894, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:54.966314', 'step': 23894, 'epoch': 3}
{'type': 'loss', 'content': 0.051829587668180466, 'timestamp': '2025-10-02 00:53:54.968512', 'step': 23895, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:53:55.022089', 'step': 23895, 'epoch': 3}
{'type': 'loss', 'content': 0.05772733315825462, 'timestamp': '2025-10-02 00:53:55.030053', 'step': 23896, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:53:55.092906', 'step': 23896, 'epoch': 3}
{'type': 'loss', 'content': 0.12551239132881165, 'timestamp': '2025-10-02 00:53:55.099893', 'step': 23897, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:55.160291', 'step': 23897, 'epoch': 3}
{'type': 'loss', 'content': 0.05252798646688461, 'timestamp': '2025-10-02 00:53:55.162962', 'step': 23898, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:55.216831', 'step': 23898, 'epoch': 3}
{'type': 'loss', 'content': 0.0626552402973175, 'timestamp': '2025-10-02 00:53:55.219466', 'step': 23899, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:55.274389', 'step': 23899, 'epoch': 3}
{'type': 'loss', 'content': 0.06726256757974625, 'timestamp': '2025-10-02 00:53:55.282738', 'step': 23900, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:55.339517', 'step': 23900, 'epoch': 3}
{'type': 'loss', 'content': 0.004259153734892607, 'timestamp': '2025-10-02 00:53:55.341697', 'step': 23901, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:55.395923', 'step': 23901, 'epoch': 3}
{'type': 'loss', 'content': 0.001157634425908327, 'timestamp': '2025-10-02 00:53:55.401951', 'step': 23902, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:55.456287', 'step': 23902, 'epoch': 3}
{'type': 'loss', 'content': 0.1412423998117447, 'timestamp': '2025-10-02 00:53:55.458603', 'step': 23903, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:55.514736', 'step': 23903, 'epoch': 3}
{'type': 'loss', 'content': 0.036620598286390305, 'timestamp': '2025-10-02 00:53:55.523260', 'step': 23904, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:55.577342', 'step': 23904, 'epoch': 3}
{'type': 'loss', 'content': 0.04150675609707832, 'timestamp': '2025-10-02 00:53:55.579771', 'step': 23905, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:55.634194', 'step': 23905, 'epoch': 3}
{'type': 'loss', 'content': 0.038444675505161285, 'timestamp': '2025-10-02 00:53:55.636782', 'step': 23906, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:55.692044', 'step': 23906, 'epoch': 3}
{'type': 'loss', 'content': 0.00959467887878418, 'timestamp': '2025-10-02 00:53:55.701598', 'step': 23907, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:55.769424', 'step': 23907, 'epoch': 3}
{'type': 'loss', 'content': 0.1051027700304985, 'timestamp': '2025-10-02 00:53:55.775452', 'step': 23908, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:55.829781', 'step': 23908, 'epoch': 3}
{'type': 'loss', 'content': 0.00864982046186924, 'timestamp': '2025-10-02 00:53:55.832320', 'step': 23909, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:55.887329', 'step': 23909, 'epoch': 3}
{'type': 'loss', 'content': 0.024992698803544044, 'timestamp': '2025-10-02 00:53:55.894872', 'step': 23910, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:55.949241', 'step': 23910, 'epoch': 3}
{'type': 'loss', 'content': 0.04493286460638046, 'timestamp': '2025-10-02 00:53:55.951952', 'step': 23911, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:56.006452', 'step': 23911, 'epoch': 3}
{'type': 'loss', 'content': 0.07288555055856705, 'timestamp': '2025-10-02 00:53:56.013066', 'step': 23912, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:53:56.066448', 'step': 23912, 'epoch': 3}
{'type': 'loss', 'content': 0.19419041275978088, 'timestamp': '2025-10-02 00:53:56.068966', 'step': 23913, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:53:56.122118', 'step': 23913, 'epoch': 3}
{'type': 'loss', 'content': 0.07050759345293045, 'timestamp': '2025-10-02 00:53:56.128110', 'step': 23914, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:53:56.203215', 'step': 23914, 'epoch': 3}
{'type': 'loss', 'content': 0.05179612711071968, 'timestamp': '2025-10-02 00:53:56.213856', 'step': 23915, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:56.271358', 'step': 23915, 'epoch': 3}
{'type': 'loss', 'content': 0.0017991694621741772, 'timestamp': '2025-10-02 00:53:56.281499', 'step': 23916, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:56.345607', 'step': 23916, 'epoch': 3}
{'type': 'loss', 'content': 0.07391193509101868, 'timestamp': '2025-10-02 00:53:56.347770', 'step': 23917, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:56.401833', 'step': 23917, 'epoch': 3}
{'type': 'loss', 'content': 0.05279045179486275, 'timestamp': '2025-10-02 00:53:56.407910', 'step': 23918, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:56.462614', 'step': 23918, 'epoch': 3}
{'type': 'loss', 'content': 0.07820335775613785, 'timestamp': '2025-10-02 00:53:56.465232', 'step': 23919, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:56.531071', 'step': 23919, 'epoch': 3}
{'type': 'loss', 'content': 0.050488825887441635, 'timestamp': '2025-10-02 00:53:56.537043', 'step': 23920, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:56.591145', 'step': 23920, 'epoch': 3}
{'type': 'loss', 'content': 0.018250087276101112, 'timestamp': '2025-10-02 00:53:56.593672', 'step': 23921, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:56.650262', 'step': 23921, 'epoch': 3}
{'type': 'loss', 'content': 0.03963768482208252, 'timestamp': '2025-10-02 00:53:56.659841', 'step': 23922, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:56.714913', 'step': 23922, 'epoch': 3}
{'type': 'loss', 'content': 0.043351661413908005, 'timestamp': '2025-10-02 00:53:56.722451', 'step': 23923, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:56.778301', 'step': 23923, 'epoch': 3}
{'type': 'loss', 'content': 0.09173795580863953, 'timestamp': '2025-10-02 00:53:56.784348', 'step': 23924, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:56.837659', 'step': 23924, 'epoch': 3}
{'type': 'loss', 'content': 0.03130960464477539, 'timestamp': '2025-10-02 00:53:56.839883', 'step': 23925, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:56.894361', 'step': 23925, 'epoch': 3}
{'type': 'loss', 'content': 0.03930969163775444, 'timestamp': '2025-10-02 00:53:56.896956', 'step': 23926, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:56.951998', 'step': 23926, 'epoch': 3}
{'type': 'loss', 'content': 0.11242634803056717, 'timestamp': '2025-10-02 00:53:56.954744', 'step': 23927, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:57.008958', 'step': 23927, 'epoch': 3}
{'type': 'loss', 'content': 0.06062885373830795, 'timestamp': '2025-10-02 00:53:57.015861', 'step': 23928, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:53:57.072667', 'step': 23928, 'epoch': 3}
{'type': 'loss', 'content': 0.026114201173186302, 'timestamp': '2025-10-02 00:53:57.075780', 'step': 23929, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:57.131683', 'step': 23929, 'epoch': 3}
{'type': 'loss', 'content': 0.0182342566549778, 'timestamp': '2025-10-02 00:53:57.134707', 'step': 23930, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:57.194218', 'step': 23930, 'epoch': 3}
{'type': 'loss', 'content': 0.04345580190420151, 'timestamp': '2025-10-02 00:53:57.200025', 'step': 23931, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:53:57.270619', 'step': 23931, 'epoch': 3}
{'type': 'loss', 'content': 0.037619948387145996, 'timestamp': '2025-10-02 00:53:57.284008', 'step': 23932, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:57.339858', 'step': 23932, 'epoch': 3}
{'type': 'loss', 'content': 0.04780343174934387, 'timestamp': '2025-10-02 00:53:57.342870', 'step': 23933, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:57.408133', 'step': 23933, 'epoch': 3}
{'type': 'loss', 'content': 0.08345801383256912, 'timestamp': '2025-10-02 00:53:57.413745', 'step': 23934, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:57.473277', 'step': 23934, 'epoch': 3}
{'type': 'loss', 'content': 0.0275407861918211, 'timestamp': '2025-10-02 00:53:57.475794', 'step': 23935, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:57.533277', 'step': 23935, 'epoch': 3}
{'type': 'loss', 'content': 0.03155377879738808, 'timestamp': '2025-10-02 00:53:57.539248', 'step': 23936, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:53:57.600064', 'step': 23936, 'epoch': 3}
{'type': 'loss', 'content': 0.06118711456656456, 'timestamp': '2025-10-02 00:53:57.611374', 'step': 23937, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:57.666734', 'step': 23937, 'epoch': 3}
{'type': 'loss', 'content': 0.015388715080916882, 'timestamp': '2025-10-02 00:53:57.668728', 'step': 23938, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:53:57.730093', 'step': 23938, 'epoch': 3}
{'type': 'loss', 'content': 0.050828419625759125, 'timestamp': '2025-10-02 00:53:57.740737', 'step': 23939, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:57.795713', 'step': 23939, 'epoch': 3}
{'type': 'loss', 'content': 0.020185094326734543, 'timestamp': '2025-10-02 00:53:57.802385', 'step': 23940, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:57.856115', 'step': 23940, 'epoch': 3}
{'type': 'loss', 'content': 0.06325129419565201, 'timestamp': '2025-10-02 00:53:57.865462', 'step': 23941, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:53:57.919669', 'step': 23941, 'epoch': 3}
{'type': 'loss', 'content': 0.011013343930244446, 'timestamp': '2025-10-02 00:53:57.925664', 'step': 23942, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:57.980412', 'step': 23942, 'epoch': 3}
{'type': 'loss', 'content': 0.11958476155996323, 'timestamp': '2025-10-02 00:53:57.982600', 'step': 23943, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:53:58.037357', 'step': 23943, 'epoch': 3}
{'type': 'loss', 'content': 0.07411236315965652, 'timestamp': '2025-10-02 00:53:58.043085', 'step': 23944, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:58.096371', 'step': 23944, 'epoch': 3}
{'type': 'loss', 'content': 0.0439763218164444, 'timestamp': '2025-10-02 00:53:58.099010', 'step': 23945, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:53:58.153933', 'step': 23945, 'epoch': 3}
{'type': 'loss', 'content': 0.15092094242572784, 'timestamp': '2025-10-02 00:53:58.156349', 'step': 23946, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:58.210656', 'step': 23946, 'epoch': 3}
{'type': 'loss', 'content': 0.0576600581407547, 'timestamp': '2025-10-02 00:53:58.213056', 'step': 23947, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:58.270283', 'step': 23947, 'epoch': 3}
{'type': 'loss', 'content': 0.07744157314300537, 'timestamp': '2025-10-02 00:53:58.277087', 'step': 23948, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:53:58.330128', 'step': 23948, 'epoch': 3}
{'type': 'loss', 'content': 0.07941050827503204, 'timestamp': '2025-10-02 00:53:58.332357', 'step': 23949, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:58.386500', 'step': 23949, 'epoch': 3}
{'type': 'loss', 'content': 0.00592216569930315, 'timestamp': '2025-10-02 00:53:58.393741', 'step': 23950, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:58.449313', 'step': 23950, 'epoch': 3}
{'type': 'loss', 'content': 0.10822562128305435, 'timestamp': '2025-10-02 00:53:58.451447', 'step': 23951, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:58.507618', 'step': 23951, 'epoch': 3}
{'type': 'loss', 'content': 0.01134609803557396, 'timestamp': '2025-10-02 00:53:58.517730', 'step': 23952, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:53:58.582334', 'step': 23952, 'epoch': 3}
{'type': 'loss', 'content': 0.016897033900022507, 'timestamp': '2025-10-02 00:53:58.593308', 'step': 23953, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:53:58.650950', 'step': 23953, 'epoch': 3}
{'type': 'loss', 'content': 0.08899617195129395, 'timestamp': '2025-10-02 00:53:58.655303', 'step': 23954, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:58.716758', 'step': 23954, 'epoch': 3}
{'type': 'loss', 'content': 0.05747385695576668, 'timestamp': '2025-10-02 00:53:58.718852', 'step': 23955, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:53:58.772897', 'step': 23955, 'epoch': 3}
{'type': 'loss', 'content': 0.014998574741184711, 'timestamp': '2025-10-02 00:53:58.778250', 'step': 23956, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:53:58.838672', 'step': 23956, 'epoch': 3}
{'type': 'loss', 'content': 0.037587970495224, 'timestamp': '2025-10-02 00:53:58.850178', 'step': 23957, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:53:58.905432', 'step': 23957, 'epoch': 3}
{'type': 'loss', 'content': 0.07203289121389389, 'timestamp': '2025-10-02 00:53:58.907626', 'step': 23958, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:58.962643', 'step': 23958, 'epoch': 3}
{'type': 'loss', 'content': 0.16200660169124603, 'timestamp': '2025-10-02 00:53:58.971976', 'step': 23959, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:59.026616', 'step': 23959, 'epoch': 3}
{'type': 'loss', 'content': 0.09160696715116501, 'timestamp': '2025-10-02 00:53:59.032229', 'step': 23960, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:53:59.086306', 'step': 23960, 'epoch': 3}
{'type': 'loss', 'content': 0.03133720904588699, 'timestamp': '2025-10-02 00:53:59.095763', 'step': 23961, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:53:59.150445', 'step': 23961, 'epoch': 3}
{'type': 'loss', 'content': 0.05952738597989082, 'timestamp': '2025-10-02 00:53:59.152294', 'step': 23962, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:59.206690', 'step': 23962, 'epoch': 3}
{'type': 'loss', 'content': 0.04910853132605553, 'timestamp': '2025-10-02 00:53:59.208568', 'step': 23963, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:53:59.262624', 'step': 23963, 'epoch': 3}
{'type': 'loss', 'content': 0.10955170542001724, 'timestamp': '2025-10-02 00:53:59.268330', 'step': 23964, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:53:59.327855', 'step': 23964, 'epoch': 3}
{'type': 'loss', 'content': 0.04993968456983566, 'timestamp': '2025-10-02 00:53:59.330461', 'step': 23965, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:53:59.384576', 'step': 23965, 'epoch': 3}
{'type': 'loss', 'content': 0.01147609855979681, 'timestamp': '2025-10-02 00:53:59.387088', 'step': 23966, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:53:59.446321', 'step': 23966, 'epoch': 3}
{'type': 'loss', 'content': 0.05284091830253601, 'timestamp': '2025-10-02 00:53:59.456517', 'step': 23967, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:53:59.510790', 'step': 23967, 'epoch': 3}
{'type': 'loss', 'content': 0.00876153539866209, 'timestamp': '2025-10-02 00:53:59.519212', 'step': 23968, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:53:59.573651', 'step': 23968, 'epoch': 3}
{'type': 'loss', 'content': 0.04267776384949684, 'timestamp': '2025-10-02 00:53:59.575946', 'step': 23969, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:59.631599', 'step': 23969, 'epoch': 3}
{'type': 'loss', 'content': 0.12423532456159592, 'timestamp': '2025-10-02 00:53:59.633405', 'step': 23970, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:53:59.688781', 'step': 23970, 'epoch': 3}
{'type': 'loss', 'content': 0.08740241080522537, 'timestamp': '2025-10-02 00:53:59.698323', 'step': 23971, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:53:59.758308', 'step': 23971, 'epoch': 3}
{'type': 'loss', 'content': 0.02487126551568508, 'timestamp': '2025-10-02 00:53:59.764454', 'step': 23972, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:53:59.819741', 'step': 23972, 'epoch': 3}
{'type': 'loss', 'content': 0.12265722453594208, 'timestamp': '2025-10-02 00:53:59.822492', 'step': 23973, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:53:59.887149', 'step': 23973, 'epoch': 3}
{'type': 'loss', 'content': 0.034718818962574005, 'timestamp': '2025-10-02 00:53:59.897906', 'step': 23974, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:53:59.953871', 'step': 23974, 'epoch': 3}
{'type': 'loss', 'content': 0.07697834819555283, 'timestamp': '2025-10-02 00:53:59.956851', 'step': 23975, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:54:00.018673', 'step': 23975, 'epoch': 3}
{'type': 'loss', 'content': 0.029295388609170914, 'timestamp': '2025-10-02 00:54:00.029908', 'step': 23976, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:00.085797', 'step': 23976, 'epoch': 3}
{'type': 'loss', 'content': 0.02034086547791958, 'timestamp': '2025-10-02 00:54:00.091710', 'step': 23977, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:00.148747', 'step': 23977, 'epoch': 3}
{'type': 'loss', 'content': 0.08950024843215942, 'timestamp': '2025-10-02 00:54:00.151186', 'step': 23978, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:00.207447', 'step': 23978, 'epoch': 3}
{'type': 'loss', 'content': 0.02480972930788994, 'timestamp': '2025-10-02 00:54:00.210668', 'step': 23979, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:00.267124', 'step': 23979, 'epoch': 3}
{'type': 'loss', 'content': 0.04882833734154701, 'timestamp': '2025-10-02 00:54:00.273989', 'step': 23980, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:00.329976', 'step': 23980, 'epoch': 3}
{'type': 'loss', 'content': 0.12074270844459534, 'timestamp': '2025-10-02 00:54:00.332815', 'step': 23981, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:00.391771', 'step': 23981, 'epoch': 3}
{'type': 'loss', 'content': 0.04568886011838913, 'timestamp': '2025-10-02 00:54:00.401328', 'step': 23982, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:00.456675', 'step': 23982, 'epoch': 3}
{'type': 'loss', 'content': 0.027039533481001854, 'timestamp': '2025-10-02 00:54:00.459695', 'step': 23983, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:00.516675', 'step': 23983, 'epoch': 3}
{'type': 'loss', 'content': 0.013027159497141838, 'timestamp': '2025-10-02 00:54:00.522378', 'step': 23984, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:54:00.577951', 'step': 23984, 'epoch': 3}
{'type': 'loss', 'content': 0.02927619218826294, 'timestamp': '2025-10-02 00:54:00.579956', 'step': 23985, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:00.633683', 'step': 23985, 'epoch': 3}
{'type': 'loss', 'content': 0.05153963714838028, 'timestamp': '2025-10-02 00:54:00.636168', 'step': 23986, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:00.691774', 'step': 23986, 'epoch': 3}
{'type': 'loss', 'content': 0.04444700479507446, 'timestamp': '2025-10-02 00:54:00.701040', 'step': 23987, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:54:00.762821', 'step': 23987, 'epoch': 3}
{'type': 'loss', 'content': 0.033259645104408264, 'timestamp': '2025-10-02 00:54:00.774076', 'step': 23988, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:54:00.829640', 'step': 23988, 'epoch': 3}
{'type': 'loss', 'content': 0.03404683247208595, 'timestamp': '2025-10-02 00:54:00.837408', 'step': 23989, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:00.893645', 'step': 23989, 'epoch': 3}
{'type': 'loss', 'content': 0.005066297948360443, 'timestamp': '2025-10-02 00:54:00.902974', 'step': 23990, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:00.957630', 'step': 23990, 'epoch': 3}
{'type': 'loss', 'content': 0.030082762241363525, 'timestamp': '2025-10-02 00:54:00.960535', 'step': 23991, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:01.016449', 'step': 23991, 'epoch': 3}
{'type': 'loss', 'content': 0.02411603555083275, 'timestamp': '2025-10-02 00:54:01.023025', 'step': 23992, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:01.086348', 'step': 23992, 'epoch': 3}
{'type': 'loss', 'content': 0.020976876839995384, 'timestamp': '2025-10-02 00:54:01.089447', 'step': 23993, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:01.143734', 'step': 23993, 'epoch': 3}
{'type': 'loss', 'content': 0.06013575196266174, 'timestamp': '2025-10-02 00:54:01.146551', 'step': 23994, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:01.202630', 'step': 23994, 'epoch': 3}
{'type': 'loss', 'content': 0.057975880801677704, 'timestamp': '2025-10-02 00:54:01.205123', 'step': 23995, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:01.261989', 'step': 23995, 'epoch': 3}
{'type': 'loss', 'content': 0.0709385946393013, 'timestamp': '2025-10-02 00:54:01.268728', 'step': 23996, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:01.328611', 'step': 23996, 'epoch': 3}
{'type': 'loss', 'content': 0.005308300722390413, 'timestamp': '2025-10-02 00:54:01.339584', 'step': 23997, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:01.395080', 'step': 23997, 'epoch': 3}
{'type': 'loss', 'content': 0.03179825842380524, 'timestamp': '2025-10-02 00:54:01.402727', 'step': 23998, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:01.459391', 'step': 23998, 'epoch': 3}
{'type': 'loss', 'content': 0.08633967489004135, 'timestamp': '2025-10-02 00:54:01.461748', 'step': 23999, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:01.519113', 'step': 23999, 'epoch': 3}
{'type': 'loss', 'content': 0.011761299334466457, 'timestamp': '2025-10-02 00:54:01.529426', 'step': 24000, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 24000', 'timestamp': '2025-10-02 00:54:01.930510', 'step': 24000, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:01.984838', 'step': 24000, 'epoch': 3}
{'type': 'loss', 'content': 0.017501315101981163, 'timestamp': '2025-10-02 00:54:01.987715', 'step': 24001, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:02.043753', 'step': 24001, 'epoch': 3}
{'type': 'loss', 'content': 0.04728437215089798, 'timestamp': '2025-10-02 00:54:02.046325', 'step': 24002, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:02.102954', 'step': 24002, 'epoch': 3}
{'type': 'loss', 'content': 0.03058774769306183, 'timestamp': '2025-10-02 00:54:02.105722', 'step': 24003, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:02.162695', 'step': 24003, 'epoch': 3}
{'type': 'loss', 'content': 0.042196452617645264, 'timestamp': '2025-10-02 00:54:02.173047', 'step': 24004, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:02.232376', 'step': 24004, 'epoch': 3}
{'type': 'loss', 'content': 0.07802966982126236, 'timestamp': '2025-10-02 00:54:02.234954', 'step': 24005, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:02.291097', 'step': 24005, 'epoch': 3}
{'type': 'loss', 'content': 0.04080965369939804, 'timestamp': '2025-10-02 00:54:02.294263', 'step': 24006, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:02.355829', 'step': 24006, 'epoch': 3}
{'type': 'loss', 'content': 0.035330384969711304, 'timestamp': '2025-10-02 00:54:02.365112', 'step': 24007, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:02.428293', 'step': 24007, 'epoch': 3}
{'type': 'loss', 'content': 0.0700564980506897, 'timestamp': '2025-10-02 00:54:02.437486', 'step': 24008, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:02.503656', 'step': 24008, 'epoch': 3}
{'type': 'loss', 'content': 0.08547092974185944, 'timestamp': '2025-10-02 00:54:02.506872', 'step': 24009, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:02.563691', 'step': 24009, 'epoch': 3}
{'type': 'loss', 'content': 0.02236962504684925, 'timestamp': '2025-10-02 00:54:02.570724', 'step': 24010, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:02.628911', 'step': 24010, 'epoch': 3}
{'type': 'loss', 'content': 0.03925894573330879, 'timestamp': '2025-10-02 00:54:02.630679', 'step': 24011, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:02.685272', 'step': 24011, 'epoch': 3}
{'type': 'loss', 'content': 0.08802474290132523, 'timestamp': '2025-10-02 00:54:02.691211', 'step': 24012, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:02.746219', 'step': 24012, 'epoch': 3}
{'type': 'loss', 'content': 0.031117063015699387, 'timestamp': '2025-10-02 00:54:02.752272', 'step': 24013, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:02.807292', 'step': 24013, 'epoch': 3}
{'type': 'loss', 'content': 0.031120743602514267, 'timestamp': '2025-10-02 00:54:02.813184', 'step': 24014, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:54:02.875235', 'step': 24014, 'epoch': 3}
{'type': 'loss', 'content': 0.05059128999710083, 'timestamp': '2025-10-02 00:54:02.885859', 'step': 24015, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:02.942145', 'step': 24015, 'epoch': 3}
{'type': 'loss', 'content': 0.016200583428144455, 'timestamp': '2025-10-02 00:54:02.952272', 'step': 24016, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:54:03.006587', 'step': 24016, 'epoch': 3}
{'type': 'loss', 'content': 0.08149399608373642, 'timestamp': '2025-10-02 00:54:03.008331', 'step': 24017, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:54:03.075724', 'step': 24017, 'epoch': 3}
{'type': 'loss', 'content': 0.03877659887075424, 'timestamp': '2025-10-02 00:54:03.087762', 'step': 24018, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:03.143929', 'step': 24018, 'epoch': 3}
{'type': 'loss', 'content': 0.0025806010235100985, 'timestamp': '2025-10-02 00:54:03.147023', 'step': 24019, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:03.201711', 'step': 24019, 'epoch': 3}
{'type': 'loss', 'content': 0.07062941789627075, 'timestamp': '2025-10-02 00:54:03.207924', 'step': 24020, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:03.262249', 'step': 24020, 'epoch': 3}
{'type': 'loss', 'content': 0.04550347477197647, 'timestamp': '2025-10-02 00:54:03.267958', 'step': 24021, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:03.323180', 'step': 24021, 'epoch': 3}
{'type': 'loss', 'content': 0.010261339135468006, 'timestamp': '2025-10-02 00:54:03.325788', 'step': 24022, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:03.384180', 'step': 24022, 'epoch': 3}
{'type': 'loss', 'content': 0.01765831932425499, 'timestamp': '2025-10-02 00:54:03.393714', 'step': 24023, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:03.449815', 'step': 24023, 'epoch': 3}
{'type': 'loss', 'content': 0.0633736178278923, 'timestamp': '2025-10-02 00:54:03.456092', 'step': 24024, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:03.510512', 'step': 24024, 'epoch': 3}
{'type': 'loss', 'content': 0.017152411863207817, 'timestamp': '2025-10-02 00:54:03.515011', 'step': 24025, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:03.577808', 'step': 24025, 'epoch': 3}
{'type': 'loss', 'content': 0.08613849431276321, 'timestamp': '2025-10-02 00:54:03.583031', 'step': 24026, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:03.643789', 'step': 24026, 'epoch': 3}
{'type': 'loss', 'content': 0.0014752527931705117, 'timestamp': '2025-10-02 00:54:03.651156', 'step': 24027, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:03.709763', 'step': 24027, 'epoch': 3}
{'type': 'loss', 'content': 0.06246556341648102, 'timestamp': '2025-10-02 00:54:03.715537', 'step': 24028, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:03.770066', 'step': 24028, 'epoch': 3}
{'type': 'loss', 'content': 0.046991489827632904, 'timestamp': '2025-10-02 00:54:03.772895', 'step': 24029, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:03.829777', 'step': 24029, 'epoch': 3}
{'type': 'loss', 'content': 0.033763762563467026, 'timestamp': '2025-10-02 00:54:03.832049', 'step': 24030, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:03.887325', 'step': 24030, 'epoch': 3}
{'type': 'loss', 'content': 0.01778181828558445, 'timestamp': '2025-10-02 00:54:03.890138', 'step': 24031, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:54:03.944351', 'step': 24031, 'epoch': 3}
{'type': 'loss', 'content': 0.06338775902986526, 'timestamp': '2025-10-02 00:54:03.950385', 'step': 24032, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:04.005202', 'step': 24032, 'epoch': 3}
{'type': 'loss', 'content': 0.041116952896118164, 'timestamp': '2025-10-02 00:54:04.007562', 'step': 24033, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:04.062878', 'step': 24033, 'epoch': 3}
{'type': 'loss', 'content': 0.013899150304496288, 'timestamp': '2025-10-02 00:54:04.065558', 'step': 24034, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:54:04.119912', 'step': 24034, 'epoch': 3}
{'type': 'loss', 'content': 0.04809281975030899, 'timestamp': '2025-10-02 00:54:04.122525', 'step': 24035, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:04.177695', 'step': 24035, 'epoch': 3}
{'type': 'loss', 'content': 0.05167137458920479, 'timestamp': '2025-10-02 00:54:04.183838', 'step': 24036, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:54:04.237796', 'step': 24036, 'epoch': 3}
{'type': 'loss', 'content': 0.030521094799041748, 'timestamp': '2025-10-02 00:54:04.240241', 'step': 24037, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:04.294573', 'step': 24037, 'epoch': 3}
{'type': 'loss', 'content': 0.05864141881465912, 'timestamp': '2025-10-02 00:54:04.297055', 'step': 24038, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:04.351872', 'step': 24038, 'epoch': 3}
{'type': 'loss', 'content': 0.008443726226687431, 'timestamp': '2025-10-02 00:54:04.354104', 'step': 24039, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:04.409000', 'step': 24039, 'epoch': 3}
{'type': 'loss', 'content': 0.009943637996912003, 'timestamp': '2025-10-02 00:54:04.416022', 'step': 24040, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:04.481145', 'step': 24040, 'epoch': 3}
{'type': 'loss', 'content': 0.0025848716031759977, 'timestamp': '2025-10-02 00:54:04.488957', 'step': 24041, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:04.562530', 'step': 24041, 'epoch': 3}
{'type': 'loss', 'content': 0.11186753958463669, 'timestamp': '2025-10-02 00:54:04.565049', 'step': 24042, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:04.620020', 'step': 24042, 'epoch': 3}
{'type': 'loss', 'content': 0.1581808179616928, 'timestamp': '2025-10-02 00:54:04.622125', 'step': 24043, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:04.677255', 'step': 24043, 'epoch': 3}
{'type': 'loss', 'content': 0.06219927594065666, 'timestamp': '2025-10-02 00:54:04.684040', 'step': 24044, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:04.739912', 'step': 24044, 'epoch': 3}
{'type': 'loss', 'content': 0.06757853925228119, 'timestamp': '2025-10-02 00:54:04.743123', 'step': 24045, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:04.803819', 'step': 24045, 'epoch': 3}
{'type': 'loss', 'content': 0.018529916182160378, 'timestamp': '2025-10-02 00:54:04.809411', 'step': 24046, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:04.870859', 'step': 24046, 'epoch': 3}
{'type': 'loss', 'content': 0.02236226573586464, 'timestamp': '2025-10-02 00:54:04.874575', 'step': 24047, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:04.934908', 'step': 24047, 'epoch': 3}
{'type': 'loss', 'content': 0.06032532826066017, 'timestamp': '2025-10-02 00:54:04.945847', 'step': 24048, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:05.001055', 'step': 24048, 'epoch': 3}
{'type': 'loss', 'content': 0.014301395043730736, 'timestamp': '2025-10-02 00:54:05.006643', 'step': 24049, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:05.061028', 'step': 24049, 'epoch': 3}
{'type': 'loss', 'content': 0.039678990840911865, 'timestamp': '2025-10-02 00:54:05.063413', 'step': 24050, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:05.118100', 'step': 24050, 'epoch': 3}
{'type': 'loss', 'content': 0.06031762436032295, 'timestamp': '2025-10-02 00:54:05.120504', 'step': 24051, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:05.175349', 'step': 24051, 'epoch': 3}
{'type': 'loss', 'content': 0.019302891567349434, 'timestamp': '2025-10-02 00:54:05.183390', 'step': 24052, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:05.237076', 'step': 24052, 'epoch': 3}
{'type': 'loss', 'content': 0.07922040671110153, 'timestamp': '2025-10-02 00:54:05.242839', 'step': 24053, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:54:05.297999', 'step': 24053, 'epoch': 3}
{'type': 'loss', 'content': 0.08761440217494965, 'timestamp': '2025-10-02 00:54:05.300292', 'step': 24054, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:05.355633', 'step': 24054, 'epoch': 3}
{'type': 'loss', 'content': 0.09094901382923126, 'timestamp': '2025-10-02 00:54:05.361016', 'step': 24055, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:05.420713', 'step': 24055, 'epoch': 3}
{'type': 'loss', 'content': 0.10293073952198029, 'timestamp': '2025-10-02 00:54:05.431653', 'step': 24056, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:05.486443', 'step': 24056, 'epoch': 3}
{'type': 'loss', 'content': 0.04654678702354431, 'timestamp': '2025-10-02 00:54:05.493544', 'step': 24057, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:05.547881', 'step': 24057, 'epoch': 3}
{'type': 'loss', 'content': 0.12202589213848114, 'timestamp': '2025-10-02 00:54:05.550241', 'step': 24058, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:05.604059', 'step': 24058, 'epoch': 3}
{'type': 'loss', 'content': 0.09256497770547867, 'timestamp': '2025-10-02 00:54:05.606619', 'step': 24059, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:05.661140', 'step': 24059, 'epoch': 3}
{'type': 'loss', 'content': 0.006774082779884338, 'timestamp': '2025-10-02 00:54:05.667582', 'step': 24060, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:05.725929', 'step': 24060, 'epoch': 3}
{'type': 'loss', 'content': 0.09037075191736221, 'timestamp': '2025-10-02 00:54:05.728394', 'step': 24061, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:05.783780', 'step': 24061, 'epoch': 3}
{'type': 'loss', 'content': 0.06502565741539001, 'timestamp': '2025-10-02 00:54:05.793286', 'step': 24062, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:54:05.856295', 'step': 24062, 'epoch': 3}
{'type': 'loss', 'content': 0.019629618152976036, 'timestamp': '2025-10-02 00:54:05.867153', 'step': 24063, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:05.928141', 'step': 24063, 'epoch': 3}
{'type': 'loss', 'content': 0.05890043079853058, 'timestamp': '2025-10-02 00:54:05.939057', 'step': 24064, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:05.996129', 'step': 24064, 'epoch': 3}
{'type': 'loss', 'content': 0.029522566124796867, 'timestamp': '2025-10-02 00:54:05.999873', 'step': 24065, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:06.060029', 'step': 24065, 'epoch': 3}
{'type': 'loss', 'content': 0.13008585572242737, 'timestamp': '2025-10-02 00:54:06.062444', 'step': 24066, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:06.117788', 'step': 24066, 'epoch': 3}
{'type': 'loss', 'content': 0.01081223413348198, 'timestamp': '2025-10-02 00:54:06.124709', 'step': 24067, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:06.184418', 'step': 24067, 'epoch': 3}
{'type': 'loss', 'content': 0.03940575197339058, 'timestamp': '2025-10-02 00:54:06.195385', 'step': 24068, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:06.249381', 'step': 24068, 'epoch': 3}
{'type': 'loss', 'content': 0.038178130984306335, 'timestamp': '2025-10-02 00:54:06.251747', 'step': 24069, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:06.307436', 'step': 24069, 'epoch': 3}
{'type': 'loss', 'content': 0.04875737801194191, 'timestamp': '2025-10-02 00:54:06.314746', 'step': 24070, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:06.369580', 'step': 24070, 'epoch': 3}
{'type': 'loss', 'content': 0.053247153759002686, 'timestamp': '2025-10-02 00:54:06.372087', 'step': 24071, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:54:06.449257', 'step': 24071, 'epoch': 3}
{'type': 'loss', 'content': 0.01290026307106018, 'timestamp': '2025-10-02 00:54:06.463839', 'step': 24072, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:06.519338', 'step': 24072, 'epoch': 3}
{'type': 'loss', 'content': 0.07940401881933212, 'timestamp': '2025-10-02 00:54:06.521731', 'step': 24073, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:06.576961', 'step': 24073, 'epoch': 3}
{'type': 'loss', 'content': 0.025915630161762238, 'timestamp': '2025-10-02 00:54:06.582641', 'step': 24074, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:06.637808', 'step': 24074, 'epoch': 3}
{'type': 'loss', 'content': 0.0044705853797495365, 'timestamp': '2025-10-02 00:54:06.640115', 'step': 24075, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:06.694681', 'step': 24075, 'epoch': 3}
{'type': 'loss', 'content': 0.05366721749305725, 'timestamp': '2025-10-02 00:54:06.700919', 'step': 24076, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:06.756773', 'step': 24076, 'epoch': 3}
{'type': 'loss', 'content': 0.13063764572143555, 'timestamp': '2025-10-02 00:54:06.759013', 'step': 24077, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:06.813150', 'step': 24077, 'epoch': 3}
{'type': 'loss', 'content': 0.04149652272462845, 'timestamp': '2025-10-02 00:54:06.815592', 'step': 24078, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:06.872672', 'step': 24078, 'epoch': 3}
{'type': 'loss', 'content': 0.08194701373577118, 'timestamp': '2025-10-02 00:54:06.875044', 'step': 24079, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:06.929947', 'step': 24079, 'epoch': 3}
{'type': 'loss', 'content': 0.041110455989837646, 'timestamp': '2025-10-02 00:54:06.940099', 'step': 24080, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:06.994097', 'step': 24080, 'epoch': 3}
{'type': 'loss', 'content': 0.043453846126794815, 'timestamp': '2025-10-02 00:54:06.996468', 'step': 24081, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:07.060034', 'step': 24081, 'epoch': 3}
{'type': 'loss', 'content': 0.08618640899658203, 'timestamp': '2025-10-02 00:54:07.069374', 'step': 24082, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:07.128450', 'step': 24082, 'epoch': 3}
{'type': 'loss', 'content': 0.023635290563106537, 'timestamp': '2025-10-02 00:54:07.134408', 'step': 24083, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:07.196231', 'step': 24083, 'epoch': 3}
{'type': 'loss', 'content': 0.049143433570861816, 'timestamp': '2025-10-02 00:54:07.205803', 'step': 24084, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:07.271324', 'step': 24084, 'epoch': 3}
{'type': 'loss', 'content': 0.020097514614462852, 'timestamp': '2025-10-02 00:54:07.281562', 'step': 24085, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:07.336959', 'step': 24085, 'epoch': 3}
{'type': 'loss', 'content': 0.012134009040892124, 'timestamp': '2025-10-02 00:54:07.346083', 'step': 24086, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:07.401948', 'step': 24086, 'epoch': 3}
{'type': 'loss', 'content': 0.031296685338020325, 'timestamp': '2025-10-02 00:54:07.404295', 'step': 24087, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:54:07.458527', 'step': 24087, 'epoch': 3}
{'type': 'loss', 'content': 0.019823823124170303, 'timestamp': '2025-10-02 00:54:07.464437', 'step': 24088, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:07.518689', 'step': 24088, 'epoch': 3}
{'type': 'loss', 'content': 0.040970150381326675, 'timestamp': '2025-10-02 00:54:07.524657', 'step': 24089, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:07.579402', 'step': 24089, 'epoch': 3}
{'type': 'loss', 'content': 0.002513292944058776, 'timestamp': '2025-10-02 00:54:07.582026', 'step': 24090, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:07.638330', 'step': 24090, 'epoch': 3}
{'type': 'loss', 'content': 0.08822796493768692, 'timestamp': '2025-10-02 00:54:07.640353', 'step': 24091, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:07.694757', 'step': 24091, 'epoch': 3}
{'type': 'loss', 'content': 0.05456250160932541, 'timestamp': '2025-10-02 00:54:07.700721', 'step': 24092, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:07.758425', 'step': 24092, 'epoch': 3}
{'type': 'loss', 'content': 0.018991459161043167, 'timestamp': '2025-10-02 00:54:07.760841', 'step': 24093, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:07.816331', 'step': 24093, 'epoch': 3}
{'type': 'loss', 'content': 0.052148569375276566, 'timestamp': '2025-10-02 00:54:07.818482', 'step': 24094, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:07.872896', 'step': 24094, 'epoch': 3}
{'type': 'loss', 'content': 0.07762236893177032, 'timestamp': '2025-10-02 00:54:07.875684', 'step': 24095, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:07.931653', 'step': 24095, 'epoch': 3}
{'type': 'loss', 'content': 0.0119648901745677, 'timestamp': '2025-10-02 00:54:07.937739', 'step': 24096, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 544], 'flops': 10880066115712.0}, 'timestamp': '2025-10-02 00:54:08.018341', 'step': 24096, 'epoch': 3}
{'type': 'loss', 'content': 0.01377546600997448, 'timestamp': '2025-10-02 00:54:08.034677', 'step': 24097, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:54:08.096932', 'step': 24097, 'epoch': 3}
{'type': 'loss', 'content': 0.003204992739483714, 'timestamp': '2025-10-02 00:54:08.107388', 'step': 24098, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:08.163330', 'step': 24098, 'epoch': 3}
{'type': 'loss', 'content': 0.13340163230895996, 'timestamp': '2025-10-02 00:54:08.172872', 'step': 24099, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:54:08.233666', 'step': 24099, 'epoch': 3}
{'type': 'loss', 'content': 0.005635441280901432, 'timestamp': '2025-10-02 00:54:08.242283', 'step': 24100, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:08.302933', 'step': 24100, 'epoch': 3}
{'type': 'loss', 'content': 0.007908368483185768, 'timestamp': '2025-10-02 00:54:08.310475', 'step': 24101, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:08.368334', 'step': 24101, 'epoch': 3}
{'type': 'loss', 'content': 0.09024536609649658, 'timestamp': '2025-10-02 00:54:08.370884', 'step': 24102, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:08.425432', 'step': 24102, 'epoch': 3}
{'type': 'loss', 'content': 0.0782800242304802, 'timestamp': '2025-10-02 00:54:08.427838', 'step': 24103, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:08.483051', 'step': 24103, 'epoch': 3}
{'type': 'loss', 'content': 0.11271803081035614, 'timestamp': '2025-10-02 00:54:08.491493', 'step': 24104, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:08.549075', 'step': 24104, 'epoch': 3}
{'type': 'loss', 'content': 0.04197706654667854, 'timestamp': '2025-10-02 00:54:08.556193', 'step': 24105, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:08.614695', 'step': 24105, 'epoch': 3}
{'type': 'loss', 'content': 0.1363990604877472, 'timestamp': '2025-10-02 00:54:08.617416', 'step': 24106, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:08.671993', 'step': 24106, 'epoch': 3}
{'type': 'loss', 'content': 0.07753197848796844, 'timestamp': '2025-10-02 00:54:08.674550', 'step': 24107, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:08.729417', 'step': 24107, 'epoch': 3}
{'type': 'loss', 'content': 0.020495837554335594, 'timestamp': '2025-10-02 00:54:08.739465', 'step': 24108, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:08.795121', 'step': 24108, 'epoch': 3}
{'type': 'loss', 'content': 0.01383152510970831, 'timestamp': '2025-10-02 00:54:08.802360', 'step': 24109, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:08.857725', 'step': 24109, 'epoch': 3}
{'type': 'loss', 'content': 0.07414993643760681, 'timestamp': '2025-10-02 00:54:08.866913', 'step': 24110, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:08.924618', 'step': 24110, 'epoch': 3}
{'type': 'loss', 'content': 0.06163523718714714, 'timestamp': '2025-10-02 00:54:08.927565', 'step': 24111, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:08.985113', 'step': 24111, 'epoch': 3}
{'type': 'loss', 'content': 0.10013710707426071, 'timestamp': '2025-10-02 00:54:08.991612', 'step': 24112, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:54:09.052131', 'step': 24112, 'epoch': 3}
{'type': 'loss', 'content': 0.0038361113984137774, 'timestamp': '2025-10-02 00:54:09.063412', 'step': 24113, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:09.120377', 'step': 24113, 'epoch': 3}
{'type': 'loss', 'content': 0.018974589183926582, 'timestamp': '2025-10-02 00:54:09.126082', 'step': 24114, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:09.182672', 'step': 24114, 'epoch': 3}
{'type': 'loss', 'content': 0.03578949347138405, 'timestamp': '2025-10-02 00:54:09.186366', 'step': 24115, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:54:09.243118', 'step': 24115, 'epoch': 3}
{'type': 'loss', 'content': 0.02927258238196373, 'timestamp': '2025-10-02 00:54:09.249541', 'step': 24116, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:09.305007', 'step': 24116, 'epoch': 3}
{'type': 'loss', 'content': 0.04346379265189171, 'timestamp': '2025-10-02 00:54:09.310809', 'step': 24117, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:54:09.388823', 'step': 24117, 'epoch': 3}
{'type': 'loss', 'content': 0.016883697360754013, 'timestamp': '2025-10-02 00:54:09.401110', 'step': 24118, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:09.471773', 'step': 24118, 'epoch': 3}
{'type': 'loss', 'content': 0.09903991222381592, 'timestamp': '2025-10-02 00:54:09.475398', 'step': 24119, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:54:09.554622', 'step': 24119, 'epoch': 3}
{'type': 'loss', 'content': 0.017193563282489777, 'timestamp': '2025-10-02 00:54:09.568766', 'step': 24120, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:09.623718', 'step': 24120, 'epoch': 3}
{'type': 'loss', 'content': 0.13736003637313843, 'timestamp': '2025-10-02 00:54:09.626742', 'step': 24121, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:54:09.693690', 'step': 24121, 'epoch': 3}
{'type': 'loss', 'content': 0.04790883883833885, 'timestamp': '2025-10-02 00:54:09.704540', 'step': 24122, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:09.762301', 'step': 24122, 'epoch': 3}
{'type': 'loss', 'content': 0.05951310321688652, 'timestamp': '2025-10-02 00:54:09.765550', 'step': 24123, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:09.823504', 'step': 24123, 'epoch': 3}
{'type': 'loss', 'content': 0.07012747973203659, 'timestamp': '2025-10-02 00:54:09.830419', 'step': 24124, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:09.884851', 'step': 24124, 'epoch': 3}
{'type': 'loss', 'content': 0.058241698890924454, 'timestamp': '2025-10-02 00:54:09.888075', 'step': 24125, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:09.943607', 'step': 24125, 'epoch': 3}
{'type': 'loss', 'content': 0.0880640521645546, 'timestamp': '2025-10-02 00:54:09.946956', 'step': 24126, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:10.004679', 'step': 24126, 'epoch': 3}
{'type': 'loss', 'content': 0.00936462264508009, 'timestamp': '2025-10-02 00:54:10.007158', 'step': 24127, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:10.063174', 'step': 24127, 'epoch': 3}
{'type': 'loss', 'content': 0.09104487299919128, 'timestamp': '2025-10-02 00:54:10.069674', 'step': 24128, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:54:10.131265', 'step': 24128, 'epoch': 3}
{'type': 'loss', 'content': 0.03296661004424095, 'timestamp': '2025-10-02 00:54:10.142584', 'step': 24129, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:10.200675', 'step': 24129, 'epoch': 3}
{'type': 'loss', 'content': 0.05557991936802864, 'timestamp': '2025-10-02 00:54:10.204127', 'step': 24130, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:10.261672', 'step': 24130, 'epoch': 3}
{'type': 'loss', 'content': 0.1515205353498459, 'timestamp': '2025-10-02 00:54:10.266025', 'step': 24131, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:10.324120', 'step': 24131, 'epoch': 3}
{'type': 'loss', 'content': 0.053582288324832916, 'timestamp': '2025-10-02 00:54:10.330881', 'step': 24132, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:10.385788', 'step': 24132, 'epoch': 3}
{'type': 'loss', 'content': 0.06707049906253815, 'timestamp': '2025-10-02 00:54:10.395971', 'step': 24133, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:10.451048', 'step': 24133, 'epoch': 3}
{'type': 'loss', 'content': 0.04605814814567566, 'timestamp': '2025-10-02 00:54:10.456038', 'step': 24134, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:54:10.512759', 'step': 24134, 'epoch': 3}
{'type': 'loss', 'content': 0.04580779746174812, 'timestamp': '2025-10-02 00:54:10.515581', 'step': 24135, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:10.573888', 'step': 24135, 'epoch': 3}
{'type': 'loss', 'content': 0.036514800041913986, 'timestamp': '2025-10-02 00:54:10.580411', 'step': 24136, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:10.635235', 'step': 24136, 'epoch': 3}
{'type': 'loss', 'content': 0.07609385251998901, 'timestamp': '2025-10-02 00:54:10.638623', 'step': 24137, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:10.695722', 'step': 24137, 'epoch': 3}
{'type': 'loss', 'content': 0.059052031487226486, 'timestamp': '2025-10-02 00:54:10.698382', 'step': 24138, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:54:10.768667', 'step': 24138, 'epoch': 3}
{'type': 'loss', 'content': 0.013241574168205261, 'timestamp': '2025-10-02 00:54:10.780789', 'step': 24139, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:10.835666', 'step': 24139, 'epoch': 3}
{'type': 'loss', 'content': 0.07696682214736938, 'timestamp': '2025-10-02 00:54:10.842139', 'step': 24140, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:10.897050', 'step': 24140, 'epoch': 3}
{'type': 'loss', 'content': 0.019644316285848618, 'timestamp': '2025-10-02 00:54:10.904374', 'step': 24141, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:54:10.966980', 'step': 24141, 'epoch': 3}
{'type': 'loss', 'content': 0.005989451892673969, 'timestamp': '2025-10-02 00:54:10.977430', 'step': 24142, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:11.032832', 'step': 24142, 'epoch': 3}
{'type': 'loss', 'content': 0.029203543439507484, 'timestamp': '2025-10-02 00:54:11.035312', 'step': 24143, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:11.090003', 'step': 24143, 'epoch': 3}
{'type': 'loss', 'content': 0.014199021272361279, 'timestamp': '2025-10-02 00:54:11.100119', 'step': 24144, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:11.153782', 'step': 24144, 'epoch': 3}
{'type': 'loss', 'content': 0.05897027626633644, 'timestamp': '2025-10-02 00:54:11.159826', 'step': 24145, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:11.214610', 'step': 24145, 'epoch': 3}
{'type': 'loss', 'content': 0.07007890194654465, 'timestamp': '2025-10-02 00:54:11.220301', 'step': 24146, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:11.275161', 'step': 24146, 'epoch': 3}
{'type': 'loss', 'content': 0.11257606744766235, 'timestamp': '2025-10-02 00:54:11.277516', 'step': 24147, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:11.331215', 'step': 24147, 'epoch': 3}
{'type': 'loss', 'content': 0.021838076412677765, 'timestamp': '2025-10-02 00:54:11.337202', 'step': 24148, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:11.391899', 'step': 24148, 'epoch': 3}
{'type': 'loss', 'content': 0.019218817353248596, 'timestamp': '2025-10-02 00:54:11.402172', 'step': 24149, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:11.456498', 'step': 24149, 'epoch': 3}
{'type': 'loss', 'content': 0.030145583674311638, 'timestamp': '2025-10-02 00:54:11.459042', 'step': 24150, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:11.515137', 'step': 24150, 'epoch': 3}
{'type': 'loss', 'content': 0.22573308646678925, 'timestamp': '2025-10-02 00:54:11.518286', 'step': 24151, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:11.572750', 'step': 24151, 'epoch': 3}
{'type': 'loss', 'content': 0.11912763118743896, 'timestamp': '2025-10-02 00:54:11.579176', 'step': 24152, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:54:11.632753', 'step': 24152, 'epoch': 3}
{'type': 'loss', 'content': 0.06433633714914322, 'timestamp': '2025-10-02 00:54:11.637027', 'step': 24153, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:11.699065', 'step': 24153, 'epoch': 3}
{'type': 'loss', 'content': 0.0034223676193505526, 'timestamp': '2025-10-02 00:54:11.708609', 'step': 24154, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:54:11.776330', 'step': 24154, 'epoch': 3}
{'type': 'loss', 'content': 0.06455539166927338, 'timestamp': '2025-10-02 00:54:11.786937', 'step': 24155, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:54:11.858628', 'step': 24155, 'epoch': 3}
{'type': 'loss', 'content': 0.08404514193534851, 'timestamp': '2025-10-02 00:54:11.871874', 'step': 24156, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:54:11.932676', 'step': 24156, 'epoch': 3}
{'type': 'loss', 'content': 0.057721350342035294, 'timestamp': '2025-10-02 00:54:11.944242', 'step': 24157, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:11.998998', 'step': 24157, 'epoch': 3}
{'type': 'loss', 'content': 0.023728542029857635, 'timestamp': '2025-10-02 00:54:12.001532', 'step': 24158, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:12.057128', 'step': 24158, 'epoch': 3}
{'type': 'loss', 'content': 0.006514341104775667, 'timestamp': '2025-10-02 00:54:12.063043', 'step': 24159, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:12.117273', 'step': 24159, 'epoch': 3}
{'type': 'loss', 'content': 0.04302356019616127, 'timestamp': '2025-10-02 00:54:12.123691', 'step': 24160, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:12.178506', 'step': 24160, 'epoch': 3}
{'type': 'loss', 'content': 0.06917302310466766, 'timestamp': '2025-10-02 00:54:12.180993', 'step': 24161, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:12.237267', 'step': 24161, 'epoch': 3}
{'type': 'loss', 'content': 0.028486179187893867, 'timestamp': '2025-10-02 00:54:12.239505', 'step': 24162, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:12.293948', 'step': 24162, 'epoch': 3}
{'type': 'loss', 'content': 0.062001783400774, 'timestamp': '2025-10-02 00:54:12.296494', 'step': 24163, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:12.351627', 'step': 24163, 'epoch': 3}
{'type': 'loss', 'content': 0.046060651540756226, 'timestamp': '2025-10-02 00:54:12.361685', 'step': 24164, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:12.417934', 'step': 24164, 'epoch': 3}
{'type': 'loss', 'content': 0.07856950163841248, 'timestamp': '2025-10-02 00:54:12.428196', 'step': 24165, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:12.487279', 'step': 24165, 'epoch': 3}
{'type': 'loss', 'content': 0.04046744480729103, 'timestamp': '2025-10-02 00:54:12.489616', 'step': 24166, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:54:12.544209', 'step': 24166, 'epoch': 3}
{'type': 'loss', 'content': 0.06483880430459976, 'timestamp': '2025-10-02 00:54:12.546699', 'step': 24167, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:12.601922', 'step': 24167, 'epoch': 3}
{'type': 'loss', 'content': 0.04504445195198059, 'timestamp': '2025-10-02 00:54:12.612027', 'step': 24168, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:12.667017', 'step': 24168, 'epoch': 3}
{'type': 'loss', 'content': 0.02877948060631752, 'timestamp': '2025-10-02 00:54:12.676540', 'step': 24169, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:12.731458', 'step': 24169, 'epoch': 3}
{'type': 'loss', 'content': 0.09849169105291367, 'timestamp': '2025-10-02 00:54:12.734151', 'step': 24170, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:12.792678', 'step': 24170, 'epoch': 3}
{'type': 'loss', 'content': 0.03505988046526909, 'timestamp': '2025-10-02 00:54:12.802763', 'step': 24171, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:54:12.874283', 'step': 24171, 'epoch': 3}
{'type': 'loss', 'content': 0.025876423344016075, 'timestamp': '2025-10-02 00:54:12.887462', 'step': 24172, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:54:12.943378', 'step': 24172, 'epoch': 3}
{'type': 'loss', 'content': 0.060130953788757324, 'timestamp': '2025-10-02 00:54:12.945718', 'step': 24173, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:13.000093', 'step': 24173, 'epoch': 3}
{'type': 'loss', 'content': 0.042555004358291626, 'timestamp': '2025-10-02 00:54:13.002485', 'step': 24174, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:13.061429', 'step': 24174, 'epoch': 3}
{'type': 'loss', 'content': 0.024950360879302025, 'timestamp': '2025-10-02 00:54:13.071583', 'step': 24175, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:13.126097', 'step': 24175, 'epoch': 3}
{'type': 'loss', 'content': 0.01688091643154621, 'timestamp': '2025-10-02 00:54:13.136147', 'step': 24176, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:13.195492', 'step': 24176, 'epoch': 3}
{'type': 'loss', 'content': 0.0064594317227602005, 'timestamp': '2025-10-02 00:54:13.206480', 'step': 24177, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:13.262915', 'step': 24177, 'epoch': 3}
{'type': 'loss', 'content': 0.0368904210627079, 'timestamp': '2025-10-02 00:54:13.272487', 'step': 24178, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:13.327306', 'step': 24178, 'epoch': 3}
{'type': 'loss', 'content': 0.07633928954601288, 'timestamp': '2025-10-02 00:54:13.329655', 'step': 24179, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:13.384417', 'step': 24179, 'epoch': 3}
{'type': 'loss', 'content': 0.028480300679802895, 'timestamp': '2025-10-02 00:54:13.390319', 'step': 24180, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:13.444421', 'step': 24180, 'epoch': 3}
{'type': 'loss', 'content': 0.045689936727285385, 'timestamp': '2025-10-02 00:54:13.447509', 'step': 24181, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:13.501789', 'step': 24181, 'epoch': 3}
{'type': 'loss', 'content': 0.08714397251605988, 'timestamp': '2025-10-02 00:54:13.504000', 'step': 24182, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:13.559042', 'step': 24182, 'epoch': 3}
{'type': 'loss', 'content': 0.04358748346567154, 'timestamp': '2025-10-02 00:54:13.561604', 'step': 24183, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:13.616522', 'step': 24183, 'epoch': 3}
{'type': 'loss', 'content': 0.024059731513261795, 'timestamp': '2025-10-02 00:54:13.622399', 'step': 24184, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:13.677670', 'step': 24184, 'epoch': 3}
{'type': 'loss', 'content': 0.04324794188141823, 'timestamp': '2025-10-02 00:54:13.687875', 'step': 24185, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:13.742284', 'step': 24185, 'epoch': 3}
{'type': 'loss', 'content': 0.03946985676884651, 'timestamp': '2025-10-02 00:54:13.744555', 'step': 24186, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:13.798629', 'step': 24186, 'epoch': 3}
{'type': 'loss', 'content': 0.031226111575961113, 'timestamp': '2025-10-02 00:54:13.800999', 'step': 24187, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:54:13.854731', 'step': 24187, 'epoch': 3}
{'type': 'loss', 'content': 0.09405878931283951, 'timestamp': '2025-10-02 00:54:13.861450', 'step': 24188, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:13.917368', 'step': 24188, 'epoch': 3}
{'type': 'loss', 'content': 0.019385354593396187, 'timestamp': '2025-10-02 00:54:13.919672', 'step': 24189, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:13.973811', 'step': 24189, 'epoch': 3}
{'type': 'loss', 'content': 0.028484443202614784, 'timestamp': '2025-10-02 00:54:13.983150', 'step': 24190, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:14.038397', 'step': 24190, 'epoch': 3}
{'type': 'loss', 'content': 0.010891455225646496, 'timestamp': '2025-10-02 00:54:14.045912', 'step': 24191, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:14.100411', 'step': 24191, 'epoch': 3}
{'type': 'loss', 'content': 0.027003539726138115, 'timestamp': '2025-10-02 00:54:14.106220', 'step': 24192, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:14.160165', 'step': 24192, 'epoch': 3}
{'type': 'loss', 'content': 0.02276073954999447, 'timestamp': '2025-10-02 00:54:14.162610', 'step': 24193, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:14.218267', 'step': 24193, 'epoch': 3}
{'type': 'loss', 'content': 0.03929795324802399, 'timestamp': '2025-10-02 00:54:14.227563', 'step': 24194, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:14.282513', 'step': 24194, 'epoch': 3}
{'type': 'loss', 'content': 0.09614778310060501, 'timestamp': '2025-10-02 00:54:14.284932', 'step': 24195, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:14.339210', 'step': 24195, 'epoch': 3}
{'type': 'loss', 'content': 0.18162991106510162, 'timestamp': '2025-10-02 00:54:14.344861', 'step': 24196, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:14.398491', 'step': 24196, 'epoch': 3}
{'type': 'loss', 'content': 0.026160499081015587, 'timestamp': '2025-10-02 00:54:14.400761', 'step': 24197, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:14.455153', 'step': 24197, 'epoch': 3}
{'type': 'loss', 'content': 0.06639954447746277, 'timestamp': '2025-10-02 00:54:14.457398', 'step': 24198, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:14.511660', 'step': 24198, 'epoch': 3}
{'type': 'loss', 'content': 0.05537497624754906, 'timestamp': '2025-10-02 00:54:14.513973', 'step': 24199, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:14.568147', 'step': 24199, 'epoch': 3}
{'type': 'loss', 'content': 0.07268368452787399, 'timestamp': '2025-10-02 00:54:14.574158', 'step': 24200, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:14.627727', 'step': 24200, 'epoch': 3}
{'type': 'loss', 'content': 0.06356034427881241, 'timestamp': '2025-10-02 00:54:14.630103', 'step': 24201, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:14.684956', 'step': 24201, 'epoch': 3}
{'type': 'loss', 'content': 0.012362122535705566, 'timestamp': '2025-10-02 00:54:14.687285', 'step': 24202, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:14.746393', 'step': 24202, 'epoch': 3}
{'type': 'loss', 'content': 0.03850046917796135, 'timestamp': '2025-10-02 00:54:14.755970', 'step': 24203, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:14.811135', 'step': 24203, 'epoch': 3}
{'type': 'loss', 'content': 0.10978309065103531, 'timestamp': '2025-10-02 00:54:14.817012', 'step': 24204, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:14.871481', 'step': 24204, 'epoch': 3}
{'type': 'loss', 'content': 0.13778004050254822, 'timestamp': '2025-10-02 00:54:14.874916', 'step': 24205, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:14.931147', 'step': 24205, 'epoch': 3}
{'type': 'loss', 'content': 0.05516057834029198, 'timestamp': '2025-10-02 00:54:14.933711', 'step': 24206, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:14.989080', 'step': 24206, 'epoch': 3}
{'type': 'loss', 'content': 0.057865213602781296, 'timestamp': '2025-10-02 00:54:14.992235', 'step': 24207, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:15.047315', 'step': 24207, 'epoch': 3}
{'type': 'loss', 'content': 0.03396947681903839, 'timestamp': '2025-10-02 00:54:15.053701', 'step': 24208, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:15.107400', 'step': 24208, 'epoch': 3}
{'type': 'loss', 'content': 0.05502284690737724, 'timestamp': '2025-10-02 00:54:15.114930', 'step': 24209, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:15.170865', 'step': 24209, 'epoch': 3}
{'type': 'loss', 'content': 0.12761123478412628, 'timestamp': '2025-10-02 00:54:15.173358', 'step': 24210, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:15.233814', 'step': 24210, 'epoch': 3}
{'type': 'loss', 'content': 0.02166120707988739, 'timestamp': '2025-10-02 00:54:15.243974', 'step': 24211, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:15.299508', 'step': 24211, 'epoch': 3}
{'type': 'loss', 'content': 0.02170441299676895, 'timestamp': '2025-10-02 00:54:15.305657', 'step': 24212, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:15.361061', 'step': 24212, 'epoch': 3}
{'type': 'loss', 'content': 0.026502354070544243, 'timestamp': '2025-10-02 00:54:15.371322', 'step': 24213, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:15.426348', 'step': 24213, 'epoch': 3}
{'type': 'loss', 'content': 0.02158452942967415, 'timestamp': '2025-10-02 00:54:15.435692', 'step': 24214, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:15.491797', 'step': 24214, 'epoch': 3}
{'type': 'loss', 'content': 0.08085180819034576, 'timestamp': '2025-10-02 00:54:15.494266', 'step': 24215, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:15.549360', 'step': 24215, 'epoch': 3}
{'type': 'loss', 'content': 0.16405071318149567, 'timestamp': '2025-10-02 00:54:15.555341', 'step': 24216, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:15.609665', 'step': 24216, 'epoch': 3}
{'type': 'loss', 'content': 0.033188436180353165, 'timestamp': '2025-10-02 00:54:15.615728', 'step': 24217, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:15.671413', 'step': 24217, 'epoch': 3}
{'type': 'loss', 'content': 0.04064273461699486, 'timestamp': '2025-10-02 00:54:15.680960', 'step': 24218, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:15.743511', 'step': 24218, 'epoch': 3}
{'type': 'loss', 'content': 0.1010313406586647, 'timestamp': '2025-10-02 00:54:15.745922', 'step': 24219, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:15.801394', 'step': 24219, 'epoch': 3}
{'type': 'loss', 'content': 0.040577832609415054, 'timestamp': '2025-10-02 00:54:15.807052', 'step': 24220, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:54:15.860650', 'step': 24220, 'epoch': 3}
{'type': 'loss', 'content': 0.07966963201761246, 'timestamp': '2025-10-02 00:54:15.864245', 'step': 24221, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:15.922682', 'step': 24221, 'epoch': 3}
{'type': 'loss', 'content': 0.06275193393230438, 'timestamp': '2025-10-02 00:54:15.928633', 'step': 24222, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:15.986737', 'step': 24222, 'epoch': 3}
{'type': 'loss', 'content': 0.05024047940969467, 'timestamp': '2025-10-02 00:54:15.994242', 'step': 24223, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:16.050122', 'step': 24223, 'epoch': 3}
{'type': 'loss', 'content': 0.007735477760434151, 'timestamp': '2025-10-02 00:54:16.060276', 'step': 24224, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:16.114829', 'step': 24224, 'epoch': 3}
{'type': 'loss', 'content': 0.08143122494220734, 'timestamp': '2025-10-02 00:54:16.117413', 'step': 24225, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:16.171334', 'step': 24225, 'epoch': 3}
{'type': 'loss', 'content': 0.03198002651333809, 'timestamp': '2025-10-02 00:54:16.173946', 'step': 24226, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:16.232514', 'step': 24226, 'epoch': 3}
{'type': 'loss', 'content': 0.03853461891412735, 'timestamp': '2025-10-02 00:54:16.241790', 'step': 24227, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:54:16.296681', 'step': 24227, 'epoch': 3}
{'type': 'loss', 'content': 0.07737594842910767, 'timestamp': '2025-10-02 00:54:16.302875', 'step': 24228, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:16.357981', 'step': 24228, 'epoch': 3}
{'type': 'loss', 'content': 0.018126726150512695, 'timestamp': '2025-10-02 00:54:16.363678', 'step': 24229, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:16.417851', 'step': 24229, 'epoch': 3}
{'type': 'loss', 'content': 0.14666368067264557, 'timestamp': '2025-10-02 00:54:16.420056', 'step': 24230, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:54:16.475230', 'step': 24230, 'epoch': 3}
{'type': 'loss', 'content': 0.07423795014619827, 'timestamp': '2025-10-02 00:54:16.477802', 'step': 24231, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:16.531857', 'step': 24231, 'epoch': 3}
{'type': 'loss', 'content': 0.06582831591367722, 'timestamp': '2025-10-02 00:54:16.537729', 'step': 24232, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:16.591342', 'step': 24232, 'epoch': 3}
{'type': 'loss', 'content': 0.0927104726433754, 'timestamp': '2025-10-02 00:54:16.594001', 'step': 24233, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:16.649339', 'step': 24233, 'epoch': 3}
{'type': 'loss', 'content': 0.042768873274326324, 'timestamp': '2025-10-02 00:54:16.651569', 'step': 24234, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:16.706519', 'step': 24234, 'epoch': 3}
{'type': 'loss', 'content': 0.06194354221224785, 'timestamp': '2025-10-02 00:54:16.712309', 'step': 24235, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:16.767628', 'step': 24235, 'epoch': 3}
{'type': 'loss', 'content': 0.01365567184984684, 'timestamp': '2025-10-02 00:54:16.777903', 'step': 24236, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:16.832648', 'step': 24236, 'epoch': 3}
{'type': 'loss', 'content': 0.06971559673547745, 'timestamp': '2025-10-02 00:54:16.838604', 'step': 24237, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:16.895781', 'step': 24237, 'epoch': 3}
{'type': 'loss', 'content': 0.022409282624721527, 'timestamp': '2025-10-02 00:54:16.898941', 'step': 24238, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:16.956569', 'step': 24238, 'epoch': 3}
{'type': 'loss', 'content': 0.13521453738212585, 'timestamp': '2025-10-02 00:54:16.960375', 'step': 24239, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:17.016954', 'step': 24239, 'epoch': 3}
{'type': 'loss', 'content': 0.031421221792697906, 'timestamp': '2025-10-02 00:54:17.027054', 'step': 24240, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:54:17.081924', 'step': 24240, 'epoch': 3}
{'type': 'loss', 'content': 0.026147929951548576, 'timestamp': '2025-10-02 00:54:17.085539', 'step': 24241, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:54:17.140119', 'step': 24241, 'epoch': 3}
{'type': 'loss', 'content': 0.10837410390377045, 'timestamp': '2025-10-02 00:54:17.142398', 'step': 24242, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:17.199421', 'step': 24242, 'epoch': 3}
{'type': 'loss', 'content': 0.034900978207588196, 'timestamp': '2025-10-02 00:54:17.202246', 'step': 24243, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:17.256677', 'step': 24243, 'epoch': 3}
{'type': 'loss', 'content': 0.011406424455344677, 'timestamp': '2025-10-02 00:54:17.262744', 'step': 24244, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:17.316071', 'step': 24244, 'epoch': 3}
{'type': 'loss', 'content': 0.10538403689861298, 'timestamp': '2025-10-02 00:54:17.318420', 'step': 24245, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:17.372214', 'step': 24245, 'epoch': 3}
{'type': 'loss', 'content': 0.10154123604297638, 'timestamp': '2025-10-02 00:54:17.374683', 'step': 24246, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:17.429068', 'step': 24246, 'epoch': 3}
{'type': 'loss', 'content': 0.07710793614387512, 'timestamp': '2025-10-02 00:54:17.431788', 'step': 24247, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:17.486671', 'step': 24247, 'epoch': 3}
{'type': 'loss', 'content': 0.03993385657668114, 'timestamp': '2025-10-02 00:54:17.492771', 'step': 24248, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:17.547633', 'step': 24248, 'epoch': 3}
{'type': 'loss', 'content': 0.03559769690036774, 'timestamp': '2025-10-02 00:54:17.553224', 'step': 24249, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:17.608327', 'step': 24249, 'epoch': 3}
{'type': 'loss', 'content': 0.023621292784810066, 'timestamp': '2025-10-02 00:54:17.617656', 'step': 24250, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:17.674930', 'step': 24250, 'epoch': 3}
{'type': 'loss', 'content': 0.0452943779528141, 'timestamp': '2025-10-02 00:54:17.677475', 'step': 24251, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:17.731914', 'step': 24251, 'epoch': 3}
{'type': 'loss', 'content': 0.07781326770782471, 'timestamp': '2025-10-02 00:54:17.738003', 'step': 24252, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:17.795261', 'step': 24252, 'epoch': 3}
{'type': 'loss', 'content': 0.04720098897814751, 'timestamp': '2025-10-02 00:54:17.806219', 'step': 24253, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:17.861776', 'step': 24253, 'epoch': 3}
{'type': 'loss', 'content': 0.06243214011192322, 'timestamp': '2025-10-02 00:54:17.864944', 'step': 24254, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:17.921057', 'step': 24254, 'epoch': 3}
{'type': 'loss', 'content': 0.02912391535937786, 'timestamp': '2025-10-02 00:54:17.928252', 'step': 24255, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:17.987771', 'step': 24255, 'epoch': 3}
{'type': 'loss', 'content': 0.05930210277438164, 'timestamp': '2025-10-02 00:54:17.993688', 'step': 24256, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:18.047283', 'step': 24256, 'epoch': 3}
{'type': 'loss', 'content': 0.15545906126499176, 'timestamp': '2025-10-02 00:54:18.049934', 'step': 24257, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:18.104236', 'step': 24257, 'epoch': 3}
{'type': 'loss', 'content': 0.07292114943265915, 'timestamp': '2025-10-02 00:54:18.106609', 'step': 24258, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:54:18.161163', 'step': 24258, 'epoch': 3}
{'type': 'loss', 'content': 0.08785034716129303, 'timestamp': '2025-10-02 00:54:18.163982', 'step': 24259, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:18.217874', 'step': 24259, 'epoch': 3}
{'type': 'loss', 'content': 0.09939800202846527, 'timestamp': '2025-10-02 00:54:18.223822', 'step': 24260, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:54:18.277649', 'step': 24260, 'epoch': 3}
{'type': 'loss', 'content': 0.05928996950387955, 'timestamp': '2025-10-02 00:54:18.280043', 'step': 24261, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:18.335820', 'step': 24261, 'epoch': 3}
{'type': 'loss', 'content': 0.1474311649799347, 'timestamp': '2025-10-02 00:54:18.338391', 'step': 24262, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:18.393119', 'step': 24262, 'epoch': 3}
{'type': 'loss', 'content': 0.08274298906326294, 'timestamp': '2025-10-02 00:54:18.400458', 'step': 24263, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:18.457108', 'step': 24263, 'epoch': 3}
{'type': 'loss', 'content': 0.0442468486726284, 'timestamp': '2025-10-02 00:54:18.463315', 'step': 24264, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:18.516696', 'step': 24264, 'epoch': 3}
{'type': 'loss', 'content': 0.16069194674491882, 'timestamp': '2025-10-02 00:54:18.519185', 'step': 24265, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:54:18.573151', 'step': 24265, 'epoch': 3}
{'type': 'loss', 'content': 0.08738474547863007, 'timestamp': '2025-10-02 00:54:18.575841', 'step': 24266, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:18.631848', 'step': 24266, 'epoch': 3}
{'type': 'loss', 'content': 0.04824576526880264, 'timestamp': '2025-10-02 00:54:18.641007', 'step': 24267, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:18.697782', 'step': 24267, 'epoch': 3}
{'type': 'loss', 'content': 0.054865360260009766, 'timestamp': '2025-10-02 00:54:18.704266', 'step': 24268, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:18.759833', 'step': 24268, 'epoch': 3}
{'type': 'loss', 'content': 0.030705371871590614, 'timestamp': '2025-10-02 00:54:18.762232', 'step': 24269, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:18.817396', 'step': 24269, 'epoch': 3}
{'type': 'loss', 'content': 0.04449303075671196, 'timestamp': '2025-10-02 00:54:18.825400', 'step': 24270, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:18.881042', 'step': 24270, 'epoch': 3}
{'type': 'loss', 'content': 0.01602138765156269, 'timestamp': '2025-10-02 00:54:18.883125', 'step': 24271, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:18.937764', 'step': 24271, 'epoch': 3}
{'type': 'loss', 'content': 0.02070244774222374, 'timestamp': '2025-10-02 00:54:18.946224', 'step': 24272, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:19.000388', 'step': 24272, 'epoch': 3}
{'type': 'loss', 'content': 0.07123485207557678, 'timestamp': '2025-10-02 00:54:19.006134', 'step': 24273, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:19.061687', 'step': 24273, 'epoch': 3}
{'type': 'loss', 'content': 0.0229258481413126, 'timestamp': '2025-10-02 00:54:19.070959', 'step': 24274, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:19.127043', 'step': 24274, 'epoch': 3}
{'type': 'loss', 'content': 0.03746466338634491, 'timestamp': '2025-10-02 00:54:19.132659', 'step': 24275, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:19.194993', 'step': 24275, 'epoch': 3}
{'type': 'loss', 'content': 0.024713996797800064, 'timestamp': '2025-10-02 00:54:19.205935', 'step': 24276, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:54:19.278536', 'step': 24276, 'epoch': 3}
{'type': 'loss', 'content': 0.027632087469100952, 'timestamp': '2025-10-02 00:54:19.289820', 'step': 24277, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:19.348219', 'step': 24277, 'epoch': 3}
{'type': 'loss', 'content': 0.06062476336956024, 'timestamp': '2025-10-02 00:54:19.351383', 'step': 24278, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:19.408900', 'step': 24278, 'epoch': 3}
{'type': 'loss', 'content': 0.0037371977232396603, 'timestamp': '2025-10-02 00:54:19.416039', 'step': 24279, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:54:19.478331', 'step': 24279, 'epoch': 3}
{'type': 'loss', 'content': 0.03219661861658096, 'timestamp': '2025-10-02 00:54:19.489526', 'step': 24280, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:19.544816', 'step': 24280, 'epoch': 3}
{'type': 'loss', 'content': 0.03785323351621628, 'timestamp': '2025-10-02 00:54:19.547575', 'step': 24281, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:19.607012', 'step': 24281, 'epoch': 3}
{'type': 'loss', 'content': 0.02434353530406952, 'timestamp': '2025-10-02 00:54:19.610048', 'step': 24282, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:19.666028', 'step': 24282, 'epoch': 3}
{'type': 'loss', 'content': 0.09008453786373138, 'timestamp': '2025-10-02 00:54:19.668519', 'step': 24283, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:19.724600', 'step': 24283, 'epoch': 3}
{'type': 'loss', 'content': 0.01452157087624073, 'timestamp': '2025-10-02 00:54:19.731317', 'step': 24284, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:54:19.785890', 'step': 24284, 'epoch': 3}
{'type': 'loss', 'content': 0.04693033918738365, 'timestamp': '2025-10-02 00:54:19.788359', 'step': 24285, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:54:19.851297', 'step': 24285, 'epoch': 3}
{'type': 'loss', 'content': 0.02806236408650875, 'timestamp': '2025-10-02 00:54:19.862096', 'step': 24286, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:54:19.920603', 'step': 24286, 'epoch': 3}
{'type': 'loss', 'content': 0.11223678290843964, 'timestamp': '2025-10-02 00:54:19.923403', 'step': 24287, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:54:19.980448', 'step': 24287, 'epoch': 3}
{'type': 'loss', 'content': 0.07777994126081467, 'timestamp': '2025-10-02 00:54:19.986723', 'step': 24288, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:54:20.053115', 'step': 24288, 'epoch': 3}
{'type': 'loss', 'content': 0.00412970082834363, 'timestamp': '2025-10-02 00:54:20.066171', 'step': 24289, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:20.122803', 'step': 24289, 'epoch': 3}
{'type': 'loss', 'content': 0.03125991299748421, 'timestamp': '2025-10-02 00:54:20.126242', 'step': 24290, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:20.184157', 'step': 24290, 'epoch': 3}
{'type': 'loss', 'content': 0.03194723278284073, 'timestamp': '2025-10-02 00:54:20.191191', 'step': 24291, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:20.251711', 'step': 24291, 'epoch': 3}
{'type': 'loss', 'content': 0.057780154049396515, 'timestamp': '2025-10-02 00:54:20.262622', 'step': 24292, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:54:20.325528', 'step': 24292, 'epoch': 3}
{'type': 'loss', 'content': 0.013740499503910542, 'timestamp': '2025-10-02 00:54:20.337049', 'step': 24293, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:20.394156', 'step': 24293, 'epoch': 3}
{'type': 'loss', 'content': 0.035843897610902786, 'timestamp': '2025-10-02 00:54:20.403662', 'step': 24294, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:20.461530', 'step': 24294, 'epoch': 3}
{'type': 'loss', 'content': 0.046762797981500626, 'timestamp': '2025-10-02 00:54:20.464656', 'step': 24295, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:20.523673', 'step': 24295, 'epoch': 3}
{'type': 'loss', 'content': 0.16185502707958221, 'timestamp': '2025-10-02 00:54:20.530347', 'step': 24296, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:20.592104', 'step': 24296, 'epoch': 3}
{'type': 'loss', 'content': 0.007258094381541014, 'timestamp': '2025-10-02 00:54:20.595817', 'step': 24297, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:20.658828', 'step': 24297, 'epoch': 3}
{'type': 'loss', 'content': 0.03476579487323761, 'timestamp': '2025-10-02 00:54:20.669025', 'step': 24298, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:20.726177', 'step': 24298, 'epoch': 3}
{'type': 'loss', 'content': 0.09488002210855484, 'timestamp': '2025-10-02 00:54:20.733233', 'step': 24299, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:20.791282', 'step': 24299, 'epoch': 3}
{'type': 'loss', 'content': 0.020102214068174362, 'timestamp': '2025-10-02 00:54:20.797757', 'step': 24300, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:20.854725', 'step': 24300, 'epoch': 3}
{'type': 'loss', 'content': 0.023911919444799423, 'timestamp': '2025-10-02 00:54:20.864975', 'step': 24301, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:20.925284', 'step': 24301, 'epoch': 3}
{'type': 'loss', 'content': 0.018915217369794846, 'timestamp': '2025-10-02 00:54:20.935441', 'step': 24302, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:20.992715', 'step': 24302, 'epoch': 3}
{'type': 'loss', 'content': 0.10194655507802963, 'timestamp': '2025-10-02 00:54:20.995523', 'step': 24303, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:21.052890', 'step': 24303, 'epoch': 3}
{'type': 'loss', 'content': 0.06687461584806442, 'timestamp': '2025-10-02 00:54:21.060110', 'step': 24304, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:21.118072', 'step': 24304, 'epoch': 3}
{'type': 'loss', 'content': 0.03213522210717201, 'timestamp': '2025-10-02 00:54:21.121117', 'step': 24305, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:21.178764', 'step': 24305, 'epoch': 3}
{'type': 'loss', 'content': 0.017792226746678352, 'timestamp': '2025-10-02 00:54:21.188251', 'step': 24306, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:21.245270', 'step': 24306, 'epoch': 3}
{'type': 'loss', 'content': 0.011981732212007046, 'timestamp': '2025-10-02 00:54:21.252486', 'step': 24307, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:54:21.316079', 'step': 24307, 'epoch': 3}
{'type': 'loss', 'content': 0.014553594402968884, 'timestamp': '2025-10-02 00:54:21.327373', 'step': 24308, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:21.384398', 'step': 24308, 'epoch': 3}
{'type': 'loss', 'content': 0.04738389328122139, 'timestamp': '2025-10-02 00:54:21.390000', 'step': 24309, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:21.448090', 'step': 24309, 'epoch': 3}
{'type': 'loss', 'content': 0.019009405747056007, 'timestamp': '2025-10-02 00:54:21.457557', 'step': 24310, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:21.512894', 'step': 24310, 'epoch': 3}
{'type': 'loss', 'content': 0.056382957845926285, 'timestamp': '2025-10-02 00:54:21.516051', 'step': 24311, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:54:21.585084', 'step': 24311, 'epoch': 3}
{'type': 'loss', 'content': 0.011823434382677078, 'timestamp': '2025-10-02 00:54:21.598151', 'step': 24312, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:21.653564', 'step': 24312, 'epoch': 3}
{'type': 'loss', 'content': 0.07588716596364975, 'timestamp': '2025-10-02 00:54:21.660821', 'step': 24313, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:21.715679', 'step': 24313, 'epoch': 3}
{'type': 'loss', 'content': 0.07249531149864197, 'timestamp': '2025-10-02 00:54:21.718153', 'step': 24314, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:21.772635', 'step': 24314, 'epoch': 3}
{'type': 'loss', 'content': 0.09049013257026672, 'timestamp': '2025-10-02 00:54:21.775013', 'step': 24315, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:21.828814', 'step': 24315, 'epoch': 3}
{'type': 'loss', 'content': 0.10448291897773743, 'timestamp': '2025-10-02 00:54:21.836016', 'step': 24316, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:54:21.890435', 'step': 24316, 'epoch': 3}
{'type': 'loss', 'content': 0.06393130123615265, 'timestamp': '2025-10-02 00:54:21.892778', 'step': 24317, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:54:21.948166', 'step': 24317, 'epoch': 3}
{'type': 'loss', 'content': 0.12365798652172089, 'timestamp': '2025-10-02 00:54:21.951122', 'step': 24318, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:22.005820', 'step': 24318, 'epoch': 3}
{'type': 'loss', 'content': 0.07359142601490021, 'timestamp': '2025-10-02 00:54:22.008141', 'step': 24319, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:22.063321', 'step': 24319, 'epoch': 3}
{'type': 'loss', 'content': 0.0817578062415123, 'timestamp': '2025-10-02 00:54:22.069662', 'step': 24320, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:22.123194', 'step': 24320, 'epoch': 3}
{'type': 'loss', 'content': 0.06092005595564842, 'timestamp': '2025-10-02 00:54:22.126247', 'step': 24321, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:22.180895', 'step': 24321, 'epoch': 3}
{'type': 'loss', 'content': 0.015107153914868832, 'timestamp': '2025-10-02 00:54:22.183368', 'step': 24322, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:22.239076', 'step': 24322, 'epoch': 3}
{'type': 'loss', 'content': 0.040839944034814835, 'timestamp': '2025-10-02 00:54:22.241392', 'step': 24323, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:22.296428', 'step': 24323, 'epoch': 3}
{'type': 'loss', 'content': 0.05852114036679268, 'timestamp': '2025-10-02 00:54:22.302462', 'step': 24324, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:22.357820', 'step': 24324, 'epoch': 3}
{'type': 'loss', 'content': 0.05603962019085884, 'timestamp': '2025-10-02 00:54:22.360391', 'step': 24325, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:54:22.415848', 'step': 24325, 'epoch': 3}
{'type': 'loss', 'content': 0.055488016456365585, 'timestamp': '2025-10-02 00:54:22.419444', 'step': 24326, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:22.473787', 'step': 24326, 'epoch': 3}
{'type': 'loss', 'content': 0.02995574288070202, 'timestamp': '2025-10-02 00:54:22.478660', 'step': 24327, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:22.544048', 'step': 24327, 'epoch': 3}
{'type': 'loss', 'content': 0.06405195593833923, 'timestamp': '2025-10-02 00:54:22.550113', 'step': 24328, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:22.603597', 'step': 24328, 'epoch': 3}
{'type': 'loss', 'content': 0.04345107451081276, 'timestamp': '2025-10-02 00:54:22.606090', 'step': 24329, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:22.660633', 'step': 24329, 'epoch': 3}
{'type': 'loss', 'content': 0.04280902445316315, 'timestamp': '2025-10-02 00:54:22.669976', 'step': 24330, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:22.725210', 'step': 24330, 'epoch': 3}
{'type': 'loss', 'content': 0.02427828311920166, 'timestamp': '2025-10-02 00:54:22.730814', 'step': 24331, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:22.787897', 'step': 24331, 'epoch': 3}
{'type': 'loss', 'content': 0.06169680505990982, 'timestamp': '2025-10-02 00:54:22.795959', 'step': 24332, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:22.854618', 'step': 24332, 'epoch': 3}
{'type': 'loss', 'content': 0.009412981569766998, 'timestamp': '2025-10-02 00:54:22.864870', 'step': 24333, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:22.930422', 'step': 24333, 'epoch': 3}
{'type': 'loss', 'content': 0.033196207135915756, 'timestamp': '2025-10-02 00:54:22.935299', 'step': 24334, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:23.018747', 'step': 24334, 'epoch': 3}
{'type': 'loss', 'content': 0.03556312993168831, 'timestamp': '2025-10-02 00:54:23.028850', 'step': 24335, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:23.083518', 'step': 24335, 'epoch': 3}
{'type': 'loss', 'content': 0.04933972284197807, 'timestamp': '2025-10-02 00:54:23.089632', 'step': 24336, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:23.143022', 'step': 24336, 'epoch': 3}
{'type': 'loss', 'content': 0.09254182130098343, 'timestamp': '2025-10-02 00:54:23.145595', 'step': 24337, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:23.200190', 'step': 24337, 'epoch': 3}
{'type': 'loss', 'content': 0.07708825916051865, 'timestamp': '2025-10-02 00:54:23.209458', 'step': 24338, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:23.270097', 'step': 24338, 'epoch': 3}
{'type': 'loss', 'content': 0.0010010269470512867, 'timestamp': '2025-10-02 00:54:23.283349', 'step': 24339, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:23.351228', 'step': 24339, 'epoch': 3}
{'type': 'loss', 'content': 0.02919788844883442, 'timestamp': '2025-10-02 00:54:23.362995', 'step': 24340, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:23.440630', 'step': 24340, 'epoch': 3}
{'type': 'loss', 'content': 0.042769256979227066, 'timestamp': '2025-10-02 00:54:23.451615', 'step': 24341, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:23.529270', 'step': 24341, 'epoch': 3}
{'type': 'loss', 'content': 0.09314685314893723, 'timestamp': '2025-10-02 00:54:23.539044', 'step': 24342, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:23.611758', 'step': 24342, 'epoch': 3}
{'type': 'loss', 'content': 0.00536749791353941, 'timestamp': '2025-10-02 00:54:23.622254', 'step': 24343, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:23.700029', 'step': 24343, 'epoch': 3}
{'type': 'loss', 'content': 0.07027621567249298, 'timestamp': '2025-10-02 00:54:23.712022', 'step': 24344, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:23.785739', 'step': 24344, 'epoch': 3}
{'type': 'loss', 'content': 0.04464205726981163, 'timestamp': '2025-10-02 00:54:23.795840', 'step': 24345, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:23.883334', 'step': 24345, 'epoch': 3}
{'type': 'loss', 'content': 0.08193077147006989, 'timestamp': '2025-10-02 00:54:23.901760', 'step': 24346, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:24.010852', 'step': 24346, 'epoch': 3}
{'type': 'loss', 'content': 0.01919640228152275, 'timestamp': '2025-10-02 00:54:24.030431', 'step': 24347, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:24.108520', 'step': 24347, 'epoch': 3}
{'type': 'loss', 'content': 0.011552118696272373, 'timestamp': '2025-10-02 00:54:24.125183', 'step': 24348, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:24.209736', 'step': 24348, 'epoch': 3}
{'type': 'loss', 'content': 0.030765295028686523, 'timestamp': '2025-10-02 00:54:24.218775', 'step': 24349, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:24.300393', 'step': 24349, 'epoch': 3}
{'type': 'loss', 'content': 0.011422056704759598, 'timestamp': '2025-10-02 00:54:24.310551', 'step': 24350, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:24.390353', 'step': 24350, 'epoch': 3}
{'type': 'loss', 'content': 0.13757061958312988, 'timestamp': '2025-10-02 00:54:24.397405', 'step': 24351, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:24.483915', 'step': 24351, 'epoch': 3}
{'type': 'loss', 'content': 0.009378576651215553, 'timestamp': '2025-10-02 00:54:24.494210', 'step': 24352, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:24.549099', 'step': 24352, 'epoch': 3}
{'type': 'loss', 'content': 0.016627900302410126, 'timestamp': '2025-10-02 00:54:24.559151', 'step': 24353, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:24.613763', 'step': 24353, 'epoch': 3}
{'type': 'loss', 'content': 0.12348609417676926, 'timestamp': '2025-10-02 00:54:24.616061', 'step': 24354, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:54:24.679682', 'step': 24354, 'epoch': 3}
{'type': 'loss', 'content': 0.015622138045728207, 'timestamp': '2025-10-02 00:54:24.690515', 'step': 24355, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:24.746217', 'step': 24355, 'epoch': 3}
{'type': 'loss', 'content': 0.07004814594984055, 'timestamp': '2025-10-02 00:54:24.752343', 'step': 24356, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:54:24.812797', 'step': 24356, 'epoch': 3}
{'type': 'loss', 'content': 0.007686446886509657, 'timestamp': '2025-10-02 00:54:24.824131', 'step': 24357, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:24.891378', 'step': 24357, 'epoch': 3}
{'type': 'loss', 'content': 0.08347323536872864, 'timestamp': '2025-10-02 00:54:24.909095', 'step': 24358, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:24.969938', 'step': 24358, 'epoch': 3}
{'type': 'loss', 'content': 0.032259222120046616, 'timestamp': '2025-10-02 00:54:24.975265', 'step': 24359, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:25.037312', 'step': 24359, 'epoch': 3}
{'type': 'loss', 'content': 0.04095010831952095, 'timestamp': '2025-10-02 00:54:25.043433', 'step': 24360, 'epoch': 3}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:54:54.177803', 'step': 24360, 'epoch': 3}
{'type': 'pplx', 'content': 100.1298322926974, 'timestamp': '2025-10-02 00:54:54.181827', 'step': 24360, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:54.237519', 'step': 24360, 'epoch': 3}
{'type': 'loss', 'content': 0.023884445428848267, 'timestamp': '2025-10-02 00:54:54.241128', 'step': 24361, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:54.297457', 'step': 24361, 'epoch': 3}
{'type': 'loss', 'content': 0.08618911355733871, 'timestamp': '2025-10-02 00:54:54.299971', 'step': 24362, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:54.355734', 'step': 24362, 'epoch': 3}
{'type': 'loss', 'content': 0.013382979668676853, 'timestamp': '2025-10-02 00:54:54.365280', 'step': 24363, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:54.420741', 'step': 24363, 'epoch': 3}
{'type': 'loss', 'content': 0.05542526766657829, 'timestamp': '2025-10-02 00:54:54.427887', 'step': 24364, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:54.487046', 'step': 24364, 'epoch': 3}
{'type': 'loss', 'content': 0.0161832794547081, 'timestamp': '2025-10-02 00:54:54.497944', 'step': 24365, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:54.563304', 'step': 24365, 'epoch': 3}
{'type': 'loss', 'content': 0.028910664841532707, 'timestamp': '2025-10-02 00:54:54.565921', 'step': 24366, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:54.621645', 'step': 24366, 'epoch': 3}
{'type': 'loss', 'content': 0.040994931012392044, 'timestamp': '2025-10-02 00:54:54.625250', 'step': 24367, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:54.681426', 'step': 24367, 'epoch': 3}
{'type': 'loss', 'content': 0.005407510790973902, 'timestamp': '2025-10-02 00:54:54.691444', 'step': 24368, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:54.746324', 'step': 24368, 'epoch': 3}
{'type': 'loss', 'content': 0.01795043610036373, 'timestamp': '2025-10-02 00:54:54.749135', 'step': 24369, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:54.805284', 'step': 24369, 'epoch': 3}
{'type': 'loss', 'content': 0.03247378021478653, 'timestamp': '2025-10-02 00:54:54.814423', 'step': 24370, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:54.871759', 'step': 24370, 'epoch': 3}
{'type': 'loss', 'content': 0.079436294734478, 'timestamp': '2025-10-02 00:54:54.874138', 'step': 24371, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:54.929234', 'step': 24371, 'epoch': 3}
{'type': 'loss', 'content': 0.05365559831261635, 'timestamp': '2025-10-02 00:54:54.935808', 'step': 24372, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:54.992652', 'step': 24372, 'epoch': 3}
{'type': 'loss', 'content': 0.02255762368440628, 'timestamp': '2025-10-02 00:54:54.995670', 'step': 24373, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:55.052204', 'step': 24373, 'epoch': 3}
{'type': 'loss', 'content': 0.035876449197530746, 'timestamp': '2025-10-02 00:54:55.054440', 'step': 24374, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:55.110761', 'step': 24374, 'epoch': 3}
{'type': 'loss', 'content': 0.062115203589200974, 'timestamp': '2025-10-02 00:54:55.113379', 'step': 24375, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:55.168194', 'step': 24375, 'epoch': 3}
{'type': 'loss', 'content': 0.04990983009338379, 'timestamp': '2025-10-02 00:54:55.173756', 'step': 24376, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:55.229097', 'step': 24376, 'epoch': 3}
{'type': 'loss', 'content': 0.14528463780879974, 'timestamp': '2025-10-02 00:54:55.234870', 'step': 24377, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:55.291514', 'step': 24377, 'epoch': 3}
{'type': 'loss', 'content': 0.011100562289357185, 'timestamp': '2025-10-02 00:54:55.294501', 'step': 24378, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:55.350013', 'step': 24378, 'epoch': 3}
{'type': 'loss', 'content': 0.026923736557364464, 'timestamp': '2025-10-02 00:54:55.353515', 'step': 24379, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:55.408718', 'step': 24379, 'epoch': 3}
{'type': 'loss', 'content': 0.09029246121644974, 'timestamp': '2025-10-02 00:54:55.415043', 'step': 24380, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:55.468643', 'step': 24380, 'epoch': 3}
{'type': 'loss', 'content': 0.07595233619213104, 'timestamp': '2025-10-02 00:54:55.470770', 'step': 24381, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:54:55.525692', 'step': 24381, 'epoch': 3}
{'type': 'loss', 'content': 0.10531210154294968, 'timestamp': '2025-10-02 00:54:55.528045', 'step': 24382, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:54:55.584511', 'step': 24382, 'epoch': 3}
{'type': 'loss', 'content': 0.16077275574207306, 'timestamp': '2025-10-02 00:54:55.587426', 'step': 24383, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:55.644175', 'step': 24383, 'epoch': 3}
{'type': 'loss', 'content': 0.008236750960350037, 'timestamp': '2025-10-02 00:54:55.652274', 'step': 24384, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:54:55.706740', 'step': 24384, 'epoch': 3}
{'type': 'loss', 'content': 0.09437759965658188, 'timestamp': '2025-10-02 00:54:55.708979', 'step': 24385, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:54:55.764144', 'step': 24385, 'epoch': 3}
{'type': 'loss', 'content': 0.08181854337453842, 'timestamp': '2025-10-02 00:54:55.767046', 'step': 24386, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:55.822184', 'step': 24386, 'epoch': 3}
{'type': 'loss', 'content': 0.05470184236764908, 'timestamp': '2025-10-02 00:54:55.824525', 'step': 24387, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:55.878603', 'step': 24387, 'epoch': 3}
{'type': 'loss', 'content': 0.06664469838142395, 'timestamp': '2025-10-02 00:54:55.884242', 'step': 24388, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:55.938568', 'step': 24388, 'epoch': 3}
{'type': 'loss', 'content': 0.061517830938100815, 'timestamp': '2025-10-02 00:54:55.948790', 'step': 24389, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:56.046390', 'step': 24389, 'epoch': 3}
{'type': 'loss', 'content': 0.03573893383145332, 'timestamp': '2025-10-02 00:54:56.056595', 'step': 24390, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:56.122360', 'step': 24390, 'epoch': 3}
{'type': 'loss', 'content': 0.08808132261037827, 'timestamp': '2025-10-02 00:54:56.128029', 'step': 24391, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:54:56.198845', 'step': 24391, 'epoch': 3}
{'type': 'loss', 'content': 0.019479379057884216, 'timestamp': '2025-10-02 00:54:56.210310', 'step': 24392, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:56.272614', 'step': 24392, 'epoch': 3}
{'type': 'loss', 'content': 0.04463496431708336, 'timestamp': '2025-10-02 00:54:56.282257', 'step': 24393, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:56.342240', 'step': 24393, 'epoch': 3}
{'type': 'loss', 'content': 0.19283060729503632, 'timestamp': '2025-10-02 00:54:56.345176', 'step': 24394, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:56.403149', 'step': 24394, 'epoch': 3}
{'type': 'loss', 'content': 0.07456868141889572, 'timestamp': '2025-10-02 00:54:56.407883', 'step': 24395, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:54:56.475423', 'step': 24395, 'epoch': 3}
{'type': 'loss', 'content': 0.06862985342741013, 'timestamp': '2025-10-02 00:54:56.482019', 'step': 24396, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:56.548519', 'step': 24396, 'epoch': 3}
{'type': 'loss', 'content': 0.003214458702132106, 'timestamp': '2025-10-02 00:54:56.559445', 'step': 24397, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:56.623330', 'step': 24397, 'epoch': 3}
{'type': 'loss', 'content': 0.01736687496304512, 'timestamp': '2025-10-02 00:54:56.633013', 'step': 24398, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:54:56.701719', 'step': 24398, 'epoch': 3}
{'type': 'loss', 'content': 0.045116931200027466, 'timestamp': '2025-10-02 00:54:56.708228', 'step': 24399, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:54:56.779550', 'step': 24399, 'epoch': 3}
{'type': 'loss', 'content': 0.08638586103916168, 'timestamp': '2025-10-02 00:54:56.786476', 'step': 24400, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:56.854622', 'step': 24400, 'epoch': 3}
{'type': 'loss', 'content': 0.04779272899031639, 'timestamp': '2025-10-02 00:54:56.861691', 'step': 24401, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:56.925313', 'step': 24401, 'epoch': 3}
{'type': 'loss', 'content': 0.07572395354509354, 'timestamp': '2025-10-02 00:54:56.927710', 'step': 24402, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:56.990421', 'step': 24402, 'epoch': 3}
{'type': 'loss', 'content': 0.04036004841327667, 'timestamp': '2025-10-02 00:54:56.994956', 'step': 24403, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:54:57.056039', 'step': 24403, 'epoch': 3}
{'type': 'loss', 'content': 0.05365131422877312, 'timestamp': '2025-10-02 00:54:57.062561', 'step': 24404, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:54:57.126046', 'step': 24404, 'epoch': 3}
{'type': 'loss', 'content': 0.09532976895570755, 'timestamp': '2025-10-02 00:54:57.129163', 'step': 24405, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:57.186830', 'step': 24405, 'epoch': 3}
{'type': 'loss', 'content': 0.06107904762029648, 'timestamp': '2025-10-02 00:54:57.191632', 'step': 24406, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:57.261205', 'step': 24406, 'epoch': 3}
{'type': 'loss', 'content': 0.051725588738918304, 'timestamp': '2025-10-02 00:54:57.264017', 'step': 24407, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:57.321225', 'step': 24407, 'epoch': 3}
{'type': 'loss', 'content': 0.03136347606778145, 'timestamp': '2025-10-02 00:54:57.328657', 'step': 24408, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:54:57.386956', 'step': 24408, 'epoch': 3}
{'type': 'loss', 'content': 0.07265456765890121, 'timestamp': '2025-10-02 00:54:57.397190', 'step': 24409, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:57.461729', 'step': 24409, 'epoch': 3}
{'type': 'loss', 'content': 0.006797450128942728, 'timestamp': '2025-10-02 00:54:57.464562', 'step': 24410, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:54:57.526526', 'step': 24410, 'epoch': 3}
{'type': 'loss', 'content': 0.04442509636282921, 'timestamp': '2025-10-02 00:54:57.529910', 'step': 24411, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:57.588619', 'step': 24411, 'epoch': 3}
{'type': 'loss', 'content': 0.024709464982151985, 'timestamp': '2025-10-02 00:54:57.598809', 'step': 24412, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:54:57.665231', 'step': 24412, 'epoch': 3}
{'type': 'loss', 'content': 0.07331322133541107, 'timestamp': '2025-10-02 00:54:57.674764', 'step': 24413, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:57.741741', 'step': 24413, 'epoch': 3}
{'type': 'loss', 'content': 0.03962525725364685, 'timestamp': '2025-10-02 00:54:57.755881', 'step': 24414, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:57.825372', 'step': 24414, 'epoch': 3}
{'type': 'loss', 'content': 0.06278922408819199, 'timestamp': '2025-10-02 00:54:57.829866', 'step': 24415, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:57.892387', 'step': 24415, 'epoch': 3}
{'type': 'loss', 'content': 0.0410066694021225, 'timestamp': '2025-10-02 00:54:57.898676', 'step': 24416, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:57.955649', 'step': 24416, 'epoch': 3}
{'type': 'loss', 'content': 0.0713198333978653, 'timestamp': '2025-10-02 00:54:57.960016', 'step': 24417, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:58.025362', 'step': 24417, 'epoch': 3}
{'type': 'loss', 'content': 0.015179255045950413, 'timestamp': '2025-10-02 00:54:58.035545', 'step': 24418, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:54:58.109689', 'step': 24418, 'epoch': 3}
{'type': 'loss', 'content': 0.05119951441884041, 'timestamp': '2025-10-02 00:54:58.123912', 'step': 24419, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:54:58.206518', 'step': 24419, 'epoch': 3}
{'type': 'loss', 'content': 0.07383901625871658, 'timestamp': '2025-10-02 00:54:58.217607', 'step': 24420, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:54:58.289921', 'step': 24420, 'epoch': 3}
{'type': 'loss', 'content': 0.014139894396066666, 'timestamp': '2025-10-02 00:54:58.301502', 'step': 24421, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:58.373554', 'step': 24421, 'epoch': 3}
{'type': 'loss', 'content': 0.029436979442834854, 'timestamp': '2025-10-02 00:54:58.377424', 'step': 24422, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:54:58.443003', 'step': 24422, 'epoch': 3}
{'type': 'loss', 'content': 0.007148839998990297, 'timestamp': '2025-10-02 00:54:58.453472', 'step': 24423, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:58.517279', 'step': 24423, 'epoch': 3}
{'type': 'loss', 'content': 0.08237145841121674, 'timestamp': '2025-10-02 00:54:58.524999', 'step': 24424, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:58.590837', 'step': 24424, 'epoch': 3}
{'type': 'loss', 'content': 0.058802105486392975, 'timestamp': '2025-10-02 00:54:58.596245', 'step': 24425, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:54:58.656403', 'step': 24425, 'epoch': 3}
{'type': 'loss', 'content': 0.04717130586504936, 'timestamp': '2025-10-02 00:54:58.659759', 'step': 24426, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:54:58.715941', 'step': 24426, 'epoch': 3}
{'type': 'loss', 'content': 0.04592783376574516, 'timestamp': '2025-10-02 00:54:58.719053', 'step': 24427, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:54:58.788214', 'step': 24427, 'epoch': 3}
{'type': 'loss', 'content': 0.05326530709862709, 'timestamp': '2025-10-02 00:54:58.801018', 'step': 24428, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:58.858040', 'step': 24428, 'epoch': 3}
{'type': 'loss', 'content': 0.026676416397094727, 'timestamp': '2025-10-02 00:54:58.861351', 'step': 24429, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:58.920279', 'step': 24429, 'epoch': 3}
{'type': 'loss', 'content': 0.057692915201187134, 'timestamp': '2025-10-02 00:54:58.927467', 'step': 24430, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:58.989563', 'step': 24430, 'epoch': 3}
{'type': 'loss', 'content': 0.07236387580633163, 'timestamp': '2025-10-02 00:54:58.997332', 'step': 24431, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:54:59.063560', 'step': 24431, 'epoch': 3}
{'type': 'loss', 'content': 0.007803081534802914, 'timestamp': '2025-10-02 00:54:59.074963', 'step': 24432, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:54:59.142932', 'step': 24432, 'epoch': 3}
{'type': 'loss', 'content': 0.13387435674667358, 'timestamp': '2025-10-02 00:54:59.146024', 'step': 24433, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:54:59.208842', 'step': 24433, 'epoch': 3}
{'type': 'loss', 'content': 0.06760775297880173, 'timestamp': '2025-10-02 00:54:59.211884', 'step': 24434, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:59.271429', 'step': 24434, 'epoch': 3}
{'type': 'loss', 'content': 0.052646659314632416, 'timestamp': '2025-10-02 00:54:59.275585', 'step': 24435, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:54:59.339436', 'step': 24435, 'epoch': 3}
{'type': 'loss', 'content': 0.0506724938750267, 'timestamp': '2025-10-02 00:54:59.346616', 'step': 24436, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 576], 'flops': 11520070000896.0}, 'timestamp': '2025-10-02 00:54:59.432241', 'step': 24436, 'epoch': 3}
{'type': 'loss', 'content': 0.012626959942281246, 'timestamp': '2025-10-02 00:54:59.449073', 'step': 24437, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:54:59.509711', 'step': 24437, 'epoch': 3}
{'type': 'loss', 'content': 0.0567806214094162, 'timestamp': '2025-10-02 00:54:59.519879', 'step': 24438, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:54:59.576643', 'step': 24438, 'epoch': 3}
{'type': 'loss', 'content': 0.022658973932266235, 'timestamp': '2025-10-02 00:54:59.583886', 'step': 24439, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:54:59.637682', 'step': 24439, 'epoch': 3}
{'type': 'loss', 'content': 0.05509200692176819, 'timestamp': '2025-10-02 00:54:59.644221', 'step': 24440, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:54:59.698670', 'step': 24440, 'epoch': 3}
{'type': 'loss', 'content': 0.11764218658208847, 'timestamp': '2025-10-02 00:54:59.701051', 'step': 24441, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:59.756519', 'step': 24441, 'epoch': 3}
{'type': 'loss', 'content': 0.04842129722237587, 'timestamp': '2025-10-02 00:54:59.758620', 'step': 24442, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:59.814951', 'step': 24442, 'epoch': 3}
{'type': 'loss', 'content': 0.032600756734609604, 'timestamp': '2025-10-02 00:54:59.817249', 'step': 24443, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:54:59.872106', 'step': 24443, 'epoch': 3}
{'type': 'loss', 'content': 0.06520497053861618, 'timestamp': '2025-10-02 00:54:59.878315', 'step': 24444, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:54:59.931272', 'step': 24444, 'epoch': 3}
{'type': 'loss', 'content': 0.028175387531518936, 'timestamp': '2025-10-02 00:54:59.933815', 'step': 24445, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:54:59.989351', 'step': 24445, 'epoch': 3}
{'type': 'loss', 'content': 0.040450189262628555, 'timestamp': '2025-10-02 00:54:59.992238', 'step': 24446, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:00.048688', 'step': 24446, 'epoch': 3}
{'type': 'loss', 'content': 0.025298312306404114, 'timestamp': '2025-10-02 00:55:00.051287', 'step': 24447, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:00.106383', 'step': 24447, 'epoch': 3}
{'type': 'loss', 'content': 0.020013999193906784, 'timestamp': '2025-10-02 00:55:00.112576', 'step': 24448, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:00.168179', 'step': 24448, 'epoch': 3}
{'type': 'loss', 'content': 0.007217828184366226, 'timestamp': '2025-10-02 00:55:00.173917', 'step': 24449, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:00.228920', 'step': 24449, 'epoch': 3}
{'type': 'loss', 'content': 0.023231739178299904, 'timestamp': '2025-10-02 00:55:00.231517', 'step': 24450, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:00.287152', 'step': 24450, 'epoch': 3}
{'type': 'loss', 'content': 0.005401141010224819, 'timestamp': '2025-10-02 00:55:00.292725', 'step': 24451, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:55:00.351821', 'step': 24451, 'epoch': 3}
{'type': 'loss', 'content': 0.0498974435031414, 'timestamp': '2025-10-02 00:55:00.362820', 'step': 24452, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:00.418704', 'step': 24452, 'epoch': 3}
{'type': 'loss', 'content': 0.01669478788971901, 'timestamp': '2025-10-02 00:55:00.421015', 'step': 24453, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:00.475261', 'step': 24453, 'epoch': 3}
{'type': 'loss', 'content': 0.06848783791065216, 'timestamp': '2025-10-02 00:55:00.480996', 'step': 24454, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:55:00.543833', 'step': 24454, 'epoch': 3}
{'type': 'loss', 'content': 0.008439626544713974, 'timestamp': '2025-10-02 00:55:00.554705', 'step': 24455, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:00.610526', 'step': 24455, 'epoch': 3}
{'type': 'loss', 'content': 0.029759587720036507, 'timestamp': '2025-10-02 00:55:00.616488', 'step': 24456, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:55:00.677946', 'step': 24456, 'epoch': 3}
{'type': 'loss', 'content': 0.01912805438041687, 'timestamp': '2025-10-02 00:55:00.689511', 'step': 24457, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:00.744518', 'step': 24457, 'epoch': 3}
{'type': 'loss', 'content': 0.027503937482833862, 'timestamp': '2025-10-02 00:55:00.746902', 'step': 24458, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:00.801941', 'step': 24458, 'epoch': 3}
{'type': 'loss', 'content': 0.11830198019742966, 'timestamp': '2025-10-02 00:55:00.807599', 'step': 24459, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:55:00.869127', 'step': 24459, 'epoch': 3}
{'type': 'loss', 'content': 0.012798241339623928, 'timestamp': '2025-10-02 00:55:00.880524', 'step': 24460, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:00.934119', 'step': 24460, 'epoch': 3}
{'type': 'loss', 'content': 0.03894408047199249, 'timestamp': '2025-10-02 00:55:00.941473', 'step': 24461, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:55:01.011511', 'step': 24461, 'epoch': 3}
{'type': 'loss', 'content': 0.046378243714571, 'timestamp': '2025-10-02 00:55:01.023810', 'step': 24462, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:01.079441', 'step': 24462, 'epoch': 3}
{'type': 'loss', 'content': 0.07490582764148712, 'timestamp': '2025-10-02 00:55:01.088965', 'step': 24463, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:01.143157', 'step': 24463, 'epoch': 3}
{'type': 'loss', 'content': 0.11277500540018082, 'timestamp': '2025-10-02 00:55:01.149041', 'step': 24464, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:01.213761', 'step': 24464, 'epoch': 3}
{'type': 'loss', 'content': 0.10858722031116486, 'timestamp': '2025-10-02 00:55:01.216139', 'step': 24465, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:01.270427', 'step': 24465, 'epoch': 3}
{'type': 'loss', 'content': 0.05518362298607826, 'timestamp': '2025-10-02 00:55:01.273063', 'step': 24466, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:01.330459', 'step': 24466, 'epoch': 3}
{'type': 'loss', 'content': 0.027548430487513542, 'timestamp': '2025-10-02 00:55:01.339959', 'step': 24467, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:01.394772', 'step': 24467, 'epoch': 3}
{'type': 'loss', 'content': 0.12595444917678833, 'timestamp': '2025-10-02 00:55:01.400451', 'step': 24468, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:01.454787', 'step': 24468, 'epoch': 3}
{'type': 'loss', 'content': 0.09178555011749268, 'timestamp': '2025-10-02 00:55:01.457056', 'step': 24469, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:01.511056', 'step': 24469, 'epoch': 3}
{'type': 'loss', 'content': 0.1023474857211113, 'timestamp': '2025-10-02 00:55:01.513346', 'step': 24470, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:55:01.582487', 'step': 24470, 'epoch': 3}
{'type': 'loss', 'content': 0.004496874753385782, 'timestamp': '2025-10-02 00:55:01.594813', 'step': 24471, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:55:01.657078', 'step': 24471, 'epoch': 3}
{'type': 'loss', 'content': 0.02132086455821991, 'timestamp': '2025-10-02 00:55:01.668308', 'step': 24472, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:01.722091', 'step': 24472, 'epoch': 3}
{'type': 'loss', 'content': 0.04568810760974884, 'timestamp': '2025-10-02 00:55:01.729165', 'step': 24473, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:01.784335', 'step': 24473, 'epoch': 3}
{'type': 'loss', 'content': 0.05014823004603386, 'timestamp': '2025-10-02 00:55:01.789980', 'step': 24474, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:01.844552', 'step': 24474, 'epoch': 3}
{'type': 'loss', 'content': 0.005015693139284849, 'timestamp': '2025-10-02 00:55:01.847071', 'step': 24475, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:01.900945', 'step': 24475, 'epoch': 3}
{'type': 'loss', 'content': 0.01549080852419138, 'timestamp': '2025-10-02 00:55:01.906990', 'step': 24476, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:55:01.961134', 'step': 24476, 'epoch': 3}
{'type': 'loss', 'content': 0.09643492102622986, 'timestamp': '2025-10-02 00:55:01.963981', 'step': 24477, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:02.018454', 'step': 24477, 'epoch': 3}
{'type': 'loss', 'content': 0.026858093217015266, 'timestamp': '2025-10-02 00:55:02.027561', 'step': 24478, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:02.082477', 'step': 24478, 'epoch': 3}
{'type': 'loss', 'content': 0.11740042269229889, 'timestamp': '2025-10-02 00:55:02.085378', 'step': 24479, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:02.139926', 'step': 24479, 'epoch': 3}
{'type': 'loss', 'content': 0.11111757904291153, 'timestamp': '2025-10-02 00:55:02.146029', 'step': 24480, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:02.199845', 'step': 24480, 'epoch': 3}
{'type': 'loss', 'content': 0.04331867769360542, 'timestamp': '2025-10-02 00:55:02.205466', 'step': 24481, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:02.260680', 'step': 24481, 'epoch': 3}
{'type': 'loss', 'content': 0.09634595364332199, 'timestamp': '2025-10-02 00:55:02.266308', 'step': 24482, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:02.320928', 'step': 24482, 'epoch': 3}
{'type': 'loss', 'content': 0.10252371430397034, 'timestamp': '2025-10-02 00:55:02.323295', 'step': 24483, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:02.378405', 'step': 24483, 'epoch': 3}
{'type': 'loss', 'content': 0.0399758517742157, 'timestamp': '2025-10-02 00:55:02.384407', 'step': 24484, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:02.437956', 'step': 24484, 'epoch': 3}
{'type': 'loss', 'content': 0.02037014067173004, 'timestamp': '2025-10-02 00:55:02.440382', 'step': 24485, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:02.495016', 'step': 24485, 'epoch': 3}
{'type': 'loss', 'content': 0.06770361959934235, 'timestamp': '2025-10-02 00:55:02.497511', 'step': 24486, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:02.554108', 'step': 24486, 'epoch': 3}
{'type': 'loss', 'content': 0.038212601095438004, 'timestamp': '2025-10-02 00:55:02.556649', 'step': 24487, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:02.611277', 'step': 24487, 'epoch': 3}
{'type': 'loss', 'content': 0.04733830317854881, 'timestamp': '2025-10-02 00:55:02.617473', 'step': 24488, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:02.671461', 'step': 24488, 'epoch': 3}
{'type': 'loss', 'content': 0.06362137198448181, 'timestamp': '2025-10-02 00:55:02.678790', 'step': 24489, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:55:02.734217', 'step': 24489, 'epoch': 3}
{'type': 'loss', 'content': 0.044099174439907074, 'timestamp': '2025-10-02 00:55:02.736578', 'step': 24490, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:55:02.798783', 'step': 24490, 'epoch': 3}
{'type': 'loss', 'content': 0.0037685991264879704, 'timestamp': '2025-10-02 00:55:02.809228', 'step': 24491, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:02.866842', 'step': 24491, 'epoch': 3}
{'type': 'loss', 'content': 0.05173872783780098, 'timestamp': '2025-10-02 00:55:02.873618', 'step': 24492, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:02.929925', 'step': 24492, 'epoch': 3}
{'type': 'loss', 'content': 0.12283603847026825, 'timestamp': '2025-10-02 00:55:02.933030', 'step': 24493, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:02.988840', 'step': 24493, 'epoch': 3}
{'type': 'loss', 'content': 0.056754399091005325, 'timestamp': '2025-10-02 00:55:02.996171', 'step': 24494, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:03.053234', 'step': 24494, 'epoch': 3}
{'type': 'loss', 'content': 0.08684647083282471, 'timestamp': '2025-10-02 00:55:03.058993', 'step': 24495, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:03.115682', 'step': 24495, 'epoch': 3}
{'type': 'loss', 'content': 0.053007811307907104, 'timestamp': '2025-10-02 00:55:03.122651', 'step': 24496, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:03.178054', 'step': 24496, 'epoch': 3}
{'type': 'loss', 'content': 0.01732723042368889, 'timestamp': '2025-10-02 00:55:03.181449', 'step': 24497, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:03.237154', 'step': 24497, 'epoch': 3}
{'type': 'loss', 'content': 0.025609707459807396, 'timestamp': '2025-10-02 00:55:03.243022', 'step': 24498, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:55:03.318704', 'step': 24498, 'epoch': 3}
{'type': 'loss', 'content': 0.023722469806671143, 'timestamp': '2025-10-02 00:55:03.332150', 'step': 24499, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:55:03.388327', 'step': 24499, 'epoch': 3}
{'type': 'loss', 'content': 0.11488475650548935, 'timestamp': '2025-10-02 00:55:03.395404', 'step': 24500, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 24500', 'timestamp': '2025-10-02 00:55:03.782252', 'step': 24500, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:55:03.838899', 'step': 24500, 'epoch': 3}
{'type': 'loss', 'content': 0.009702254086732864, 'timestamp': '2025-10-02 00:55:03.849837', 'step': 24501, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:03.907011', 'step': 24501, 'epoch': 3}
{'type': 'loss', 'content': 0.047787856310606, 'timestamp': '2025-10-02 00:55:03.909367', 'step': 24502, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:03.965682', 'step': 24502, 'epoch': 3}
{'type': 'loss', 'content': 0.04800290986895561, 'timestamp': '2025-10-02 00:55:03.969772', 'step': 24503, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:04.026193', 'step': 24503, 'epoch': 3}
{'type': 'loss', 'content': 0.15116208791732788, 'timestamp': '2025-10-02 00:55:04.032512', 'step': 24504, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:04.090358', 'step': 24504, 'epoch': 3}
{'type': 'loss', 'content': 0.014075503684580326, 'timestamp': '2025-10-02 00:55:04.100229', 'step': 24505, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:04.156802', 'step': 24505, 'epoch': 3}
{'type': 'loss', 'content': 0.08730752766132355, 'timestamp': '2025-10-02 00:55:04.160019', 'step': 24506, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:04.217316', 'step': 24506, 'epoch': 3}
{'type': 'loss', 'content': 0.01770060509443283, 'timestamp': '2025-10-02 00:55:04.223693', 'step': 24507, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:04.279371', 'step': 24507, 'epoch': 3}
{'type': 'loss', 'content': 0.12466304004192352, 'timestamp': '2025-10-02 00:55:04.286893', 'step': 24508, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:04.341729', 'step': 24508, 'epoch': 3}
{'type': 'loss', 'content': 0.06175369396805763, 'timestamp': '2025-10-02 00:55:04.350864', 'step': 24509, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:04.406746', 'step': 24509, 'epoch': 3}
{'type': 'loss', 'content': 0.14044076204299927, 'timestamp': '2025-10-02 00:55:04.409133', 'step': 24510, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:04.465698', 'step': 24510, 'epoch': 3}
{'type': 'loss', 'content': 0.06823412328958511, 'timestamp': '2025-10-02 00:55:04.468436', 'step': 24511, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:55:04.524293', 'step': 24511, 'epoch': 3}
{'type': 'loss', 'content': 0.06498222053050995, 'timestamp': '2025-10-02 00:55:04.530645', 'step': 24512, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:04.588502', 'step': 24512, 'epoch': 3}
{'type': 'loss', 'content': 0.091229148209095, 'timestamp': '2025-10-02 00:55:04.590554', 'step': 24513, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:55:04.645043', 'step': 24513, 'epoch': 3}
{'type': 'loss', 'content': 0.06349308043718338, 'timestamp': '2025-10-02 00:55:04.647642', 'step': 24514, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:04.706942', 'step': 24514, 'epoch': 3}
{'type': 'loss', 'content': 0.05633794516324997, 'timestamp': '2025-10-02 00:55:04.711791', 'step': 24515, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:04.767395', 'step': 24515, 'epoch': 3}
{'type': 'loss', 'content': 0.10699035972356796, 'timestamp': '2025-10-02 00:55:04.775056', 'step': 24516, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:04.829402', 'step': 24516, 'epoch': 3}
{'type': 'loss', 'content': 0.026276826858520508, 'timestamp': '2025-10-02 00:55:04.834523', 'step': 24517, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 640], 'flops': 12800077771264.0}, 'timestamp': '2025-10-02 00:55:04.930992', 'step': 24517, 'epoch': 3}
{'type': 'loss', 'content': 0.00564459478482604, 'timestamp': '2025-10-02 00:55:04.948158', 'step': 24518, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:05.005116', 'step': 24518, 'epoch': 3}
{'type': 'loss', 'content': 0.07819025963544846, 'timestamp': '2025-10-02 00:55:05.014412', 'step': 24519, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:55:05.078349', 'step': 24519, 'epoch': 3}
{'type': 'loss', 'content': 0.019943345338106155, 'timestamp': '2025-10-02 00:55:05.089607', 'step': 24520, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:05.146483', 'step': 24520, 'epoch': 3}
{'type': 'loss', 'content': 0.032272566109895706, 'timestamp': '2025-10-02 00:55:05.156830', 'step': 24521, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:05.213061', 'step': 24521, 'epoch': 3}
{'type': 'loss', 'content': 0.04093538597226143, 'timestamp': '2025-10-02 00:55:05.216266', 'step': 24522, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:05.271456', 'step': 24522, 'epoch': 3}
{'type': 'loss', 'content': 0.03549791872501373, 'timestamp': '2025-10-02 00:55:05.277205', 'step': 24523, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:05.332930', 'step': 24523, 'epoch': 3}
{'type': 'loss', 'content': 0.04019337147474289, 'timestamp': '2025-10-02 00:55:05.340809', 'step': 24524, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 544], 'flops': 10880066115712.0}, 'timestamp': '2025-10-02 00:55:05.423246', 'step': 24524, 'epoch': 3}
{'type': 'loss', 'content': 0.014709429815411568, 'timestamp': '2025-10-02 00:55:05.439527', 'step': 24525, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:05.494032', 'step': 24525, 'epoch': 3}
{'type': 'loss', 'content': 0.060403212904930115, 'timestamp': '2025-10-02 00:55:05.496949', 'step': 24526, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:55:05.560154', 'step': 24526, 'epoch': 3}
{'type': 'loss', 'content': 0.0370514951646328, 'timestamp': '2025-10-02 00:55:05.570954', 'step': 24527, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:05.625426', 'step': 24527, 'epoch': 3}
{'type': 'loss', 'content': 0.09949816018342972, 'timestamp': '2025-10-02 00:55:05.632296', 'step': 24528, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:05.686194', 'step': 24528, 'epoch': 3}
{'type': 'loss', 'content': 0.06249081343412399, 'timestamp': '2025-10-02 00:55:05.688117', 'step': 24529, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:05.741833', 'step': 24529, 'epoch': 3}
{'type': 'loss', 'content': 0.1207512840628624, 'timestamp': '2025-10-02 00:55:05.743812', 'step': 24530, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:55:05.804994', 'step': 24530, 'epoch': 3}
{'type': 'loss', 'content': 0.02535921335220337, 'timestamp': '2025-10-02 00:55:05.815496', 'step': 24531, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:05.870239', 'step': 24531, 'epoch': 3}
{'type': 'loss', 'content': 0.0576963908970356, 'timestamp': '2025-10-02 00:55:05.879513', 'step': 24532, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:05.933799', 'step': 24532, 'epoch': 3}
{'type': 'loss', 'content': 0.053104981780052185, 'timestamp': '2025-10-02 00:55:05.936295', 'step': 24533, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:05.990874', 'step': 24533, 'epoch': 3}
{'type': 'loss', 'content': 0.0753452479839325, 'timestamp': '2025-10-02 00:55:05.993494', 'step': 24534, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:55:06.054419', 'step': 24534, 'epoch': 3}
{'type': 'loss', 'content': 0.020065126940608025, 'timestamp': '2025-10-02 00:55:06.064843', 'step': 24535, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:06.119454', 'step': 24535, 'epoch': 3}
{'type': 'loss', 'content': 0.01958397775888443, 'timestamp': '2025-10-02 00:55:06.125641', 'step': 24536, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:06.179812', 'step': 24536, 'epoch': 3}
{'type': 'loss', 'content': 0.0390220507979393, 'timestamp': '2025-10-02 00:55:06.181729', 'step': 24537, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:06.237463', 'step': 24537, 'epoch': 3}
{'type': 'loss', 'content': 0.03635333850979805, 'timestamp': '2025-10-02 00:55:06.239442', 'step': 24538, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:06.293952', 'step': 24538, 'epoch': 3}
{'type': 'loss', 'content': 0.018842527642846107, 'timestamp': '2025-10-02 00:55:06.296438', 'step': 24539, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:06.350422', 'step': 24539, 'epoch': 3}
{'type': 'loss', 'content': 0.04534792900085449, 'timestamp': '2025-10-02 00:55:06.356268', 'step': 24540, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:06.409861', 'step': 24540, 'epoch': 3}
{'type': 'loss', 'content': 0.06467287987470627, 'timestamp': '2025-10-02 00:55:06.412391', 'step': 24541, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:06.467674', 'step': 24541, 'epoch': 3}
{'type': 'loss', 'content': 0.043929554522037506, 'timestamp': '2025-10-02 00:55:06.477188', 'step': 24542, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:55:06.532094', 'step': 24542, 'epoch': 3}
{'type': 'loss', 'content': 0.10518456250429153, 'timestamp': '2025-10-02 00:55:06.534438', 'step': 24543, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:55:06.589107', 'step': 24543, 'epoch': 3}
{'type': 'loss', 'content': 0.07944938540458679, 'timestamp': '2025-10-02 00:55:06.594676', 'step': 24544, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:06.648670', 'step': 24544, 'epoch': 3}
{'type': 'loss', 'content': 0.07053825259208679, 'timestamp': '2025-10-02 00:55:06.654041', 'step': 24545, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:06.717026', 'step': 24545, 'epoch': 3}
{'type': 'loss', 'content': 0.042405012995004654, 'timestamp': '2025-10-02 00:55:06.719335', 'step': 24546, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:55:06.780243', 'step': 24546, 'epoch': 3}
{'type': 'loss', 'content': 0.05572866275906563, 'timestamp': '2025-10-02 00:55:06.782399', 'step': 24547, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:55:06.837686', 'step': 24547, 'epoch': 3}
{'type': 'loss', 'content': 0.012967709451913834, 'timestamp': '2025-10-02 00:55:06.843722', 'step': 24548, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:55:06.904156', 'step': 24548, 'epoch': 3}
{'type': 'loss', 'content': 0.005200853571295738, 'timestamp': '2025-10-02 00:55:06.915458', 'step': 24549, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:06.969890', 'step': 24549, 'epoch': 3}
{'type': 'loss', 'content': 0.06993507593870163, 'timestamp': '2025-10-02 00:55:06.972087', 'step': 24550, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:07.026192', 'step': 24550, 'epoch': 3}
{'type': 'loss', 'content': 0.08700740337371826, 'timestamp': '2025-10-02 00:55:07.028483', 'step': 24551, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:07.082623', 'step': 24551, 'epoch': 3}
{'type': 'loss', 'content': 0.045733846724033356, 'timestamp': '2025-10-02 00:55:07.088293', 'step': 24552, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:07.142784', 'step': 24552, 'epoch': 3}
{'type': 'loss', 'content': 0.15478496253490448, 'timestamp': '2025-10-02 00:55:07.144906', 'step': 24553, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:07.199242', 'step': 24553, 'epoch': 3}
{'type': 'loss', 'content': 0.016496580094099045, 'timestamp': '2025-10-02 00:55:07.201533', 'step': 24554, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:07.255907', 'step': 24554, 'epoch': 3}
{'type': 'loss', 'content': 0.06229254975914955, 'timestamp': '2025-10-02 00:55:07.261466', 'step': 24555, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:07.317298', 'step': 24555, 'epoch': 3}
{'type': 'loss', 'content': 0.025058947503566742, 'timestamp': '2025-10-02 00:55:07.322894', 'step': 24556, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:07.378231', 'step': 24556, 'epoch': 3}
{'type': 'loss', 'content': 0.018154306337237358, 'timestamp': '2025-10-02 00:55:07.383979', 'step': 24557, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:07.438646', 'step': 24557, 'epoch': 3}
{'type': 'loss', 'content': 0.03242470324039459, 'timestamp': '2025-10-02 00:55:07.440881', 'step': 24558, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:07.495707', 'step': 24558, 'epoch': 3}
{'type': 'loss', 'content': 0.06112489104270935, 'timestamp': '2025-10-02 00:55:07.497590', 'step': 24559, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:07.551956', 'step': 24559, 'epoch': 3}
{'type': 'loss', 'content': 0.11386815458536148, 'timestamp': '2025-10-02 00:55:07.561851', 'step': 24560, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:07.618045', 'step': 24560, 'epoch': 3}
{'type': 'loss', 'content': 0.1436471939086914, 'timestamp': '2025-10-02 00:55:07.620046', 'step': 24561, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:07.675002', 'step': 24561, 'epoch': 3}
{'type': 'loss', 'content': 0.12639714777469635, 'timestamp': '2025-10-02 00:55:07.676984', 'step': 24562, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:07.732733', 'step': 24562, 'epoch': 3}
{'type': 'loss', 'content': 0.05787081643939018, 'timestamp': '2025-10-02 00:55:07.742210', 'step': 24563, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:07.796726', 'step': 24563, 'epoch': 3}
{'type': 'loss', 'content': 0.06518981605768204, 'timestamp': '2025-10-02 00:55:07.802899', 'step': 24564, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:07.857070', 'step': 24564, 'epoch': 3}
{'type': 'loss', 'content': 0.04893412068486214, 'timestamp': '2025-10-02 00:55:07.859432', 'step': 24565, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:07.915374', 'step': 24565, 'epoch': 3}
{'type': 'loss', 'content': 0.04162922501564026, 'timestamp': '2025-10-02 00:55:07.924652', 'step': 24566, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:55:07.984537', 'step': 24566, 'epoch': 3}
{'type': 'loss', 'content': 0.014303802512586117, 'timestamp': '2025-10-02 00:55:07.994688', 'step': 24567, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:55:08.056130', 'step': 24567, 'epoch': 3}
{'type': 'loss', 'content': 0.03969527408480644, 'timestamp': '2025-10-02 00:55:08.067370', 'step': 24568, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:08.122152', 'step': 24568, 'epoch': 3}
{'type': 'loss', 'content': 0.004741213750094175, 'timestamp': '2025-10-02 00:55:08.130917', 'step': 24569, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:08.186134', 'step': 24569, 'epoch': 3}
{'type': 'loss', 'content': 0.021505726501345634, 'timestamp': '2025-10-02 00:55:08.195761', 'step': 24570, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:08.250179', 'step': 24570, 'epoch': 3}
{'type': 'loss', 'content': 0.05374939739704132, 'timestamp': '2025-10-02 00:55:08.252486', 'step': 24571, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:08.306956', 'step': 24571, 'epoch': 3}
{'type': 'loss', 'content': 0.0881427675485611, 'timestamp': '2025-10-02 00:55:08.312991', 'step': 24572, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:08.367081', 'step': 24572, 'epoch': 3}
{'type': 'loss', 'content': 0.08357245475053787, 'timestamp': '2025-10-02 00:55:08.369643', 'step': 24573, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:08.423499', 'step': 24573, 'epoch': 3}
{'type': 'loss', 'content': 0.10717311501502991, 'timestamp': '2025-10-02 00:55:08.425843', 'step': 24574, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:08.481041', 'step': 24574, 'epoch': 3}
{'type': 'loss', 'content': 0.0006723938859067857, 'timestamp': '2025-10-02 00:55:08.488319', 'step': 24575, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:55:08.548111', 'step': 24575, 'epoch': 3}
{'type': 'loss', 'content': 0.014384016394615173, 'timestamp': '2025-10-02 00:55:08.559172', 'step': 24576, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:08.613188', 'step': 24576, 'epoch': 3}
{'type': 'loss', 'content': 0.06416825205087662, 'timestamp': '2025-10-02 00:55:08.615472', 'step': 24577, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:08.669442', 'step': 24577, 'epoch': 3}
{'type': 'loss', 'content': 0.08094322681427002, 'timestamp': '2025-10-02 00:55:08.671819', 'step': 24578, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:08.727753', 'step': 24578, 'epoch': 3}
{'type': 'loss', 'content': 0.012200120836496353, 'timestamp': '2025-10-02 00:55:08.734615', 'step': 24579, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:08.788675', 'step': 24579, 'epoch': 3}
{'type': 'loss', 'content': 0.13221077620983124, 'timestamp': '2025-10-02 00:55:08.795156', 'step': 24580, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:08.848875', 'step': 24580, 'epoch': 3}
{'type': 'loss', 'content': 0.054889604449272156, 'timestamp': '2025-10-02 00:55:08.851158', 'step': 24581, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:08.909686', 'step': 24581, 'epoch': 3}
{'type': 'loss', 'content': 0.016427509486675262, 'timestamp': '2025-10-02 00:55:08.919204', 'step': 24582, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:55:08.974734', 'step': 24582, 'epoch': 3}
{'type': 'loss', 'content': 0.07157424837350845, 'timestamp': '2025-10-02 00:55:08.976933', 'step': 24583, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:09.031834', 'step': 24583, 'epoch': 3}
{'type': 'loss', 'content': 0.051642969250679016, 'timestamp': '2025-10-02 00:55:09.039381', 'step': 24584, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:09.093602', 'step': 24584, 'epoch': 3}
{'type': 'loss', 'content': 0.015259169973433018, 'timestamp': '2025-10-02 00:55:09.096676', 'step': 24585, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:09.150899', 'step': 24585, 'epoch': 3}
{'type': 'loss', 'content': 0.03506682068109512, 'timestamp': '2025-10-02 00:55:09.158193', 'step': 24586, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:09.217684', 'step': 24586, 'epoch': 3}
{'type': 'loss', 'content': 0.0289120152592659, 'timestamp': '2025-10-02 00:55:09.227166', 'step': 24587, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:09.281936', 'step': 24587, 'epoch': 3}
{'type': 'loss', 'content': 0.1256493628025055, 'timestamp': '2025-10-02 00:55:09.288065', 'step': 24588, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:09.341510', 'step': 24588, 'epoch': 3}
{'type': 'loss', 'content': 0.031564343720674515, 'timestamp': '2025-10-02 00:55:09.349521', 'step': 24589, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:09.427457', 'step': 24589, 'epoch': 3}
{'type': 'loss', 'content': 0.03676142543554306, 'timestamp': '2025-10-02 00:55:09.429899', 'step': 24590, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:09.483459', 'step': 24590, 'epoch': 3}
{'type': 'loss', 'content': 0.09020639955997467, 'timestamp': '2025-10-02 00:55:09.485231', 'step': 24591, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:09.541382', 'step': 24591, 'epoch': 3}
{'type': 'loss', 'content': 0.01927664689719677, 'timestamp': '2025-10-02 00:55:09.548405', 'step': 24592, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:09.603747', 'step': 24592, 'epoch': 3}
{'type': 'loss', 'content': 0.03818308934569359, 'timestamp': '2025-10-02 00:55:09.605833', 'step': 24593, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:09.661987', 'step': 24593, 'epoch': 3}
{'type': 'loss', 'content': 0.09473579376935959, 'timestamp': '2025-10-02 00:55:09.664220', 'step': 24594, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:55:09.718396', 'step': 24594, 'epoch': 3}
{'type': 'loss', 'content': 0.042694613337516785, 'timestamp': '2025-10-02 00:55:09.720575', 'step': 24595, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:09.774505', 'step': 24595, 'epoch': 3}
{'type': 'loss', 'content': 0.06356080621480942, 'timestamp': '2025-10-02 00:55:09.780434', 'step': 24596, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:09.837512', 'step': 24596, 'epoch': 3}
{'type': 'loss', 'content': 0.05267604440450668, 'timestamp': '2025-10-02 00:55:09.839636', 'step': 24597, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:09.897586', 'step': 24597, 'epoch': 3}
{'type': 'loss', 'content': 0.04824882373213768, 'timestamp': '2025-10-02 00:55:09.905177', 'step': 24598, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:09.960526', 'step': 24598, 'epoch': 3}
{'type': 'loss', 'content': 0.03459075838327408, 'timestamp': '2025-10-02 00:55:09.963403', 'step': 24599, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:10.021927', 'step': 24599, 'epoch': 3}
{'type': 'loss', 'content': 0.06344469636678696, 'timestamp': '2025-10-02 00:55:10.027509', 'step': 24600, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:10.081390', 'step': 24600, 'epoch': 3}
{'type': 'loss', 'content': 0.05283431336283684, 'timestamp': '2025-10-02 00:55:10.090820', 'step': 24601, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:55:10.149088', 'step': 24601, 'epoch': 3}
{'type': 'loss', 'content': 0.0342651903629303, 'timestamp': '2025-10-02 00:55:10.151642', 'step': 24602, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:10.207936', 'step': 24602, 'epoch': 3}
{'type': 'loss', 'content': 0.0687946155667305, 'timestamp': '2025-10-02 00:55:10.210626', 'step': 24603, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:10.264965', 'step': 24603, 'epoch': 3}
{'type': 'loss', 'content': 0.07681272178888321, 'timestamp': '2025-10-02 00:55:10.271166', 'step': 24604, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:10.325628', 'step': 24604, 'epoch': 3}
{'type': 'loss', 'content': 0.054092440754175186, 'timestamp': '2025-10-02 00:55:10.330703', 'step': 24605, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:10.385792', 'step': 24605, 'epoch': 3}
{'type': 'loss', 'content': 0.0695389062166214, 'timestamp': '2025-10-02 00:55:10.391371', 'step': 24606, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:10.445997', 'step': 24606, 'epoch': 3}
{'type': 'loss', 'content': 0.03910035640001297, 'timestamp': '2025-10-02 00:55:10.448862', 'step': 24607, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:10.503260', 'step': 24607, 'epoch': 3}
{'type': 'loss', 'content': 0.03198501467704773, 'timestamp': '2025-10-02 00:55:10.509673', 'step': 24608, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:10.563194', 'step': 24608, 'epoch': 3}
{'type': 'loss', 'content': 0.014870150946080685, 'timestamp': '2025-10-02 00:55:10.565690', 'step': 24609, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:10.621343', 'step': 24609, 'epoch': 3}
{'type': 'loss', 'content': 0.02030758000910282, 'timestamp': '2025-10-02 00:55:10.623715', 'step': 24610, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:10.678681', 'step': 24610, 'epoch': 3}
{'type': 'loss', 'content': 0.0899110659956932, 'timestamp': '2025-10-02 00:55:10.680996', 'step': 24611, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:10.735707', 'step': 24611, 'epoch': 3}
{'type': 'loss', 'content': 0.023917727172374725, 'timestamp': '2025-10-02 00:55:10.741933', 'step': 24612, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:55:10.803008', 'step': 24612, 'epoch': 3}
{'type': 'loss', 'content': 0.039492178708314896, 'timestamp': '2025-10-02 00:55:10.814522', 'step': 24613, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:55:10.868662', 'step': 24613, 'epoch': 3}
{'type': 'loss', 'content': 0.09558332711458206, 'timestamp': '2025-10-02 00:55:10.871112', 'step': 24614, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:55:10.935129', 'step': 24614, 'epoch': 3}
{'type': 'loss', 'content': 0.009651425294578075, 'timestamp': '2025-10-02 00:55:10.945537', 'step': 24615, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 592], 'flops': 11840071943488.0}, 'timestamp': '2025-10-02 00:55:11.034305', 'step': 24615, 'epoch': 3}
{'type': 'loss', 'content': 0.009146502241492271, 'timestamp': '2025-10-02 00:55:11.051536', 'step': 24616, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:11.107095', 'step': 24616, 'epoch': 3}
{'type': 'loss', 'content': 0.03397708013653755, 'timestamp': '2025-10-02 00:55:11.109885', 'step': 24617, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:11.163709', 'step': 24617, 'epoch': 3}
{'type': 'loss', 'content': 0.06523929536342621, 'timestamp': '2025-10-02 00:55:11.166238', 'step': 24618, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:11.220116', 'step': 24618, 'epoch': 3}
{'type': 'loss', 'content': 0.07880731672048569, 'timestamp': '2025-10-02 00:55:11.222542', 'step': 24619, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:55:11.276300', 'step': 24619, 'epoch': 3}
{'type': 'loss', 'content': 0.12917551398277283, 'timestamp': '2025-10-02 00:55:11.282812', 'step': 24620, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:11.336770', 'step': 24620, 'epoch': 3}
{'type': 'loss', 'content': 0.10320960730314255, 'timestamp': '2025-10-02 00:55:11.339494', 'step': 24621, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:11.394820', 'step': 24621, 'epoch': 3}
{'type': 'loss', 'content': 0.06527721881866455, 'timestamp': '2025-10-02 00:55:11.396984', 'step': 24622, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:11.452222', 'step': 24622, 'epoch': 3}
{'type': 'loss', 'content': 0.028698621317744255, 'timestamp': '2025-10-02 00:55:11.459587', 'step': 24623, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:55:11.521300', 'step': 24623, 'epoch': 3}
{'type': 'loss', 'content': 0.015522311441600323, 'timestamp': '2025-10-02 00:55:11.532531', 'step': 24624, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:55:11.593206', 'step': 24624, 'epoch': 3}
{'type': 'loss', 'content': 0.028187979012727737, 'timestamp': '2025-10-02 00:55:11.604539', 'step': 24625, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:11.659247', 'step': 24625, 'epoch': 3}
{'type': 'loss', 'content': 0.02956102229654789, 'timestamp': '2025-10-02 00:55:11.666897', 'step': 24626, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:55:11.727193', 'step': 24626, 'epoch': 3}
{'type': 'loss', 'content': 0.02056683972477913, 'timestamp': '2025-10-02 00:55:11.737376', 'step': 24627, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:11.791629', 'step': 24627, 'epoch': 3}
{'type': 'loss', 'content': 0.028089622035622597, 'timestamp': '2025-10-02 00:55:11.798090', 'step': 24628, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:11.852066', 'step': 24628, 'epoch': 3}
{'type': 'loss', 'content': 0.027062129229307175, 'timestamp': '2025-10-02 00:55:11.854476', 'step': 24629, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:11.908914', 'step': 24629, 'epoch': 3}
{'type': 'loss', 'content': 0.08572354167699814, 'timestamp': '2025-10-02 00:55:11.911068', 'step': 24630, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:11.965464', 'step': 24630, 'epoch': 3}
{'type': 'loss', 'content': 0.03581603616476059, 'timestamp': '2025-10-02 00:55:11.967910', 'step': 24631, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:12.022461', 'step': 24631, 'epoch': 3}
{'type': 'loss', 'content': 0.11165133118629456, 'timestamp': '2025-10-02 00:55:12.028618', 'step': 24632, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:12.082888', 'step': 24632, 'epoch': 3}
{'type': 'loss', 'content': 0.008717665448784828, 'timestamp': '2025-10-02 00:55:12.085390', 'step': 24633, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:12.141104', 'step': 24633, 'epoch': 3}
{'type': 'loss', 'content': 0.05357295274734497, 'timestamp': '2025-10-02 00:55:12.150610', 'step': 24634, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:12.205350', 'step': 24634, 'epoch': 3}
{'type': 'loss', 'content': 0.04247438162565231, 'timestamp': '2025-10-02 00:55:12.207898', 'step': 24635, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:12.262191', 'step': 24635, 'epoch': 3}
{'type': 'loss', 'content': 0.020090242847800255, 'timestamp': '2025-10-02 00:55:12.270478', 'step': 24636, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:12.325041', 'step': 24636, 'epoch': 3}
{'type': 'loss', 'content': 0.024268923327326775, 'timestamp': '2025-10-02 00:55:12.327737', 'step': 24637, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:12.382232', 'step': 24637, 'epoch': 3}
{'type': 'loss', 'content': 0.039972707629203796, 'timestamp': '2025-10-02 00:55:12.384960', 'step': 24638, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:12.441158', 'step': 24638, 'epoch': 3}
{'type': 'loss', 'content': 0.05443182215094566, 'timestamp': '2025-10-02 00:55:12.444545', 'step': 24639, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:12.498691', 'step': 24639, 'epoch': 3}
{'type': 'loss', 'content': 0.010496572591364384, 'timestamp': '2025-10-02 00:55:12.504641', 'step': 24640, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:12.562613', 'step': 24640, 'epoch': 3}
{'type': 'loss', 'content': 0.05150933936238289, 'timestamp': '2025-10-02 00:55:12.564763', 'step': 24641, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:12.618629', 'step': 24641, 'epoch': 3}
{'type': 'loss', 'content': 0.06169384345412254, 'timestamp': '2025-10-02 00:55:12.620905', 'step': 24642, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:12.676527', 'step': 24642, 'epoch': 3}
{'type': 'loss', 'content': 0.07922407239675522, 'timestamp': '2025-10-02 00:55:12.685820', 'step': 24643, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:12.742686', 'step': 24643, 'epoch': 3}
{'type': 'loss', 'content': 0.026548950001597404, 'timestamp': '2025-10-02 00:55:12.750470', 'step': 24644, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:12.805718', 'step': 24644, 'epoch': 3}
{'type': 'loss', 'content': 0.024201471358537674, 'timestamp': '2025-10-02 00:55:12.809026', 'step': 24645, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:55:12.863652', 'step': 24645, 'epoch': 3}
{'type': 'loss', 'content': 0.029398811981081963, 'timestamp': '2025-10-02 00:55:12.866287', 'step': 24646, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:12.922845', 'step': 24646, 'epoch': 3}
{'type': 'loss', 'content': 0.07082536816596985, 'timestamp': '2025-10-02 00:55:12.925634', 'step': 24647, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:55:13.002171', 'step': 24647, 'epoch': 3}
{'type': 'loss', 'content': 0.007813780568540096, 'timestamp': '2025-10-02 00:55:13.016880', 'step': 24648, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:13.073806', 'step': 24648, 'epoch': 3}
{'type': 'loss', 'content': 0.12809467315673828, 'timestamp': '2025-10-02 00:55:13.081143', 'step': 24649, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:13.138650', 'step': 24649, 'epoch': 3}
{'type': 'loss', 'content': 0.002968085464090109, 'timestamp': '2025-10-02 00:55:13.142232', 'step': 24650, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:55:13.197430', 'step': 24650, 'epoch': 3}
{'type': 'loss', 'content': 0.052554089576005936, 'timestamp': '2025-10-02 00:55:13.200306', 'step': 24651, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:13.254874', 'step': 24651, 'epoch': 3}
{'type': 'loss', 'content': 0.10873356461524963, 'timestamp': '2025-10-02 00:55:13.260847', 'step': 24652, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:55:13.317272', 'step': 24652, 'epoch': 3}
{'type': 'loss', 'content': 0.06249416619539261, 'timestamp': '2025-10-02 00:55:13.319582', 'step': 24653, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:55:13.380159', 'step': 24653, 'epoch': 3}
{'type': 'loss', 'content': 0.00626399414613843, 'timestamp': '2025-10-02 00:55:13.390358', 'step': 24654, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:13.446639', 'step': 24654, 'epoch': 3}
{'type': 'loss', 'content': 0.03303145244717598, 'timestamp': '2025-10-02 00:55:13.456144', 'step': 24655, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:13.514775', 'step': 24655, 'epoch': 3}
{'type': 'loss', 'content': 0.03671402111649513, 'timestamp': '2025-10-02 00:55:13.521209', 'step': 24656, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:13.577095', 'step': 24656, 'epoch': 3}
{'type': 'loss', 'content': 0.05147663876414299, 'timestamp': '2025-10-02 00:55:13.584626', 'step': 24657, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:13.643088', 'step': 24657, 'epoch': 3}
{'type': 'loss', 'content': 0.050965942442417145, 'timestamp': '2025-10-02 00:55:13.646396', 'step': 24658, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:55:13.702746', 'step': 24658, 'epoch': 3}
{'type': 'loss', 'content': 0.0354214683175087, 'timestamp': '2025-10-02 00:55:13.706012', 'step': 24659, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:13.762788', 'step': 24659, 'epoch': 3}
{'type': 'loss', 'content': 0.0006958579178899527, 'timestamp': '2025-10-02 00:55:13.769110', 'step': 24660, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:55:13.829176', 'step': 24660, 'epoch': 3}
{'type': 'loss', 'content': 0.0022338582202792168, 'timestamp': '2025-10-02 00:55:13.840121', 'step': 24661, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:13.897476', 'step': 24661, 'epoch': 3}
{'type': 'loss', 'content': 0.051666051149368286, 'timestamp': '2025-10-02 00:55:13.900386', 'step': 24662, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:13.955233', 'step': 24662, 'epoch': 3}
{'type': 'loss', 'content': 0.05532712861895561, 'timestamp': '2025-10-02 00:55:13.958640', 'step': 24663, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:14.014947', 'step': 24663, 'epoch': 3}
{'type': 'loss', 'content': 0.008565026335418224, 'timestamp': '2025-10-02 00:55:14.021138', 'step': 24664, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:14.076319', 'step': 24664, 'epoch': 3}
{'type': 'loss', 'content': 0.05610016733407974, 'timestamp': '2025-10-02 00:55:14.080642', 'step': 24665, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:55:14.135699', 'step': 24665, 'epoch': 3}
{'type': 'loss', 'content': 0.13570407032966614, 'timestamp': '2025-10-02 00:55:14.138043', 'step': 24666, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:14.192683', 'step': 24666, 'epoch': 3}
{'type': 'loss', 'content': 0.029874399304389954, 'timestamp': '2025-10-02 00:55:14.198468', 'step': 24667, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:14.261279', 'step': 24667, 'epoch': 3}
{'type': 'loss', 'content': 0.019470080733299255, 'timestamp': '2025-10-02 00:55:14.267153', 'step': 24668, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:14.321352', 'step': 24668, 'epoch': 3}
{'type': 'loss', 'content': 0.04651474580168724, 'timestamp': '2025-10-02 00:55:14.330487', 'step': 24669, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:14.384559', 'step': 24669, 'epoch': 3}
{'type': 'loss', 'content': 0.08268052339553833, 'timestamp': '2025-10-02 00:55:14.387032', 'step': 24670, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:14.442911', 'step': 24670, 'epoch': 3}
{'type': 'loss', 'content': 0.00013035364099778235, 'timestamp': '2025-10-02 00:55:14.445289', 'step': 24671, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:55:14.499538', 'step': 24671, 'epoch': 3}
{'type': 'loss', 'content': 0.09614630788564682, 'timestamp': '2025-10-02 00:55:14.505430', 'step': 24672, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:14.558867', 'step': 24672, 'epoch': 3}
{'type': 'loss', 'content': 0.055882979184389114, 'timestamp': '2025-10-02 00:55:14.561146', 'step': 24673, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:14.615285', 'step': 24673, 'epoch': 3}
{'type': 'loss', 'content': 0.03175826370716095, 'timestamp': '2025-10-02 00:55:14.617486', 'step': 24674, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:14.671355', 'step': 24674, 'epoch': 3}
{'type': 'loss', 'content': 0.07644139975309372, 'timestamp': '2025-10-02 00:55:14.673904', 'step': 24675, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:14.728140', 'step': 24675, 'epoch': 3}
{'type': 'loss', 'content': 0.036464083939790726, 'timestamp': '2025-10-02 00:55:14.734156', 'step': 24676, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:14.787494', 'step': 24676, 'epoch': 3}
{'type': 'loss', 'content': 0.1098245233297348, 'timestamp': '2025-10-02 00:55:14.789777', 'step': 24677, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:14.844692', 'step': 24677, 'epoch': 3}
{'type': 'loss', 'content': 0.0195537731051445, 'timestamp': '2025-10-02 00:55:14.847290', 'step': 24678, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:14.902236', 'step': 24678, 'epoch': 3}
{'type': 'loss', 'content': 0.020862914621829987, 'timestamp': '2025-10-02 00:55:14.907912', 'step': 24679, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:14.962387', 'step': 24679, 'epoch': 3}
{'type': 'loss', 'content': 0.05633104220032692, 'timestamp': '2025-10-02 00:55:14.968026', 'step': 24680, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:55:15.028286', 'step': 24680, 'epoch': 3}
{'type': 'loss', 'content': 0.0006773502682335675, 'timestamp': '2025-10-02 00:55:15.039598', 'step': 24681, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:15.095804', 'step': 24681, 'epoch': 3}
{'type': 'loss', 'content': 0.02491685375571251, 'timestamp': '2025-10-02 00:55:15.101630', 'step': 24682, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:15.156679', 'step': 24682, 'epoch': 3}
{'type': 'loss', 'content': 0.037931255996227264, 'timestamp': '2025-10-02 00:55:15.159061', 'step': 24683, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:55:15.213682', 'step': 24683, 'epoch': 3}
{'type': 'loss', 'content': 0.06864703446626663, 'timestamp': '2025-10-02 00:55:15.219466', 'step': 24684, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:15.274114', 'step': 24684, 'epoch': 3}
{'type': 'loss', 'content': 0.02869063802063465, 'timestamp': '2025-10-02 00:55:15.276696', 'step': 24685, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:15.332455', 'step': 24685, 'epoch': 3}
{'type': 'loss', 'content': 0.03601200506091118, 'timestamp': '2025-10-02 00:55:15.334854', 'step': 24686, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:15.389318', 'step': 24686, 'epoch': 3}
{'type': 'loss', 'content': 0.026404401287436485, 'timestamp': '2025-10-02 00:55:15.391794', 'step': 24687, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:15.446916', 'step': 24687, 'epoch': 3}
{'type': 'loss', 'content': 0.0739012286067009, 'timestamp': '2025-10-02 00:55:15.453359', 'step': 24688, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:55:15.507761', 'step': 24688, 'epoch': 3}
{'type': 'loss', 'content': 0.10218963772058487, 'timestamp': '2025-10-02 00:55:15.510399', 'step': 24689, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:15.565392', 'step': 24689, 'epoch': 3}
{'type': 'loss', 'content': 0.04721924290060997, 'timestamp': '2025-10-02 00:55:15.570972', 'step': 24690, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:15.625466', 'step': 24690, 'epoch': 3}
{'type': 'loss', 'content': 0.10723493248224258, 'timestamp': '2025-10-02 00:55:15.627647', 'step': 24691, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:15.682782', 'step': 24691, 'epoch': 3}
{'type': 'loss', 'content': 0.018360765650868416, 'timestamp': '2025-10-02 00:55:15.689257', 'step': 24692, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:15.751018', 'step': 24692, 'epoch': 3}
{'type': 'loss', 'content': 0.02328873798251152, 'timestamp': '2025-10-02 00:55:15.753544', 'step': 24693, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:15.808793', 'step': 24693, 'epoch': 3}
{'type': 'loss', 'content': 0.03343859314918518, 'timestamp': '2025-10-02 00:55:15.811425', 'step': 24694, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:15.867878', 'step': 24694, 'epoch': 3}
{'type': 'loss', 'content': 0.029481640085577965, 'timestamp': '2025-10-02 00:55:15.870222', 'step': 24695, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:55:15.945327', 'step': 24695, 'epoch': 3}
{'type': 'loss', 'content': 0.002787825418636203, 'timestamp': '2025-10-02 00:55:15.959481', 'step': 24696, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:16.013348', 'step': 24696, 'epoch': 3}
{'type': 'loss', 'content': 0.04772032052278519, 'timestamp': '2025-10-02 00:55:16.018607', 'step': 24697, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:16.072705', 'step': 24697, 'epoch': 3}
{'type': 'loss', 'content': 0.03216922655701637, 'timestamp': '2025-10-02 00:55:16.080109', 'step': 24698, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:16.135123', 'step': 24698, 'epoch': 3}
{'type': 'loss', 'content': 0.0025597484782338142, 'timestamp': '2025-10-02 00:55:16.142043', 'step': 24699, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:55:16.201238', 'step': 24699, 'epoch': 3}
{'type': 'loss', 'content': 0.02731255069375038, 'timestamp': '2025-10-02 00:55:16.212285', 'step': 24700, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:16.266241', 'step': 24700, 'epoch': 3}
{'type': 'loss', 'content': 0.0352725051343441, 'timestamp': '2025-10-02 00:55:16.268917', 'step': 24701, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:16.323872', 'step': 24701, 'epoch': 3}
{'type': 'loss', 'content': 0.08335968852043152, 'timestamp': '2025-10-02 00:55:16.326173', 'step': 24702, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:16.380620', 'step': 24702, 'epoch': 3}
{'type': 'loss', 'content': 0.0698479413986206, 'timestamp': '2025-10-02 00:55:16.382825', 'step': 24703, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:55:16.445091', 'step': 24703, 'epoch': 3}
{'type': 'loss', 'content': 0.01837312802672386, 'timestamp': '2025-10-02 00:55:16.456599', 'step': 24704, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:16.514808', 'step': 24704, 'epoch': 3}
{'type': 'loss', 'content': 0.00800101924687624, 'timestamp': '2025-10-02 00:55:16.524036', 'step': 24705, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:16.578766', 'step': 24705, 'epoch': 3}
{'type': 'loss', 'content': 0.010605363175272942, 'timestamp': '2025-10-02 00:55:16.581984', 'step': 24706, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:16.638404', 'step': 24706, 'epoch': 3}
{'type': 'loss', 'content': 0.12914204597473145, 'timestamp': '2025-10-02 00:55:16.641071', 'step': 24707, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:16.695614', 'step': 24707, 'epoch': 3}
{'type': 'loss', 'content': 0.06226619333028793, 'timestamp': '2025-10-02 00:55:16.701720', 'step': 24708, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:16.756333', 'step': 24708, 'epoch': 3}
{'type': 'loss', 'content': 0.0623512826859951, 'timestamp': '2025-10-02 00:55:16.758482', 'step': 24709, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:16.813084', 'step': 24709, 'epoch': 3}
{'type': 'loss', 'content': 0.0970214456319809, 'timestamp': '2025-10-02 00:55:16.815164', 'step': 24710, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:16.869059', 'step': 24710, 'epoch': 3}
{'type': 'loss', 'content': 0.08102639019489288, 'timestamp': '2025-10-02 00:55:16.871343', 'step': 24711, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:16.925533', 'step': 24711, 'epoch': 3}
{'type': 'loss', 'content': 0.15527859330177307, 'timestamp': '2025-10-02 00:55:16.931893', 'step': 24712, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:16.985750', 'step': 24712, 'epoch': 3}
{'type': 'loss', 'content': 0.018515288829803467, 'timestamp': '2025-10-02 00:55:16.988039', 'step': 24713, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:17.041867', 'step': 24713, 'epoch': 3}
{'type': 'loss', 'content': 0.08146945387125015, 'timestamp': '2025-10-02 00:55:17.044423', 'step': 24714, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:17.101100', 'step': 24714, 'epoch': 3}
{'type': 'loss', 'content': 0.04545469209551811, 'timestamp': '2025-10-02 00:55:17.103646', 'step': 24715, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:17.158364', 'step': 24715, 'epoch': 3}
{'type': 'loss', 'content': 0.04828658699989319, 'timestamp': '2025-10-02 00:55:17.164365', 'step': 24716, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:17.218622', 'step': 24716, 'epoch': 3}
{'type': 'loss', 'content': 0.07585038244724274, 'timestamp': '2025-10-02 00:55:17.222589', 'step': 24717, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:17.277621', 'step': 24717, 'epoch': 3}
{'type': 'loss', 'content': 0.07064080238342285, 'timestamp': '2025-10-02 00:55:17.284623', 'step': 24718, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:17.340329', 'step': 24718, 'epoch': 3}
{'type': 'loss', 'content': 0.06717544049024582, 'timestamp': '2025-10-02 00:55:17.347073', 'step': 24719, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:17.404698', 'step': 24719, 'epoch': 3}
{'type': 'loss', 'content': 0.015272378921508789, 'timestamp': '2025-10-02 00:55:17.412109', 'step': 24720, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:17.465644', 'step': 24720, 'epoch': 3}
{'type': 'loss', 'content': 0.0738091915845871, 'timestamp': '2025-10-02 00:55:17.468175', 'step': 24721, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:17.522291', 'step': 24721, 'epoch': 3}
{'type': 'loss', 'content': 0.03997485712170601, 'timestamp': '2025-10-02 00:55:17.524754', 'step': 24722, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:17.580544', 'step': 24722, 'epoch': 3}
{'type': 'loss', 'content': 0.02722734399139881, 'timestamp': '2025-10-02 00:55:17.582958', 'step': 24723, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:17.638123', 'step': 24723, 'epoch': 3}
{'type': 'loss', 'content': 0.12226666510105133, 'timestamp': '2025-10-02 00:55:17.644197', 'step': 24724, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:17.700786', 'step': 24724, 'epoch': 3}
{'type': 'loss', 'content': 0.03845867142081261, 'timestamp': '2025-10-02 00:55:17.703339', 'step': 24725, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:17.759670', 'step': 24725, 'epoch': 3}
{'type': 'loss', 'content': 0.01915714144706726, 'timestamp': '2025-10-02 00:55:17.765587', 'step': 24726, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:17.821179', 'step': 24726, 'epoch': 3}
{'type': 'loss', 'content': 0.1486242264509201, 'timestamp': '2025-10-02 00:55:17.823558', 'step': 24727, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:17.877737', 'step': 24727, 'epoch': 3}
{'type': 'loss', 'content': 0.0702202171087265, 'timestamp': '2025-10-02 00:55:17.883959', 'step': 24728, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:17.938815', 'step': 24728, 'epoch': 3}
{'type': 'loss', 'content': 0.0006661401130259037, 'timestamp': '2025-10-02 00:55:17.945827', 'step': 24729, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:18.001306', 'step': 24729, 'epoch': 3}
{'type': 'loss', 'content': 0.07074929773807526, 'timestamp': '2025-10-02 00:55:18.003604', 'step': 24730, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:18.058439', 'step': 24730, 'epoch': 3}
{'type': 'loss', 'content': 0.11548633873462677, 'timestamp': '2025-10-02 00:55:18.060733', 'step': 24731, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:18.115271', 'step': 24731, 'epoch': 3}
{'type': 'loss', 'content': 0.0347420796751976, 'timestamp': '2025-10-02 00:55:18.123340', 'step': 24732, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:18.177677', 'step': 24732, 'epoch': 3}
{'type': 'loss', 'content': 0.00733610987663269, 'timestamp': '2025-10-02 00:55:18.180387', 'step': 24733, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:18.234695', 'step': 24733, 'epoch': 3}
{'type': 'loss', 'content': 0.044221848249435425, 'timestamp': '2025-10-02 00:55:18.236793', 'step': 24734, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:55:18.291264', 'step': 24734, 'epoch': 3}
{'type': 'loss', 'content': 0.06463827192783356, 'timestamp': '2025-10-02 00:55:18.293514', 'step': 24735, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:18.347621', 'step': 24735, 'epoch': 3}
{'type': 'loss', 'content': 0.09332290291786194, 'timestamp': '2025-10-02 00:55:18.353720', 'step': 24736, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:18.407421', 'step': 24736, 'epoch': 3}
{'type': 'loss', 'content': 0.004099463578313589, 'timestamp': '2025-10-02 00:55:18.409838', 'step': 24737, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:18.463847', 'step': 24737, 'epoch': 3}
{'type': 'loss', 'content': 0.018096067011356354, 'timestamp': '2025-10-02 00:55:18.466481', 'step': 24738, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:55:18.520953', 'step': 24738, 'epoch': 3}
{'type': 'loss', 'content': 0.15638206899166107, 'timestamp': '2025-10-02 00:55:18.523312', 'step': 24739, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:18.577748', 'step': 24739, 'epoch': 3}
{'type': 'loss', 'content': 0.06890580803155899, 'timestamp': '2025-10-02 00:55:18.584198', 'step': 24740, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:18.638929', 'step': 24740, 'epoch': 3}
{'type': 'loss', 'content': 0.04740560054779053, 'timestamp': '2025-10-02 00:55:18.649023', 'step': 24741, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:18.703790', 'step': 24741, 'epoch': 3}
{'type': 'loss', 'content': 0.061412323266267776, 'timestamp': '2025-10-02 00:55:18.709517', 'step': 24742, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:18.764326', 'step': 24742, 'epoch': 3}
{'type': 'loss', 'content': 0.02403196506202221, 'timestamp': '2025-10-02 00:55:18.766583', 'step': 24743, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:55:18.829624', 'step': 24743, 'epoch': 3}
{'type': 'loss', 'content': 0.03157878667116165, 'timestamp': '2025-10-02 00:55:18.841026', 'step': 24744, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:18.895715', 'step': 24744, 'epoch': 3}
{'type': 'loss', 'content': 0.038785915821790695, 'timestamp': '2025-10-02 00:55:18.902929', 'step': 24745, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:18.958440', 'step': 24745, 'epoch': 3}
{'type': 'loss', 'content': 0.06200985610485077, 'timestamp': '2025-10-02 00:55:18.961136', 'step': 24746, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:19.016601', 'step': 24746, 'epoch': 3}
{'type': 'loss', 'content': 0.08024509996175766, 'timestamp': '2025-10-02 00:55:19.018612', 'step': 24747, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:19.073282', 'step': 24747, 'epoch': 3}
{'type': 'loss', 'content': 0.03770855814218521, 'timestamp': '2025-10-02 00:55:19.079072', 'step': 24748, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:19.132411', 'step': 24748, 'epoch': 3}
{'type': 'loss', 'content': 0.07467546314001083, 'timestamp': '2025-10-02 00:55:19.134693', 'step': 24749, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:19.188754', 'step': 24749, 'epoch': 3}
{'type': 'loss', 'content': 0.060223668813705444, 'timestamp': '2025-10-02 00:55:19.191233', 'step': 24750, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:19.247066', 'step': 24750, 'epoch': 3}
{'type': 'loss', 'content': 0.077232226729393, 'timestamp': '2025-10-02 00:55:19.256553', 'step': 24751, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:19.312121', 'step': 24751, 'epoch': 3}
{'type': 'loss', 'content': 0.02216145396232605, 'timestamp': '2025-10-02 00:55:19.318573', 'step': 24752, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:19.372908', 'step': 24752, 'epoch': 3}
{'type': 'loss', 'content': 0.0223256703466177, 'timestamp': '2025-10-02 00:55:19.382437', 'step': 24753, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:19.436718', 'step': 24753, 'epoch': 3}
{'type': 'loss', 'content': 0.05534554272890091, 'timestamp': '2025-10-02 00:55:19.439118', 'step': 24754, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:19.493877', 'step': 24754, 'epoch': 3}
{'type': 'loss', 'content': 0.0621831938624382, 'timestamp': '2025-10-02 00:55:19.496586', 'step': 24755, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:55:19.551993', 'step': 24755, 'epoch': 3}
{'type': 'loss', 'content': 0.05565095692873001, 'timestamp': '2025-10-02 00:55:19.557820', 'step': 24756, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:19.613748', 'step': 24756, 'epoch': 3}
{'type': 'loss', 'content': 0.02899184823036194, 'timestamp': '2025-10-02 00:55:19.615870', 'step': 24757, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:19.670621', 'step': 24757, 'epoch': 3}
{'type': 'loss', 'content': 0.01390860229730606, 'timestamp': '2025-10-02 00:55:19.673121', 'step': 24758, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:19.728248', 'step': 24758, 'epoch': 3}
{'type': 'loss', 'content': 0.03480051830410957, 'timestamp': '2025-10-02 00:55:19.730488', 'step': 24759, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:19.785823', 'step': 24759, 'epoch': 3}
{'type': 'loss', 'content': 0.020276501774787903, 'timestamp': '2025-10-02 00:55:19.791723', 'step': 24760, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:19.848775', 'step': 24760, 'epoch': 3}
{'type': 'loss', 'content': 0.03308279812335968, 'timestamp': '2025-10-02 00:55:19.851245', 'step': 24761, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:19.905385', 'step': 24761, 'epoch': 3}
{'type': 'loss', 'content': 0.11145161092281342, 'timestamp': '2025-10-02 00:55:19.907798', 'step': 24762, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:55:19.969762', 'step': 24762, 'epoch': 3}
{'type': 'loss', 'content': 0.025789683684706688, 'timestamp': '2025-10-02 00:55:19.980251', 'step': 24763, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:55:20.048119', 'step': 24763, 'epoch': 3}
{'type': 'loss', 'content': 0.01106951292604208, 'timestamp': '2025-10-02 00:55:20.060887', 'step': 24764, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:20.115127', 'step': 24764, 'epoch': 3}
{'type': 'loss', 'content': 0.03980088606476784, 'timestamp': '2025-10-02 00:55:20.124388', 'step': 24765, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:20.179790', 'step': 24765, 'epoch': 3}
{'type': 'loss', 'content': 0.038876961916685104, 'timestamp': '2025-10-02 00:55:20.182217', 'step': 24766, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:20.236384', 'step': 24766, 'epoch': 3}
{'type': 'loss', 'content': 0.0612398236989975, 'timestamp': '2025-10-02 00:55:20.243559', 'step': 24767, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:55:20.298190', 'step': 24767, 'epoch': 3}
{'type': 'loss', 'content': 0.06643933057785034, 'timestamp': '2025-10-02 00:55:20.304592', 'step': 24768, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:20.359734', 'step': 24768, 'epoch': 3}
{'type': 'loss', 'content': 0.06116301938891411, 'timestamp': '2025-10-02 00:55:20.366802', 'step': 24769, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:20.422426', 'step': 24769, 'epoch': 3}
{'type': 'loss', 'content': 0.0015278250211849809, 'timestamp': '2025-10-02 00:55:20.429889', 'step': 24770, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:55:20.485589', 'step': 24770, 'epoch': 3}
{'type': 'loss', 'content': 0.05510745197534561, 'timestamp': '2025-10-02 00:55:20.488060', 'step': 24771, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:20.543221', 'step': 24771, 'epoch': 3}
{'type': 'loss', 'content': 0.027641575783491135, 'timestamp': '2025-10-02 00:55:20.552549', 'step': 24772, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:20.608274', 'step': 24772, 'epoch': 3}
{'type': 'loss', 'content': 0.1249048188328743, 'timestamp': '2025-10-02 00:55:20.610645', 'step': 24773, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:20.666599', 'step': 24773, 'epoch': 3}
{'type': 'loss', 'content': 0.03615172207355499, 'timestamp': '2025-10-02 00:55:20.668884', 'step': 24774, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:55:20.730925', 'step': 24774, 'epoch': 3}
{'type': 'loss', 'content': 0.0561501644551754, 'timestamp': '2025-10-02 00:55:20.741565', 'step': 24775, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:20.795904', 'step': 24775, 'epoch': 3}
{'type': 'loss', 'content': 0.07583139836788177, 'timestamp': '2025-10-02 00:55:20.802093', 'step': 24776, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:20.856238', 'step': 24776, 'epoch': 3}
{'type': 'loss', 'content': 0.0840395987033844, 'timestamp': '2025-10-02 00:55:20.861717', 'step': 24777, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:20.916446', 'step': 24777, 'epoch': 3}
{'type': 'loss', 'content': 0.014582112431526184, 'timestamp': '2025-10-02 00:55:20.918909', 'step': 24778, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:20.974082', 'step': 24778, 'epoch': 3}
{'type': 'loss', 'content': 0.10635505616664886, 'timestamp': '2025-10-02 00:55:20.976538', 'step': 24779, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:21.032093', 'step': 24779, 'epoch': 3}
{'type': 'loss', 'content': 0.004658449441194534, 'timestamp': '2025-10-02 00:55:21.042051', 'step': 24780, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:21.096104', 'step': 24780, 'epoch': 3}
{'type': 'loss', 'content': 0.027643242850899696, 'timestamp': '2025-10-02 00:55:21.098141', 'step': 24781, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:55:21.152370', 'step': 24781, 'epoch': 3}
{'type': 'loss', 'content': 0.06112319603562355, 'timestamp': '2025-10-02 00:55:21.154597', 'step': 24782, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:55:21.216459', 'step': 24782, 'epoch': 3}
{'type': 'loss', 'content': 0.013965677469968796, 'timestamp': '2025-10-02 00:55:21.227049', 'step': 24783, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:21.281362', 'step': 24783, 'epoch': 3}
{'type': 'loss', 'content': 0.019953927025198936, 'timestamp': '2025-10-02 00:55:21.289701', 'step': 24784, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:21.344787', 'step': 24784, 'epoch': 3}
{'type': 'loss', 'content': 0.050736457109451294, 'timestamp': '2025-10-02 00:55:21.351831', 'step': 24785, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:21.406121', 'step': 24785, 'epoch': 3}
{'type': 'loss', 'content': 0.04547271877527237, 'timestamp': '2025-10-02 00:55:21.408479', 'step': 24786, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:21.464117', 'step': 24786, 'epoch': 3}
{'type': 'loss', 'content': 0.0725286453962326, 'timestamp': '2025-10-02 00:55:21.473612', 'step': 24787, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:21.528114', 'step': 24787, 'epoch': 3}
{'type': 'loss', 'content': 0.06270240247249603, 'timestamp': '2025-10-02 00:55:21.538235', 'step': 24788, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:21.593753', 'step': 24788, 'epoch': 3}
{'type': 'loss', 'content': 0.16055384278297424, 'timestamp': '2025-10-02 00:55:21.596081', 'step': 24789, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:21.650832', 'step': 24789, 'epoch': 3}
{'type': 'loss', 'content': 0.10435003787279129, 'timestamp': '2025-10-02 00:55:21.653706', 'step': 24790, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:21.709562', 'step': 24790, 'epoch': 3}
{'type': 'loss', 'content': 0.058973297476768494, 'timestamp': '2025-10-02 00:55:21.711980', 'step': 24791, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:55:21.766033', 'step': 24791, 'epoch': 3}
{'type': 'loss', 'content': 0.060727331787347794, 'timestamp': '2025-10-02 00:55:21.771977', 'step': 24792, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:55:21.829917', 'step': 24792, 'epoch': 3}
{'type': 'loss', 'content': 0.0381784625351429, 'timestamp': '2025-10-02 00:55:21.840854', 'step': 24793, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:21.897763', 'step': 24793, 'epoch': 3}
{'type': 'loss', 'content': 0.0844191387295723, 'timestamp': '2025-10-02 00:55:21.901470', 'step': 24794, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:21.956360', 'step': 24794, 'epoch': 3}
{'type': 'loss', 'content': 0.05900978669524193, 'timestamp': '2025-10-02 00:55:21.959374', 'step': 24795, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:22.014944', 'step': 24795, 'epoch': 3}
{'type': 'loss', 'content': 0.14135941863059998, 'timestamp': '2025-10-02 00:55:22.020392', 'step': 24796, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:22.076246', 'step': 24796, 'epoch': 3}
{'type': 'loss', 'content': 0.06856800615787506, 'timestamp': '2025-10-02 00:55:22.079111', 'step': 24797, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:22.135447', 'step': 24797, 'epoch': 3}
{'type': 'loss', 'content': 0.04265270754694939, 'timestamp': '2025-10-02 00:55:22.144803', 'step': 24798, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:55:22.199550', 'step': 24798, 'epoch': 3}
{'type': 'loss', 'content': 0.058100346475839615, 'timestamp': '2025-10-02 00:55:22.202600', 'step': 24799, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:55:22.262204', 'step': 24799, 'epoch': 3}
{'type': 'loss', 'content': 0.0006484538316726685, 'timestamp': '2025-10-02 00:55:22.273136', 'step': 24800, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:55:22.333678', 'step': 24800, 'epoch': 3}
{'type': 'loss', 'content': 0.049861740320920944, 'timestamp': '2025-10-02 00:55:22.344563', 'step': 24801, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:22.400983', 'step': 24801, 'epoch': 3}
{'type': 'loss', 'content': 0.039044372737407684, 'timestamp': '2025-10-02 00:55:22.406711', 'step': 24802, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:22.466421', 'step': 24802, 'epoch': 3}
{'type': 'loss', 'content': 0.04951624572277069, 'timestamp': '2025-10-02 00:55:22.468732', 'step': 24803, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:22.527091', 'step': 24803, 'epoch': 3}
{'type': 'loss', 'content': 0.056781359016895294, 'timestamp': '2025-10-02 00:55:22.532904', 'step': 24804, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:22.587510', 'step': 24804, 'epoch': 3}
{'type': 'loss', 'content': 0.0842338353395462, 'timestamp': '2025-10-02 00:55:22.589839', 'step': 24805, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:22.647047', 'step': 24805, 'epoch': 3}
{'type': 'loss', 'content': 0.011302751488983631, 'timestamp': '2025-10-02 00:55:22.649596', 'step': 24806, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:22.704313', 'step': 24806, 'epoch': 3}
{'type': 'loss', 'content': 0.04642398655414581, 'timestamp': '2025-10-02 00:55:22.706883', 'step': 24807, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:55:22.763849', 'step': 24807, 'epoch': 3}
{'type': 'loss', 'content': 0.0832151249051094, 'timestamp': '2025-10-02 00:55:22.774094', 'step': 24808, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:55:22.830427', 'step': 24808, 'epoch': 3}
{'type': 'loss', 'content': 0.04199374094605446, 'timestamp': '2025-10-02 00:55:22.833626', 'step': 24809, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:22.889941', 'step': 24809, 'epoch': 3}
{'type': 'loss', 'content': 0.06903211027383804, 'timestamp': '2025-10-02 00:55:22.892730', 'step': 24810, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:22.949483', 'step': 24810, 'epoch': 3}
{'type': 'loss', 'content': 0.02677171491086483, 'timestamp': '2025-10-02 00:55:22.953000', 'step': 24811, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:55:23.022682', 'step': 24811, 'epoch': 3}
{'type': 'loss', 'content': 0.01567612588405609, 'timestamp': '2025-10-02 00:55:23.035419', 'step': 24812, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:23.090203', 'step': 24812, 'epoch': 3}
{'type': 'loss', 'content': 0.031082170084118843, 'timestamp': '2025-10-02 00:55:23.093391', 'step': 24813, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:23.149789', 'step': 24813, 'epoch': 3}
{'type': 'loss', 'content': 0.04413509741425514, 'timestamp': '2025-10-02 00:55:23.152686', 'step': 24814, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:23.209816', 'step': 24814, 'epoch': 3}
{'type': 'loss', 'content': 0.0016024906653910875, 'timestamp': '2025-10-02 00:55:23.219061', 'step': 24815, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:23.276985', 'step': 24815, 'epoch': 3}
{'type': 'loss', 'content': 0.11852036416530609, 'timestamp': '2025-10-02 00:55:23.283601', 'step': 24816, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:23.339292', 'step': 24816, 'epoch': 3}
{'type': 'loss', 'content': 0.009668044745922089, 'timestamp': '2025-10-02 00:55:23.344950', 'step': 24817, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:23.403165', 'step': 24817, 'epoch': 3}
{'type': 'loss', 'content': 0.08058758825063705, 'timestamp': '2025-10-02 00:55:23.406054', 'step': 24818, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:23.462199', 'step': 24818, 'epoch': 3}
{'type': 'loss', 'content': 0.01931685023009777, 'timestamp': '2025-10-02 00:55:23.469283', 'step': 24819, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:55:23.533354', 'step': 24819, 'epoch': 3}
{'type': 'loss', 'content': 0.011437925510108471, 'timestamp': '2025-10-02 00:55:23.544570', 'step': 24820, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:55:23.600093', 'step': 24820, 'epoch': 3}
{'type': 'loss', 'content': 0.1281108856201172, 'timestamp': '2025-10-02 00:55:23.603275', 'step': 24821, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:23.662901', 'step': 24821, 'epoch': 3}
{'type': 'loss', 'content': 0.018252674490213394, 'timestamp': '2025-10-02 00:55:23.672423', 'step': 24822, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:23.728919', 'step': 24822, 'epoch': 3}
{'type': 'loss', 'content': 0.12091200053691864, 'timestamp': '2025-10-02 00:55:23.731933', 'step': 24823, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:55:23.793286', 'step': 24823, 'epoch': 3}
{'type': 'loss', 'content': 0.03812406212091446, 'timestamp': '2025-10-02 00:55:23.804225', 'step': 24824, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:23.861270', 'step': 24824, 'epoch': 3}
{'type': 'loss', 'content': 0.016147581860423088, 'timestamp': '2025-10-02 00:55:23.864128', 'step': 24825, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:23.920269', 'step': 24825, 'epoch': 3}
{'type': 'loss', 'content': 0.06413348019123077, 'timestamp': '2025-10-02 00:55:23.926096', 'step': 24826, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:23.982079', 'step': 24826, 'epoch': 3}
{'type': 'loss', 'content': 0.0157582126557827, 'timestamp': '2025-10-02 00:55:23.991604', 'step': 24827, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:24.048779', 'step': 24827, 'epoch': 3}
{'type': 'loss', 'content': 0.03340671956539154, 'timestamp': '2025-10-02 00:55:24.054596', 'step': 24828, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:55:24.108684', 'step': 24828, 'epoch': 3}
{'type': 'loss', 'content': 0.030891897156834602, 'timestamp': '2025-10-02 00:55:24.110974', 'step': 24829, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:55:24.165645', 'step': 24829, 'epoch': 3}
{'type': 'loss', 'content': 0.144877627491951, 'timestamp': '2025-10-02 00:55:24.168250', 'step': 24830, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:24.223468', 'step': 24830, 'epoch': 3}
{'type': 'loss', 'content': 0.08991558849811554, 'timestamp': '2025-10-02 00:55:24.225927', 'step': 24831, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:24.280563', 'step': 24831, 'epoch': 3}
{'type': 'loss', 'content': 0.06073073670268059, 'timestamp': '2025-10-02 00:55:24.286725', 'step': 24832, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:24.340528', 'step': 24832, 'epoch': 3}
{'type': 'loss', 'content': 0.019786056131124496, 'timestamp': '2025-10-02 00:55:24.343118', 'step': 24833, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:24.398185', 'step': 24833, 'epoch': 3}
{'type': 'loss', 'content': 0.0041799405589699745, 'timestamp': '2025-10-02 00:55:24.400793', 'step': 24834, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:55:24.464008', 'step': 24834, 'epoch': 3}
{'type': 'loss', 'content': 0.02107548899948597, 'timestamp': '2025-10-02 00:55:24.474848', 'step': 24835, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:24.530472', 'step': 24835, 'epoch': 3}
{'type': 'loss', 'content': 0.06979561597108841, 'timestamp': '2025-10-02 00:55:24.536282', 'step': 24836, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:24.590729', 'step': 24836, 'epoch': 3}
{'type': 'loss', 'content': 0.09147915989160538, 'timestamp': '2025-10-02 00:55:24.593150', 'step': 24837, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:55:24.647298', 'step': 24837, 'epoch': 3}
{'type': 'loss', 'content': 0.09341046214103699, 'timestamp': '2025-10-02 00:55:24.649762', 'step': 24838, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:24.705602', 'step': 24838, 'epoch': 3}
{'type': 'loss', 'content': 0.03223322704434395, 'timestamp': '2025-10-02 00:55:24.712792', 'step': 24839, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:24.767323', 'step': 24839, 'epoch': 3}
{'type': 'loss', 'content': 0.09600113332271576, 'timestamp': '2025-10-02 00:55:24.773419', 'step': 24840, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-10-02 00:55:24.852738', 'step': 24840, 'epoch': 3}
{'type': 'loss', 'content': 0.004007352516055107, 'timestamp': '2025-10-02 00:55:24.869065', 'step': 24841, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:24.924164', 'step': 24841, 'epoch': 3}
{'type': 'loss', 'content': 0.02716808021068573, 'timestamp': '2025-10-02 00:55:24.929868', 'step': 24842, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:24.984691', 'step': 24842, 'epoch': 3}
{'type': 'loss', 'content': 0.050949640572071075, 'timestamp': '2025-10-02 00:55:24.987335', 'step': 24843, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:25.042825', 'step': 24843, 'epoch': 3}
{'type': 'loss', 'content': 0.018715301528573036, 'timestamp': '2025-10-02 00:55:25.050792', 'step': 24844, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:25.104770', 'step': 24844, 'epoch': 3}
{'type': 'loss', 'content': 0.051446251571178436, 'timestamp': '2025-10-02 00:55:25.107339', 'step': 24845, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:25.163762', 'step': 24845, 'epoch': 3}
{'type': 'loss', 'content': 0.06285876035690308, 'timestamp': '2025-10-02 00:55:25.166717', 'step': 24846, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:25.221406', 'step': 24846, 'epoch': 3}
{'type': 'loss', 'content': 0.06598349660634995, 'timestamp': '2025-10-02 00:55:25.230827', 'step': 24847, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:25.289764', 'step': 24847, 'epoch': 3}
{'type': 'loss', 'content': 0.012593846768140793, 'timestamp': '2025-10-02 00:55:25.298146', 'step': 24848, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:25.352651', 'step': 24848, 'epoch': 3}
{'type': 'loss', 'content': 0.006062345113605261, 'timestamp': '2025-10-02 00:55:25.358239', 'step': 24849, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:25.413721', 'step': 24849, 'epoch': 3}
{'type': 'loss', 'content': 0.09077564626932144, 'timestamp': '2025-10-02 00:55:25.416254', 'step': 24850, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:25.473244', 'step': 24850, 'epoch': 3}
{'type': 'loss', 'content': 0.07629801332950592, 'timestamp': '2025-10-02 00:55:25.476003', 'step': 24851, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:25.530787', 'step': 24851, 'epoch': 3}
{'type': 'loss', 'content': 0.02485625073313713, 'timestamp': '2025-10-02 00:55:25.536562', 'step': 24852, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:25.592788', 'step': 24852, 'epoch': 3}
{'type': 'loss', 'content': 0.09513254463672638, 'timestamp': '2025-10-02 00:55:25.595378', 'step': 24853, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:25.651545', 'step': 24853, 'epoch': 3}
{'type': 'loss', 'content': 0.06253084540367126, 'timestamp': '2025-10-02 00:55:25.658657', 'step': 24854, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:25.713328', 'step': 24854, 'epoch': 3}
{'type': 'loss', 'content': 0.01924786902964115, 'timestamp': '2025-10-02 00:55:25.715564', 'step': 24855, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:25.770776', 'step': 24855, 'epoch': 3}
{'type': 'loss', 'content': 0.06221485137939453, 'timestamp': '2025-10-02 00:55:25.776713', 'step': 24856, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:25.831681', 'step': 24856, 'epoch': 3}
{'type': 'loss', 'content': 0.006513099651783705, 'timestamp': '2025-10-02 00:55:25.841944', 'step': 24857, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:25.898080', 'step': 24857, 'epoch': 3}
{'type': 'loss', 'content': 0.012943609617650509, 'timestamp': '2025-10-02 00:55:25.905058', 'step': 24858, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:25.960762', 'step': 24858, 'epoch': 3}
{'type': 'loss', 'content': 0.0598197765648365, 'timestamp': '2025-10-02 00:55:25.963218', 'step': 24859, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:26.018657', 'step': 24859, 'epoch': 3}
{'type': 'loss', 'content': 0.058712493628263474, 'timestamp': '2025-10-02 00:55:26.025481', 'step': 24860, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:55:26.085813', 'step': 24860, 'epoch': 3}
{'type': 'loss', 'content': 0.02303626574575901, 'timestamp': '2025-10-02 00:55:26.097321', 'step': 24861, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:26.152517', 'step': 24861, 'epoch': 3}
{'type': 'loss', 'content': 0.09760831296443939, 'timestamp': '2025-10-02 00:55:26.154819', 'step': 24862, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:26.209429', 'step': 24862, 'epoch': 3}
{'type': 'loss', 'content': 0.02107514813542366, 'timestamp': '2025-10-02 00:55:26.216623', 'step': 24863, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:26.271512', 'step': 24863, 'epoch': 3}
{'type': 'loss', 'content': 0.046471502631902695, 'timestamp': '2025-10-02 00:55:26.278102', 'step': 24864, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:55:26.332265', 'step': 24864, 'epoch': 3}
{'type': 'loss', 'content': 0.05048227310180664, 'timestamp': '2025-10-02 00:55:26.334689', 'step': 24865, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:26.389428', 'step': 24865, 'epoch': 3}
{'type': 'loss', 'content': 0.0541030690073967, 'timestamp': '2025-10-02 00:55:26.391785', 'step': 24866, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:26.446622', 'step': 24866, 'epoch': 3}
{'type': 'loss', 'content': 0.05613350495696068, 'timestamp': '2025-10-02 00:55:26.448980', 'step': 24867, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:55:26.511714', 'step': 24867, 'epoch': 3}
{'type': 'loss', 'content': 0.025269605219364166, 'timestamp': '2025-10-02 00:55:26.522632', 'step': 24868, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:26.577137', 'step': 24868, 'epoch': 3}
{'type': 'loss', 'content': 0.023643169552087784, 'timestamp': '2025-10-02 00:55:26.587340', 'step': 24869, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:55:26.642349', 'step': 24869, 'epoch': 3}
{'type': 'loss', 'content': 0.14706307649612427, 'timestamp': '2025-10-02 00:55:26.644666', 'step': 24870, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:55:26.699403', 'step': 24870, 'epoch': 3}
{'type': 'loss', 'content': 0.020299147814512253, 'timestamp': '2025-10-02 00:55:26.701977', 'step': 24871, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:26.756667', 'step': 24871, 'epoch': 3}
{'type': 'loss', 'content': 0.09924943000078201, 'timestamp': '2025-10-02 00:55:26.762963', 'step': 24872, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:26.817380', 'step': 24872, 'epoch': 3}
{'type': 'loss', 'content': 0.04722984880208969, 'timestamp': '2025-10-02 00:55:26.823133', 'step': 24873, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:26.877204', 'step': 24873, 'epoch': 3}
{'type': 'loss', 'content': 0.10929802805185318, 'timestamp': '2025-10-02 00:55:26.879501', 'step': 24874, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:26.933920', 'step': 24874, 'epoch': 3}
{'type': 'loss', 'content': 0.008663461543619633, 'timestamp': '2025-10-02 00:55:26.939596', 'step': 24875, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:26.993541', 'step': 24875, 'epoch': 3}
{'type': 'loss', 'content': 0.02979414537549019, 'timestamp': '2025-10-02 00:55:26.999565', 'step': 24876, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:27.053936', 'step': 24876, 'epoch': 3}
{'type': 'loss', 'content': 0.07584217190742493, 'timestamp': '2025-10-02 00:55:27.056207', 'step': 24877, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:27.111531', 'step': 24877, 'epoch': 3}
{'type': 'loss', 'content': 0.024625282734632492, 'timestamp': '2025-10-02 00:55:27.117249', 'step': 24878, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:27.171524', 'step': 24878, 'epoch': 3}
{'type': 'loss', 'content': 0.04818924888968468, 'timestamp': '2025-10-02 00:55:27.174358', 'step': 24879, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:27.229950', 'step': 24879, 'epoch': 3}
{'type': 'loss', 'content': 0.016295023262500763, 'timestamp': '2025-10-02 00:55:27.235975', 'step': 24880, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:55:27.289726', 'step': 24880, 'epoch': 3}
{'type': 'loss', 'content': 0.10705073177814484, 'timestamp': '2025-10-02 00:55:27.292186', 'step': 24881, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:27.347060', 'step': 24881, 'epoch': 3}
{'type': 'loss', 'content': 0.03675573319196701, 'timestamp': '2025-10-02 00:55:27.356209', 'step': 24882, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:27.415868', 'step': 24882, 'epoch': 3}
{'type': 'loss', 'content': 0.074971504509449, 'timestamp': '2025-10-02 00:55:27.425324', 'step': 24883, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:27.480224', 'step': 24883, 'epoch': 3}
{'type': 'loss', 'content': 0.05567150563001633, 'timestamp': '2025-10-02 00:55:27.488150', 'step': 24884, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:27.542629', 'step': 24884, 'epoch': 3}
{'type': 'loss', 'content': 0.02987445332109928, 'timestamp': '2025-10-02 00:55:27.545092', 'step': 24885, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:27.600587', 'step': 24885, 'epoch': 3}
{'type': 'loss', 'content': 0.007013201713562012, 'timestamp': '2025-10-02 00:55:27.609966', 'step': 24886, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 560], 'flops': 11200068058304.0}, 'timestamp': '2025-10-02 00:55:27.697671', 'step': 24886, 'epoch': 3}
{'type': 'loss', 'content': 0.02083723247051239, 'timestamp': '2025-10-02 00:55:27.712736', 'step': 24887, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:27.769613', 'step': 24887, 'epoch': 3}
{'type': 'loss', 'content': 0.025351503863930702, 'timestamp': '2025-10-02 00:55:27.775661', 'step': 24888, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:55:27.835029', 'step': 24888, 'epoch': 3}
{'type': 'loss', 'content': 0.056791603565216064, 'timestamp': '2025-10-02 00:55:27.845971', 'step': 24889, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:27.908305', 'step': 24889, 'epoch': 3}
{'type': 'loss', 'content': 0.04706432670354843, 'timestamp': '2025-10-02 00:55:27.917864', 'step': 24890, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:27.978852', 'step': 24890, 'epoch': 3}
{'type': 'loss', 'content': 0.008568324148654938, 'timestamp': '2025-10-02 00:55:27.988420', 'step': 24891, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:55:28.043366', 'step': 24891, 'epoch': 3}
{'type': 'loss', 'content': 0.09676402807235718, 'timestamp': '2025-10-02 00:55:28.051764', 'step': 24892, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:28.107851', 'step': 24892, 'epoch': 3}
{'type': 'loss', 'content': 0.06418700516223907, 'timestamp': '2025-10-02 00:55:28.110516', 'step': 24893, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:28.170092', 'step': 24893, 'epoch': 3}
{'type': 'loss', 'content': 0.09322161972522736, 'timestamp': '2025-10-02 00:55:28.175717', 'step': 24894, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:28.237748', 'step': 24894, 'epoch': 3}
{'type': 'loss', 'content': 0.0568169429898262, 'timestamp': '2025-10-02 00:55:28.240052', 'step': 24895, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:28.294184', 'step': 24895, 'epoch': 3}
{'type': 'loss', 'content': 0.007396232336759567, 'timestamp': '2025-10-02 00:55:28.300070', 'step': 24896, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:28.357642', 'step': 24896, 'epoch': 3}
{'type': 'loss', 'content': 0.026858696714043617, 'timestamp': '2025-10-02 00:55:28.360335', 'step': 24897, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:55:28.417394', 'step': 24897, 'epoch': 3}
{'type': 'loss', 'content': 0.07482874393463135, 'timestamp': '2025-10-02 00:55:28.421919', 'step': 24898, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:28.479298', 'step': 24898, 'epoch': 3}
{'type': 'loss', 'content': 0.03254198282957077, 'timestamp': '2025-10-02 00:55:28.481502', 'step': 24899, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:28.540323', 'step': 24899, 'epoch': 3}
{'type': 'loss', 'content': 0.007595089264214039, 'timestamp': '2025-10-02 00:55:28.550473', 'step': 24900, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:28.612862', 'step': 24900, 'epoch': 3}
{'type': 'loss', 'content': 0.0211167149245739, 'timestamp': '2025-10-02 00:55:28.615323', 'step': 24901, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:28.669573', 'step': 24901, 'epoch': 3}
{'type': 'loss', 'content': 0.0757736936211586, 'timestamp': '2025-10-02 00:55:28.671997', 'step': 24902, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:28.726366', 'step': 24902, 'epoch': 3}
{'type': 'loss', 'content': 0.06833312660455704, 'timestamp': '2025-10-02 00:55:28.731992', 'step': 24903, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:55:28.793673', 'step': 24903, 'epoch': 3}
{'type': 'loss', 'content': 0.01879854127764702, 'timestamp': '2025-10-02 00:55:28.804838', 'step': 24904, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:28.859300', 'step': 24904, 'epoch': 3}
{'type': 'loss', 'content': 0.013810855336487293, 'timestamp': '2025-10-02 00:55:28.868676', 'step': 24905, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:28.923795', 'step': 24905, 'epoch': 3}
{'type': 'loss', 'content': 0.09970048815011978, 'timestamp': '2025-10-02 00:55:28.931164', 'step': 24906, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:28.986274', 'step': 24906, 'epoch': 3}
{'type': 'loss', 'content': 0.054919011890888214, 'timestamp': '2025-10-02 00:55:28.991165', 'step': 24907, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:29.048695', 'step': 24907, 'epoch': 3}
{'type': 'loss', 'content': 0.01670791581273079, 'timestamp': '2025-10-02 00:55:29.055134', 'step': 24908, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:29.109707', 'step': 24908, 'epoch': 3}
{'type': 'loss', 'content': 0.004661908373236656, 'timestamp': '2025-10-02 00:55:29.119930', 'step': 24909, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:29.174235', 'step': 24909, 'epoch': 3}
{'type': 'loss', 'content': 0.04406732693314552, 'timestamp': '2025-10-02 00:55:29.176720', 'step': 24910, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:29.231525', 'step': 24910, 'epoch': 3}
{'type': 'loss', 'content': 0.1812192052602768, 'timestamp': '2025-10-02 00:55:29.233709', 'step': 24911, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:55:29.303740', 'step': 24911, 'epoch': 3}
{'type': 'loss', 'content': 0.011045658960938454, 'timestamp': '2025-10-02 00:55:29.316956', 'step': 24912, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:29.371559', 'step': 24912, 'epoch': 3}
{'type': 'loss', 'content': 0.06788332015275955, 'timestamp': '2025-10-02 00:55:29.381809', 'step': 24913, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:29.437142', 'step': 24913, 'epoch': 3}
{'type': 'loss', 'content': 0.009551829658448696, 'timestamp': '2025-10-02 00:55:29.439590', 'step': 24914, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:29.495801', 'step': 24914, 'epoch': 3}
{'type': 'loss', 'content': 0.036325931549072266, 'timestamp': '2025-10-02 00:55:29.501539', 'step': 24915, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:29.556227', 'step': 24915, 'epoch': 3}
{'type': 'loss', 'content': 0.03863954916596413, 'timestamp': '2025-10-02 00:55:29.562358', 'step': 24916, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:29.615222', 'step': 24916, 'epoch': 3}
{'type': 'loss', 'content': 0.1078617125749588, 'timestamp': '2025-10-02 00:55:29.617496', 'step': 24917, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:29.672843', 'step': 24917, 'epoch': 3}
{'type': 'loss', 'content': 0.017791248857975006, 'timestamp': '2025-10-02 00:55:29.675050', 'step': 24918, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:55:29.728899', 'step': 24918, 'epoch': 3}
{'type': 'loss', 'content': 0.07018765062093735, 'timestamp': '2025-10-02 00:55:29.731271', 'step': 24919, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:29.786404', 'step': 24919, 'epoch': 3}
{'type': 'loss', 'content': 0.03424803912639618, 'timestamp': '2025-10-02 00:55:29.792326', 'step': 24920, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:55:29.858864', 'step': 24920, 'epoch': 3}
{'type': 'loss', 'content': 0.0015652007423341274, 'timestamp': '2025-10-02 00:55:29.871856', 'step': 24921, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:29.926892', 'step': 24921, 'epoch': 3}
{'type': 'loss', 'content': 0.027956368401646614, 'timestamp': '2025-10-02 00:55:29.929480', 'step': 24922, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:29.991476', 'step': 24922, 'epoch': 3}
{'type': 'loss', 'content': 0.035167887806892395, 'timestamp': '2025-10-02 00:55:29.994073', 'step': 24923, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:30.049751', 'step': 24923, 'epoch': 3}
{'type': 'loss', 'content': 0.029277196153998375, 'timestamp': '2025-10-02 00:55:30.060059', 'step': 24924, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:30.115052', 'step': 24924, 'epoch': 3}
{'type': 'loss', 'content': 0.002239610068500042, 'timestamp': '2025-10-02 00:55:30.125295', 'step': 24925, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:30.180068', 'step': 24925, 'epoch': 3}
{'type': 'loss', 'content': 0.07780332863330841, 'timestamp': '2025-10-02 00:55:30.182289', 'step': 24926, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:30.237263', 'step': 24926, 'epoch': 3}
{'type': 'loss', 'content': 0.09335345774888992, 'timestamp': '2025-10-02 00:55:30.239581', 'step': 24927, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:30.293614', 'step': 24927, 'epoch': 3}
{'type': 'loss', 'content': 0.07301688939332962, 'timestamp': '2025-10-02 00:55:30.304474', 'step': 24928, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:30.359355', 'step': 24928, 'epoch': 3}
{'type': 'loss', 'content': 0.00210877344943583, 'timestamp': '2025-10-02 00:55:30.366931', 'step': 24929, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:30.421011', 'step': 24929, 'epoch': 3}
{'type': 'loss', 'content': 0.17757509648799896, 'timestamp': '2025-10-02 00:55:30.423844', 'step': 24930, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:30.479510', 'step': 24930, 'epoch': 3}
{'type': 'loss', 'content': 0.06199056655168533, 'timestamp': '2025-10-02 00:55:30.481905', 'step': 24931, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:30.537689', 'step': 24931, 'epoch': 3}
{'type': 'loss', 'content': 0.04861146956682205, 'timestamp': '2025-10-02 00:55:30.543964', 'step': 24932, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:30.597734', 'step': 24932, 'epoch': 3}
{'type': 'loss', 'content': 0.02357655018568039, 'timestamp': '2025-10-02 00:55:30.600722', 'step': 24933, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:30.655774', 'step': 24933, 'epoch': 3}
{'type': 'loss', 'content': 0.1266242265701294, 'timestamp': '2025-10-02 00:55:30.658074', 'step': 24934, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:55:30.713264', 'step': 24934, 'epoch': 3}
{'type': 'loss', 'content': 0.11197932064533234, 'timestamp': '2025-10-02 00:55:30.716350', 'step': 24935, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:55:30.786054', 'step': 24935, 'epoch': 3}
{'type': 'loss', 'content': 0.028799142688512802, 'timestamp': '2025-10-02 00:55:30.799242', 'step': 24936, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:30.868052', 'step': 24936, 'epoch': 3}
{'type': 'loss', 'content': 0.004647642374038696, 'timestamp': '2025-10-02 00:55:30.877409', 'step': 24937, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:30.935449', 'step': 24937, 'epoch': 3}
{'type': 'loss', 'content': 0.017840564250946045, 'timestamp': '2025-10-02 00:55:30.941015', 'step': 24938, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:30.995290', 'step': 24938, 'epoch': 3}
{'type': 'loss', 'content': 0.03767000883817673, 'timestamp': '2025-10-02 00:55:30.997651', 'step': 24939, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:55:31.057062', 'step': 24939, 'epoch': 3}
{'type': 'loss', 'content': 0.03470510616898537, 'timestamp': '2025-10-02 00:55:31.067976', 'step': 24940, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:31.123508', 'step': 24940, 'epoch': 3}
{'type': 'loss', 'content': 0.021956544369459152, 'timestamp': '2025-10-02 00:55:31.130750', 'step': 24941, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:31.187674', 'step': 24941, 'epoch': 3}
{'type': 'loss', 'content': 0.0362853929400444, 'timestamp': '2025-10-02 00:55:31.195062', 'step': 24942, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:31.250789', 'step': 24942, 'epoch': 3}
{'type': 'loss', 'content': 0.06184590607881546, 'timestamp': '2025-10-02 00:55:31.254061', 'step': 24943, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:31.309715', 'step': 24943, 'epoch': 3}
{'type': 'loss', 'content': 0.007630136329680681, 'timestamp': '2025-10-02 00:55:31.316372', 'step': 24944, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:31.372843', 'step': 24944, 'epoch': 3}
{'type': 'loss', 'content': 0.05659840628504753, 'timestamp': '2025-10-02 00:55:31.380252', 'step': 24945, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:31.435122', 'step': 24945, 'epoch': 3}
{'type': 'loss', 'content': 0.08852133899927139, 'timestamp': '2025-10-02 00:55:31.438249', 'step': 24946, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:31.494620', 'step': 24946, 'epoch': 3}
{'type': 'loss', 'content': 0.0374821200966835, 'timestamp': '2025-10-02 00:55:31.498398', 'step': 24947, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:31.555682', 'step': 24947, 'epoch': 3}
{'type': 'loss', 'content': 0.09396921843290329, 'timestamp': '2025-10-02 00:55:31.562215', 'step': 24948, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:55:31.620393', 'step': 24948, 'epoch': 3}
{'type': 'loss', 'content': 0.032298482954502106, 'timestamp': '2025-10-02 00:55:31.623669', 'step': 24949, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 592], 'flops': 11840071943488.0}, 'timestamp': '2025-10-02 00:55:31.715870', 'step': 24949, 'epoch': 3}
{'type': 'loss', 'content': 0.0143474405631423, 'timestamp': '2025-10-02 00:55:31.732281', 'step': 24950, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:31.789414', 'step': 24950, 'epoch': 3}
{'type': 'loss', 'content': 0.018095985054969788, 'timestamp': '2025-10-02 00:55:31.792622', 'step': 24951, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:55:31.857189', 'step': 24951, 'epoch': 3}
{'type': 'loss', 'content': 0.02855680137872696, 'timestamp': '2025-10-02 00:55:31.868759', 'step': 24952, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:31.925353', 'step': 24952, 'epoch': 3}
{'type': 'loss', 'content': 0.025211317464709282, 'timestamp': '2025-10-02 00:55:31.934528', 'step': 24953, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:31.990666', 'step': 24953, 'epoch': 3}
{'type': 'loss', 'content': 0.08826659619808197, 'timestamp': '2025-10-02 00:55:31.994119', 'step': 24954, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:55:32.052403', 'step': 24954, 'epoch': 3}
{'type': 'loss', 'content': 0.02491835132241249, 'timestamp': '2025-10-02 00:55:32.054682', 'step': 24955, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:32.110858', 'step': 24955, 'epoch': 3}
{'type': 'loss', 'content': 0.06813029944896698, 'timestamp': '2025-10-02 00:55:32.117612', 'step': 24956, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:55:32.182400', 'step': 24956, 'epoch': 3}
{'type': 'loss', 'content': 0.02321668341755867, 'timestamp': '2025-10-02 00:55:32.193318', 'step': 24957, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:32.250676', 'step': 24957, 'epoch': 3}
{'type': 'loss', 'content': 0.003487873589619994, 'timestamp': '2025-10-02 00:55:32.257918', 'step': 24958, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:32.315876', 'step': 24958, 'epoch': 3}
{'type': 'loss', 'content': 0.0758277028799057, 'timestamp': '2025-10-02 00:55:32.318630', 'step': 24959, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:32.373186', 'step': 24959, 'epoch': 3}
{'type': 'loss', 'content': 0.060207415372133255, 'timestamp': '2025-10-02 00:55:32.381205', 'step': 24960, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:32.435233', 'step': 24960, 'epoch': 3}
{'type': 'loss', 'content': 0.12746329605579376, 'timestamp': '2025-10-02 00:55:32.437648', 'step': 24961, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:55:32.491919', 'step': 24961, 'epoch': 3}
{'type': 'loss', 'content': 0.021645503118634224, 'timestamp': '2025-10-02 00:55:32.494415', 'step': 24962, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 64], 'flops': 1280007837952.0}, 'timestamp': '2025-10-02 00:55:32.547862', 'step': 24962, 'epoch': 3}
{'type': 'loss', 'content': 0.059351321309804916, 'timestamp': '2025-10-02 00:55:32.550377', 'step': 24963, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:32.606148', 'step': 24963, 'epoch': 3}
{'type': 'loss', 'content': 0.07658077031373978, 'timestamp': '2025-10-02 00:55:32.612250', 'step': 24964, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:32.666276', 'step': 24964, 'epoch': 3}
{'type': 'loss', 'content': 0.07288181781768799, 'timestamp': '2025-10-02 00:55:32.668770', 'step': 24965, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:55:32.730347', 'step': 24965, 'epoch': 3}
{'type': 'loss', 'content': 0.02004040777683258, 'timestamp': '2025-10-02 00:55:32.740721', 'step': 24966, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:32.795892', 'step': 24966, 'epoch': 3}
{'type': 'loss', 'content': 0.04922713339328766, 'timestamp': '2025-10-02 00:55:32.798145', 'step': 24967, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:32.852990', 'step': 24967, 'epoch': 3}
{'type': 'loss', 'content': 0.06373634189367294, 'timestamp': '2025-10-02 00:55:32.859053', 'step': 24968, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:32.913494', 'step': 24968, 'epoch': 3}
{'type': 'loss', 'content': 0.03291326016187668, 'timestamp': '2025-10-02 00:55:32.920760', 'step': 24969, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:32.975818', 'step': 24969, 'epoch': 3}
{'type': 'loss', 'content': 0.030855759978294373, 'timestamp': '2025-10-02 00:55:32.978231', 'step': 24970, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:33.033266', 'step': 24970, 'epoch': 3}
{'type': 'loss', 'content': 0.046485066413879395, 'timestamp': '2025-10-02 00:55:33.040619', 'step': 24971, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:33.095107', 'step': 24971, 'epoch': 3}
{'type': 'loss', 'content': 0.12589767575263977, 'timestamp': '2025-10-02 00:55:33.100924', 'step': 24972, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:55:33.161075', 'step': 24972, 'epoch': 3}
{'type': 'loss', 'content': 0.04417041316628456, 'timestamp': '2025-10-02 00:55:33.172407', 'step': 24973, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:33.227412', 'step': 24973, 'epoch': 3}
{'type': 'loss', 'content': 0.01644829288125038, 'timestamp': '2025-10-02 00:55:33.229852', 'step': 24974, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:33.284272', 'step': 24974, 'epoch': 3}
{'type': 'loss', 'content': 0.002174733905121684, 'timestamp': '2025-10-02 00:55:33.289950', 'step': 24975, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:33.345526', 'step': 24975, 'epoch': 3}
{'type': 'loss', 'content': 0.0008209492661990225, 'timestamp': '2025-10-02 00:55:33.352123', 'step': 24976, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:33.406520', 'step': 24976, 'epoch': 3}
{'type': 'loss', 'content': 0.07227130234241486, 'timestamp': '2025-10-02 00:55:33.409521', 'step': 24977, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:33.464786', 'step': 24977, 'epoch': 3}
{'type': 'loss', 'content': 0.13434669375419617, 'timestamp': '2025-10-02 00:55:33.467165', 'step': 24978, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-10-02 00:55:33.543629', 'step': 24978, 'epoch': 3}
{'type': 'loss', 'content': 0.009938294999301434, 'timestamp': '2025-10-02 00:55:33.557267', 'step': 24979, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:33.612633', 'step': 24979, 'epoch': 3}
{'type': 'loss', 'content': 0.03388344869017601, 'timestamp': '2025-10-02 00:55:33.618621', 'step': 24980, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:33.672833', 'step': 24980, 'epoch': 3}
{'type': 'loss', 'content': 0.06669288128614426, 'timestamp': '2025-10-02 00:55:33.677787', 'step': 24981, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:33.732397', 'step': 24981, 'epoch': 3}
{'type': 'loss', 'content': 0.09897039830684662, 'timestamp': '2025-10-02 00:55:33.735007', 'step': 24982, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:33.789677', 'step': 24982, 'epoch': 3}
{'type': 'loss', 'content': 0.028192533180117607, 'timestamp': '2025-10-02 00:55:33.791909', 'step': 24983, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:55:33.845475', 'step': 24983, 'epoch': 3}
{'type': 'loss', 'content': 0.14910462498664856, 'timestamp': '2025-10-02 00:55:33.851422', 'step': 24984, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:33.905117', 'step': 24984, 'epoch': 3}
{'type': 'loss', 'content': 0.0451076403260231, 'timestamp': '2025-10-02 00:55:33.907570', 'step': 24985, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:33.963636', 'step': 24985, 'epoch': 3}
{'type': 'loss', 'content': 0.06302803754806519, 'timestamp': '2025-10-02 00:55:33.966706', 'step': 24986, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:34.021829', 'step': 24986, 'epoch': 3}
{'type': 'loss', 'content': 0.052771423012018204, 'timestamp': '2025-10-02 00:55:34.024394', 'step': 24987, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:34.079119', 'step': 24987, 'epoch': 3}
{'type': 'loss', 'content': 0.07390589267015457, 'timestamp': '2025-10-02 00:55:34.085260', 'step': 24988, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:55:34.142886', 'step': 24988, 'epoch': 3}
{'type': 'loss', 'content': 0.010831130668520927, 'timestamp': '2025-10-02 00:55:34.153875', 'step': 24989, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:34.208523', 'step': 24989, 'epoch': 3}
{'type': 'loss', 'content': 0.08078884333372116, 'timestamp': '2025-10-02 00:55:34.210718', 'step': 24990, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:55:34.265105', 'step': 24990, 'epoch': 3}
{'type': 'loss', 'content': 0.01769385114312172, 'timestamp': '2025-10-02 00:55:34.267610', 'step': 24991, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:55:34.337433', 'step': 24991, 'epoch': 3}
{'type': 'loss', 'content': 0.017862435430288315, 'timestamp': '2025-10-02 00:55:34.350533', 'step': 24992, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:34.404634', 'step': 24992, 'epoch': 3}
{'type': 'loss', 'content': 0.01748247817158699, 'timestamp': '2025-10-02 00:55:34.410450', 'step': 24993, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:34.465762', 'step': 24993, 'epoch': 3}
{'type': 'loss', 'content': 0.08509007096290588, 'timestamp': '2025-10-02 00:55:34.468790', 'step': 24994, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:34.524627', 'step': 24994, 'epoch': 3}
{'type': 'loss', 'content': 0.05942491814494133, 'timestamp': '2025-10-02 00:55:34.526917', 'step': 24995, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:55:34.588213', 'step': 24995, 'epoch': 3}
{'type': 'loss', 'content': 0.044898051768541336, 'timestamp': '2025-10-02 00:55:34.599488', 'step': 24996, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:34.654068', 'step': 24996, 'epoch': 3}
{'type': 'loss', 'content': 0.07993590831756592, 'timestamp': '2025-10-02 00:55:34.656551', 'step': 24997, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:34.711395', 'step': 24997, 'epoch': 3}
{'type': 'loss', 'content': 0.08247899264097214, 'timestamp': '2025-10-02 00:55:34.720670', 'step': 24998, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:55:34.775589', 'step': 24998, 'epoch': 3}
{'type': 'loss', 'content': 0.008978798985481262, 'timestamp': '2025-10-02 00:55:34.778023', 'step': 24999, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:34.831949', 'step': 24999, 'epoch': 3}
{'type': 'loss', 'content': 0.12278412282466888, 'timestamp': '2025-10-02 00:55:34.837952', 'step': 25000, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 25000', 'timestamp': '2025-10-02 00:55:35.264374', 'step': 25000, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:35.323414', 'step': 25000, 'epoch': 3}
{'type': 'loss', 'content': 0.0505562461912632, 'timestamp': '2025-10-02 00:55:35.329805', 'step': 25001, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:35.391342', 'step': 25001, 'epoch': 3}
{'type': 'loss', 'content': 0.026911264285445213, 'timestamp': '2025-10-02 00:55:35.398241', 'step': 25002, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:55:35.465787', 'step': 25002, 'epoch': 3}
{'type': 'loss', 'content': 0.04727449640631676, 'timestamp': '2025-10-02 00:55:35.477740', 'step': 25003, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:55:35.540928', 'step': 25003, 'epoch': 3}
{'type': 'loss', 'content': 0.018115313723683357, 'timestamp': '2025-10-02 00:55:35.552373', 'step': 25004, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:35.608190', 'step': 25004, 'epoch': 3}
{'type': 'loss', 'content': 0.06914064288139343, 'timestamp': '2025-10-02 00:55:35.613956', 'step': 25005, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:55:35.668519', 'step': 25005, 'epoch': 3}
{'type': 'loss', 'content': 0.027148697525262833, 'timestamp': '2025-10-02 00:55:35.670839', 'step': 25006, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:35.725926', 'step': 25006, 'epoch': 3}
{'type': 'loss', 'content': 0.0423070453107357, 'timestamp': '2025-10-02 00:55:35.733224', 'step': 25007, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:35.788951', 'step': 25007, 'epoch': 3}
{'type': 'loss', 'content': 0.056759413331747055, 'timestamp': '2025-10-02 00:55:35.795299', 'step': 25008, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:35.849990', 'step': 25008, 'epoch': 3}
{'type': 'loss', 'content': 0.05659126862883568, 'timestamp': '2025-10-02 00:55:35.855796', 'step': 25009, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:55:35.909884', 'step': 25009, 'epoch': 3}
{'type': 'loss', 'content': 0.09775315970182419, 'timestamp': '2025-10-02 00:55:35.912275', 'step': 25010, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:35.967074', 'step': 25010, 'epoch': 3}
{'type': 'loss', 'content': 0.018579958006739616, 'timestamp': '2025-10-02 00:55:35.974166', 'step': 25011, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:55:36.036664', 'step': 25011, 'epoch': 3}
{'type': 'loss', 'content': 0.008241577073931694, 'timestamp': '2025-10-02 00:55:36.048096', 'step': 25012, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:36.101729', 'step': 25012, 'epoch': 3}
{'type': 'loss', 'content': 0.026983488351106644, 'timestamp': '2025-10-02 00:55:36.109080', 'step': 25013, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:55:36.163232', 'step': 25013, 'epoch': 3}
{'type': 'loss', 'content': 0.029233645647764206, 'timestamp': '2025-10-02 00:55:36.165902', 'step': 25014, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:36.220744', 'step': 25014, 'epoch': 3}
{'type': 'loss', 'content': 0.014863613061606884, 'timestamp': '2025-10-02 00:55:36.223233', 'step': 25015, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:36.277260', 'step': 25015, 'epoch': 3}
{'type': 'loss', 'content': 0.13160744309425354, 'timestamp': '2025-10-02 00:55:36.283112', 'step': 25016, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:36.338087', 'step': 25016, 'epoch': 3}
{'type': 'loss', 'content': 0.08615083992481232, 'timestamp': '2025-10-02 00:55:36.345502', 'step': 25017, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:55:36.399606', 'step': 25017, 'epoch': 3}
{'type': 'loss', 'content': 0.10426472127437592, 'timestamp': '2025-10-02 00:55:36.401935', 'step': 25018, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:55:36.460923', 'step': 25018, 'epoch': 3}
{'type': 'loss', 'content': 0.00958480965346098, 'timestamp': '2025-10-02 00:55:36.471116', 'step': 25019, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:36.526228', 'step': 25019, 'epoch': 3}
{'type': 'loss', 'content': 0.03694726526737213, 'timestamp': '2025-10-02 00:55:36.532177', 'step': 25020, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:55:36.587080', 'step': 25020, 'epoch': 3}
{'type': 'loss', 'content': 0.03531869873404503, 'timestamp': '2025-10-02 00:55:36.589372', 'step': 25021, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:36.647093', 'step': 25021, 'epoch': 3}
{'type': 'loss', 'content': 0.08041311055421829, 'timestamp': '2025-10-02 00:55:36.649342', 'step': 25022, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:36.703935', 'step': 25022, 'epoch': 3}
{'type': 'loss', 'content': 0.020447080954909325, 'timestamp': '2025-10-02 00:55:36.706048', 'step': 25023, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:36.760680', 'step': 25023, 'epoch': 3}
{'type': 'loss', 'content': 0.028596723452210426, 'timestamp': '2025-10-02 00:55:36.766976', 'step': 25024, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:36.821156', 'step': 25024, 'epoch': 3}
{'type': 'loss', 'content': 0.0713924914598465, 'timestamp': '2025-10-02 00:55:36.823952', 'step': 25025, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:36.878752', 'step': 25025, 'epoch': 3}
{'type': 'loss', 'content': 0.16109967231750488, 'timestamp': '2025-10-02 00:55:36.881545', 'step': 25026, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:36.936855', 'step': 25026, 'epoch': 3}
{'type': 'loss', 'content': 0.1455904096364975, 'timestamp': '2025-10-02 00:55:36.939224', 'step': 25027, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:36.994527', 'step': 25027, 'epoch': 3}
{'type': 'loss', 'content': 0.015626557171344757, 'timestamp': '2025-10-02 00:55:37.001071', 'step': 25028, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:55:37.056070', 'step': 25028, 'epoch': 3}
{'type': 'loss', 'content': 0.012471250258386135, 'timestamp': '2025-10-02 00:55:37.063434', 'step': 25029, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:37.117400', 'step': 25029, 'epoch': 3}
{'type': 'loss', 'content': 0.08081091940402985, 'timestamp': '2025-10-02 00:55:37.119586', 'step': 25030, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:37.173821', 'step': 25030, 'epoch': 3}
{'type': 'loss', 'content': 0.04985389485955238, 'timestamp': '2025-10-02 00:55:37.176161', 'step': 25031, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:37.231093', 'step': 25031, 'epoch': 3}
{'type': 'loss', 'content': 0.02050730772316456, 'timestamp': '2025-10-02 00:55:37.241061', 'step': 25032, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:55:37.295169', 'step': 25032, 'epoch': 3}
{'type': 'loss', 'content': 0.12785038352012634, 'timestamp': '2025-10-02 00:55:37.297680', 'step': 25033, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:37.354730', 'step': 25033, 'epoch': 3}
{'type': 'loss', 'content': 0.09273277223110199, 'timestamp': '2025-10-02 00:55:37.357185', 'step': 25034, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:55:37.417678', 'step': 25034, 'epoch': 3}
{'type': 'loss', 'content': 0.02813616208732128, 'timestamp': '2025-10-02 00:55:37.420360', 'step': 25035, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:55:37.492192', 'step': 25035, 'epoch': 3}
{'type': 'loss', 'content': 0.0042220125906169415, 'timestamp': '2025-10-02 00:55:37.505259', 'step': 25036, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:37.587271', 'step': 25036, 'epoch': 3}
{'type': 'loss', 'content': 0.015754953026771545, 'timestamp': '2025-10-02 00:55:37.594160', 'step': 25037, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:37.657199', 'step': 25037, 'epoch': 3}
{'type': 'loss', 'content': 0.13372448086738586, 'timestamp': '2025-10-02 00:55:37.661550', 'step': 25038, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:37.721840', 'step': 25038, 'epoch': 3}
{'type': 'loss', 'content': 0.017700213938951492, 'timestamp': '2025-10-02 00:55:37.726117', 'step': 25039, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:55:37.788576', 'step': 25039, 'epoch': 3}
{'type': 'loss', 'content': 0.011712202802300453, 'timestamp': '2025-10-02 00:55:37.798609', 'step': 25040, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:55:37.861423', 'step': 25040, 'epoch': 3}
{'type': 'loss', 'content': 0.03659893199801445, 'timestamp': '2025-10-02 00:55:37.863913', 'step': 25041, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:55:37.923338', 'step': 25041, 'epoch': 3}
{'type': 'loss', 'content': 0.11492771655321121, 'timestamp': '2025-10-02 00:55:37.927116', 'step': 25042, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:37.989994', 'step': 25042, 'epoch': 3}
{'type': 'loss', 'content': 0.06601858884096146, 'timestamp': '2025-10-02 00:55:37.995594', 'step': 25043, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:38.056093', 'step': 25043, 'epoch': 3}
{'type': 'loss', 'content': 0.06348922848701477, 'timestamp': '2025-10-02 00:55:38.063305', 'step': 25044, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:55:38.122592', 'step': 25044, 'epoch': 3}
{'type': 'loss', 'content': 0.003232591785490513, 'timestamp': '2025-10-02 00:55:38.125828', 'step': 25045, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:55:38.184772', 'step': 25045, 'epoch': 3}
{'type': 'loss', 'content': 0.05170610547065735, 'timestamp': '2025-10-02 00:55:38.188245', 'step': 25046, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:38.244875', 'step': 25046, 'epoch': 3}
{'type': 'loss', 'content': 0.05048023536801338, 'timestamp': '2025-10-02 00:55:38.266628', 'step': 25047, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:55:38.326391', 'step': 25047, 'epoch': 3}
{'type': 'loss', 'content': 0.0032693527173250914, 'timestamp': '2025-10-02 00:55:38.332648', 'step': 25048, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:55:38.403309', 'step': 25048, 'epoch': 3}
{'type': 'loss', 'content': 0.07045092433691025, 'timestamp': '2025-10-02 00:55:38.409130', 'step': 25049, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:55:38.471523', 'step': 25049, 'epoch': 3}
{'type': 'loss', 'content': 0.006360779982060194, 'timestamp': '2025-10-02 00:55:38.482132', 'step': 25050, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:55:38.575213', 'step': 25050, 'epoch': 3}
{'type': 'loss', 'content': 0.03371826186776161, 'timestamp': '2025-10-02 00:55:38.585645', 'step': 25051, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:55:38.656307', 'step': 25051, 'epoch': 3}
{'type': 'loss', 'content': 0.023742003366351128, 'timestamp': '2025-10-02 00:55:38.666634', 'step': 25052, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:55:38.733348', 'step': 25052, 'epoch': 3}
{'type': 'loss', 'content': 0.06396356970071793, 'timestamp': '2025-10-02 00:55:38.744595', 'step': 25053, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:38.808800', 'step': 25053, 'epoch': 3}
{'type': 'loss', 'content': 0.02050638198852539, 'timestamp': '2025-10-02 00:55:38.812541', 'step': 25054, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:55:38.871504', 'step': 25054, 'epoch': 3}
{'type': 'loss', 'content': 0.06797333061695099, 'timestamp': '2025-10-02 00:55:38.880340', 'step': 25055, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:55:38.952651', 'step': 25055, 'epoch': 3}
{'type': 'loss', 'content': 0.03355085849761963, 'timestamp': '2025-10-02 00:55:38.964358', 'step': 25056, 'epoch': 3}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:56:06.228820', 'step': 25056, 'epoch': 3}
{'type': 'pplx', 'content': 105.00348004993278, 'timestamp': '2025-10-02 00:56:06.232970', 'step': 25056, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:06.287853', 'step': 25056, 'epoch': 3}
{'type': 'loss', 'content': 0.05402941256761551, 'timestamp': '2025-10-02 00:56:06.291614', 'step': 25057, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:06.348539', 'step': 25057, 'epoch': 3}
{'type': 'loss', 'content': 0.09198334068059921, 'timestamp': '2025-10-02 00:56:06.358071', 'step': 25058, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:06.414363', 'step': 25058, 'epoch': 3}
{'type': 'loss', 'content': 0.019421638920903206, 'timestamp': '2025-10-02 00:56:06.417082', 'step': 25059, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:06.472066', 'step': 25059, 'epoch': 3}
{'type': 'loss', 'content': 0.03657585754990578, 'timestamp': '2025-10-02 00:56:06.478790', 'step': 25060, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:06.533667', 'step': 25060, 'epoch': 3}
{'type': 'loss', 'content': 0.0202617384493351, 'timestamp': '2025-10-02 00:56:06.541228', 'step': 25061, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:06.597058', 'step': 25061, 'epoch': 3}
{'type': 'loss', 'content': 0.019349366426467896, 'timestamp': '2025-10-02 00:56:06.603066', 'step': 25062, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:06.658645', 'step': 25062, 'epoch': 3}
{'type': 'loss', 'content': 0.0263731200248003, 'timestamp': '2025-10-02 00:56:06.660990', 'step': 25063, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:06.718100', 'step': 25063, 'epoch': 3}
{'type': 'loss', 'content': 0.058426350355148315, 'timestamp': '2025-10-02 00:56:06.725492', 'step': 25064, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:06.779515', 'step': 25064, 'epoch': 3}
{'type': 'loss', 'content': 0.0652686208486557, 'timestamp': '2025-10-02 00:56:06.787039', 'step': 25065, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 544], 'flops': 10880066115712.0}, 'timestamp': '2025-10-02 00:56:06.871250', 'step': 25065, 'epoch': 3}
{'type': 'loss', 'content': 0.0024914995301514864, 'timestamp': '2025-10-02 00:56:06.886109', 'step': 25066, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:06.943686', 'step': 25066, 'epoch': 3}
{'type': 'loss', 'content': 0.0009152014390565455, 'timestamp': '2025-10-02 00:56:06.953271', 'step': 25067, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:56:07.008352', 'step': 25067, 'epoch': 3}
{'type': 'loss', 'content': 0.053465839475393295, 'timestamp': '2025-10-02 00:56:07.014133', 'step': 25068, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:07.068260', 'step': 25068, 'epoch': 3}
{'type': 'loss', 'content': 0.02588784694671631, 'timestamp': '2025-10-02 00:56:07.071523', 'step': 25069, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:07.127416', 'step': 25069, 'epoch': 3}
{'type': 'loss', 'content': 0.016963839530944824, 'timestamp': '2025-10-02 00:56:07.133545', 'step': 25070, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:07.189276', 'step': 25070, 'epoch': 3}
{'type': 'loss', 'content': 0.07840897142887115, 'timestamp': '2025-10-02 00:56:07.198590', 'step': 25071, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:07.253871', 'step': 25071, 'epoch': 3}
{'type': 'loss', 'content': 0.02345443330705166, 'timestamp': '2025-10-02 00:56:07.259850', 'step': 25072, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:07.314523', 'step': 25072, 'epoch': 3}
{'type': 'loss', 'content': 0.05888591706752777, 'timestamp': '2025-10-02 00:56:07.317580', 'step': 25073, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:07.371625', 'step': 25073, 'epoch': 3}
{'type': 'loss', 'content': 0.10288692265748978, 'timestamp': '2025-10-02 00:56:07.374724', 'step': 25074, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:56:07.443828', 'step': 25074, 'epoch': 3}
{'type': 'loss', 'content': 0.0047181034460663795, 'timestamp': '2025-10-02 00:56:07.456174', 'step': 25075, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:56:07.511251', 'step': 25075, 'epoch': 3}
{'type': 'loss', 'content': 0.08167746663093567, 'timestamp': '2025-10-02 00:56:07.517342', 'step': 25076, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:07.571547', 'step': 25076, 'epoch': 3}
{'type': 'loss', 'content': 0.035968419164419174, 'timestamp': '2025-10-02 00:56:07.577636', 'step': 25077, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:07.632787', 'step': 25077, 'epoch': 3}
{'type': 'loss', 'content': 0.0136899808421731, 'timestamp': '2025-10-02 00:56:07.638915', 'step': 25078, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:07.693940', 'step': 25078, 'epoch': 3}
{'type': 'loss', 'content': 0.049152921885252, 'timestamp': '2025-10-02 00:56:07.696396', 'step': 25079, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:07.750905', 'step': 25079, 'epoch': 3}
{'type': 'loss', 'content': 0.09230083227157593, 'timestamp': '2025-10-02 00:56:07.757728', 'step': 25080, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:07.812005', 'step': 25080, 'epoch': 3}
{'type': 'loss', 'content': 0.04483017325401306, 'timestamp': '2025-10-02 00:56:07.819668', 'step': 25081, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:07.874748', 'step': 25081, 'epoch': 3}
{'type': 'loss', 'content': 0.014662185683846474, 'timestamp': '2025-10-02 00:56:07.882404', 'step': 25082, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:56:07.937830', 'step': 25082, 'epoch': 3}
{'type': 'loss', 'content': 0.030195098370313644, 'timestamp': '2025-10-02 00:56:07.943646', 'step': 25083, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:08.003947', 'step': 25083, 'epoch': 3}
{'type': 'loss', 'content': 0.003011820837855339, 'timestamp': '2025-10-02 00:56:08.010000', 'step': 25084, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:08.067485', 'step': 25084, 'epoch': 3}
{'type': 'loss', 'content': 0.03176376223564148, 'timestamp': '2025-10-02 00:56:08.078436', 'step': 25085, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:08.133262', 'step': 25085, 'epoch': 3}
{'type': 'loss', 'content': 0.0012534299166873097, 'timestamp': '2025-10-02 00:56:08.140854', 'step': 25086, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:56:08.195797', 'step': 25086, 'epoch': 3}
{'type': 'loss', 'content': 0.14646725356578827, 'timestamp': '2025-10-02 00:56:08.198313', 'step': 25087, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:08.251985', 'step': 25087, 'epoch': 3}
{'type': 'loss', 'content': 0.10759157687425613, 'timestamp': '2025-10-02 00:56:08.257942', 'step': 25088, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:56:08.327708', 'step': 25088, 'epoch': 3}
{'type': 'loss', 'content': 0.053624752908945084, 'timestamp': '2025-10-02 00:56:08.341506', 'step': 25089, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:08.396390', 'step': 25089, 'epoch': 3}
{'type': 'loss', 'content': 0.03857113793492317, 'timestamp': '2025-10-02 00:56:08.398999', 'step': 25090, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:08.453235', 'step': 25090, 'epoch': 3}
{'type': 'loss', 'content': 0.07863472402095795, 'timestamp': '2025-10-02 00:56:08.455784', 'step': 25091, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:08.510394', 'step': 25091, 'epoch': 3}
{'type': 'loss', 'content': 0.04067913070321083, 'timestamp': '2025-10-02 00:56:08.516337', 'step': 25092, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:56:08.577595', 'step': 25092, 'epoch': 3}
{'type': 'loss', 'content': 0.01843305118381977, 'timestamp': '2025-10-02 00:56:08.589055', 'step': 25093, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:56:08.643179', 'step': 25093, 'epoch': 3}
{'type': 'loss', 'content': 0.03793792054057121, 'timestamp': '2025-10-02 00:56:08.645293', 'step': 25094, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:08.699059', 'step': 25094, 'epoch': 3}
{'type': 'loss', 'content': 0.01802309788763523, 'timestamp': '2025-10-02 00:56:08.704932', 'step': 25095, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:08.759174', 'step': 25095, 'epoch': 3}
{'type': 'loss', 'content': 0.09789188206195831, 'timestamp': '2025-10-02 00:56:08.765160', 'step': 25096, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:56:08.827090', 'step': 25096, 'epoch': 3}
{'type': 'loss', 'content': 0.12626639008522034, 'timestamp': '2025-10-02 00:56:08.829524', 'step': 25097, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:08.884349', 'step': 25097, 'epoch': 3}
{'type': 'loss', 'content': 0.03703661635518074, 'timestamp': '2025-10-02 00:56:08.886884', 'step': 25098, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:08.942511', 'step': 25098, 'epoch': 3}
{'type': 'loss', 'content': 0.03330598399043083, 'timestamp': '2025-10-02 00:56:08.944908', 'step': 25099, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:08.998748', 'step': 25099, 'epoch': 3}
{'type': 'loss', 'content': 0.029903339222073555, 'timestamp': '2025-10-02 00:56:09.005468', 'step': 25100, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:09.065349', 'step': 25100, 'epoch': 3}
{'type': 'loss', 'content': 0.006761991418898106, 'timestamp': '2025-10-02 00:56:09.075620', 'step': 25101, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:09.129150', 'step': 25101, 'epoch': 3}
{'type': 'loss', 'content': 0.0636732280254364, 'timestamp': '2025-10-02 00:56:09.136815', 'step': 25102, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:09.190934', 'step': 25102, 'epoch': 3}
{'type': 'loss', 'content': 0.048013366758823395, 'timestamp': '2025-10-02 00:56:09.197029', 'step': 25103, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:09.259237', 'step': 25103, 'epoch': 3}
{'type': 'loss', 'content': 0.1393025517463684, 'timestamp': '2025-10-02 00:56:09.264941', 'step': 25104, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:09.318674', 'step': 25104, 'epoch': 3}
{'type': 'loss', 'content': 0.07678375393152237, 'timestamp': '2025-10-02 00:56:09.321175', 'step': 25105, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:09.376173', 'step': 25105, 'epoch': 3}
{'type': 'loss', 'content': 0.04740244522690773, 'timestamp': '2025-10-02 00:56:09.378432', 'step': 25106, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:56:09.440218', 'step': 25106, 'epoch': 3}
{'type': 'loss', 'content': 0.03221313655376434, 'timestamp': '2025-10-02 00:56:09.450690', 'step': 25107, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:09.506662', 'step': 25107, 'epoch': 3}
{'type': 'loss', 'content': 0.04795002564787865, 'timestamp': '2025-10-02 00:56:09.512439', 'step': 25108, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:09.566433', 'step': 25108, 'epoch': 3}
{'type': 'loss', 'content': 0.09902320057153702, 'timestamp': '2025-10-02 00:56:09.568767', 'step': 25109, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:09.622887', 'step': 25109, 'epoch': 3}
{'type': 'loss', 'content': 0.059450339525938034, 'timestamp': '2025-10-02 00:56:09.625278', 'step': 25110, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:09.688628', 'step': 25110, 'epoch': 3}
{'type': 'loss', 'content': 0.045178577303886414, 'timestamp': '2025-10-02 00:56:09.690963', 'step': 25111, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:09.745193', 'step': 25111, 'epoch': 3}
{'type': 'loss', 'content': 0.02978934533894062, 'timestamp': '2025-10-02 00:56:09.755504', 'step': 25112, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:09.809624', 'step': 25112, 'epoch': 3}
{'type': 'loss', 'content': 0.05194305628538132, 'timestamp': '2025-10-02 00:56:09.812122', 'step': 25113, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:09.874117', 'step': 25113, 'epoch': 3}
{'type': 'loss', 'content': 0.008404668420553207, 'timestamp': '2025-10-02 00:56:09.876535', 'step': 25114, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:09.932284', 'step': 25114, 'epoch': 3}
{'type': 'loss', 'content': 0.03453199937939644, 'timestamp': '2025-10-02 00:56:09.938287', 'step': 25115, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:09.993469', 'step': 25115, 'epoch': 3}
{'type': 'loss', 'content': 0.0612376444041729, 'timestamp': '2025-10-02 00:56:09.999684', 'step': 25116, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:10.057262', 'step': 25116, 'epoch': 3}
{'type': 'loss', 'content': 0.031493645161390305, 'timestamp': '2025-10-02 00:56:10.068228', 'step': 25117, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:10.123606', 'step': 25117, 'epoch': 3}
{'type': 'loss', 'content': 0.017519934102892876, 'timestamp': '2025-10-02 00:56:10.125851', 'step': 25118, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:10.184629', 'step': 25118, 'epoch': 3}
{'type': 'loss', 'content': 0.04955310747027397, 'timestamp': '2025-10-02 00:56:10.194780', 'step': 25119, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:56:10.269403', 'step': 25119, 'epoch': 3}
{'type': 'loss', 'content': 0.010326279327273369, 'timestamp': '2025-10-02 00:56:10.283399', 'step': 25120, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:10.337400', 'step': 25120, 'epoch': 3}
{'type': 'loss', 'content': 0.03388742730021477, 'timestamp': '2025-10-02 00:56:10.339952', 'step': 25121, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:10.394862', 'step': 25121, 'epoch': 3}
{'type': 'loss', 'content': 0.07480904459953308, 'timestamp': '2025-10-02 00:56:10.397524', 'step': 25122, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:10.454702', 'step': 25122, 'epoch': 3}
{'type': 'loss', 'content': 0.05996527522802353, 'timestamp': '2025-10-02 00:56:10.464009', 'step': 25123, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:10.520209', 'step': 25123, 'epoch': 3}
{'type': 'loss', 'content': 0.054516203701496124, 'timestamp': '2025-10-02 00:56:10.530349', 'step': 25124, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:10.585832', 'step': 25124, 'epoch': 3}
{'type': 'loss', 'content': 0.028409136459231377, 'timestamp': '2025-10-02 00:56:10.595053', 'step': 25125, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:10.651021', 'step': 25125, 'epoch': 3}
{'type': 'loss', 'content': 0.06863924860954285, 'timestamp': '2025-10-02 00:56:10.654320', 'step': 25126, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:10.710282', 'step': 25126, 'epoch': 3}
{'type': 'loss', 'content': 0.034934502094984055, 'timestamp': '2025-10-02 00:56:10.713886', 'step': 25127, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:10.772421', 'step': 25127, 'epoch': 3}
{'type': 'loss', 'content': 0.02504269964993, 'timestamp': '2025-10-02 00:56:10.779498', 'step': 25128, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:10.833765', 'step': 25128, 'epoch': 3}
{'type': 'loss', 'content': 0.07827086746692657, 'timestamp': '2025-10-02 00:56:10.836675', 'step': 25129, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:10.900665', 'step': 25129, 'epoch': 3}
{'type': 'loss', 'content': 0.04804709553718567, 'timestamp': '2025-10-02 00:56:10.908216', 'step': 25130, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:10.964279', 'step': 25130, 'epoch': 3}
{'type': 'loss', 'content': 0.06087178736925125, 'timestamp': '2025-10-02 00:56:10.967562', 'step': 25131, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:11.023772', 'step': 25131, 'epoch': 3}
{'type': 'loss', 'content': 0.09871680289506912, 'timestamp': '2025-10-02 00:56:11.031142', 'step': 25132, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:11.086515', 'step': 25132, 'epoch': 3}
{'type': 'loss', 'content': 0.06705878674983978, 'timestamp': '2025-10-02 00:56:11.089429', 'step': 25133, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:11.145390', 'step': 25133, 'epoch': 3}
{'type': 'loss', 'content': 0.034081004559993744, 'timestamp': '2025-10-02 00:56:11.148644', 'step': 25134, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:11.205024', 'step': 25134, 'epoch': 3}
{'type': 'loss', 'content': 0.08613287657499313, 'timestamp': '2025-10-02 00:56:11.207305', 'step': 25135, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:11.264189', 'step': 25135, 'epoch': 3}
{'type': 'loss', 'content': 0.017458627000451088, 'timestamp': '2025-10-02 00:56:11.271133', 'step': 25136, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:56:11.325656', 'step': 25136, 'epoch': 3}
{'type': 'loss', 'content': 0.0853482186794281, 'timestamp': '2025-10-02 00:56:11.329538', 'step': 25137, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:11.384351', 'step': 25137, 'epoch': 3}
{'type': 'loss', 'content': 0.06342169642448425, 'timestamp': '2025-10-02 00:56:11.387417', 'step': 25138, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:11.443858', 'step': 25138, 'epoch': 3}
{'type': 'loss', 'content': 0.05694336071610451, 'timestamp': '2025-10-02 00:56:11.445782', 'step': 25139, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:11.499816', 'step': 25139, 'epoch': 3}
{'type': 'loss', 'content': 0.02418355643749237, 'timestamp': '2025-10-02 00:56:11.505556', 'step': 25140, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:11.559908', 'step': 25140, 'epoch': 3}
{'type': 'loss', 'content': 0.011890942230820656, 'timestamp': '2025-10-02 00:56:11.562229', 'step': 25141, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:11.616171', 'step': 25141, 'epoch': 3}
{'type': 'loss', 'content': 0.019182221964001656, 'timestamp': '2025-10-02 00:56:11.619006', 'step': 25142, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:11.673548', 'step': 25142, 'epoch': 3}
{'type': 'loss', 'content': 0.033388327807188034, 'timestamp': '2025-10-02 00:56:11.679218', 'step': 25143, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:11.733801', 'step': 25143, 'epoch': 3}
{'type': 'loss', 'content': 0.03668567165732384, 'timestamp': '2025-10-02 00:56:11.739770', 'step': 25144, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:56:11.800329', 'step': 25144, 'epoch': 3}
{'type': 'loss', 'content': 0.0040564811788499355, 'timestamp': '2025-10-02 00:56:11.811662', 'step': 25145, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:11.866533', 'step': 25145, 'epoch': 3}
{'type': 'loss', 'content': 0.09792248904705048, 'timestamp': '2025-10-02 00:56:11.868874', 'step': 25146, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:56:11.938079', 'step': 25146, 'epoch': 3}
{'type': 'loss', 'content': 0.026266420260071754, 'timestamp': '2025-10-02 00:56:11.950177', 'step': 25147, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:12.006126', 'step': 25147, 'epoch': 3}
{'type': 'loss', 'content': 0.09318936616182327, 'timestamp': '2025-10-02 00:56:12.016342', 'step': 25148, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:12.069618', 'step': 25148, 'epoch': 3}
{'type': 'loss', 'content': 0.04241551458835602, 'timestamp': '2025-10-02 00:56:12.071667', 'step': 25149, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:12.126201', 'step': 25149, 'epoch': 3}
{'type': 'loss', 'content': 0.04294496774673462, 'timestamp': '2025-10-02 00:56:12.131906', 'step': 25150, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:12.187783', 'step': 25150, 'epoch': 3}
{'type': 'loss', 'content': 0.0012333422200754285, 'timestamp': '2025-10-02 00:56:12.197068', 'step': 25151, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:12.251649', 'step': 25151, 'epoch': 3}
{'type': 'loss', 'content': 0.029880249872803688, 'timestamp': '2025-10-02 00:56:12.257738', 'step': 25152, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:12.311326', 'step': 25152, 'epoch': 3}
{'type': 'loss', 'content': 0.13081088662147522, 'timestamp': '2025-10-02 00:56:12.313748', 'step': 25153, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:12.367630', 'step': 25153, 'epoch': 3}
{'type': 'loss', 'content': 0.0021532541140913963, 'timestamp': '2025-10-02 00:56:12.369692', 'step': 25154, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:12.425130', 'step': 25154, 'epoch': 3}
{'type': 'loss', 'content': 0.03464550897479057, 'timestamp': '2025-10-02 00:56:12.432544', 'step': 25155, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:12.487612', 'step': 25155, 'epoch': 3}
{'type': 'loss', 'content': 0.05312454700469971, 'timestamp': '2025-10-02 00:56:12.493604', 'step': 25156, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:12.547276', 'step': 25156, 'epoch': 3}
{'type': 'loss', 'content': 0.03294159844517708, 'timestamp': '2025-10-02 00:56:12.555227', 'step': 25157, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:12.608856', 'step': 25157, 'epoch': 3}
{'type': 'loss', 'content': 0.02749832719564438, 'timestamp': '2025-10-02 00:56:12.611441', 'step': 25158, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:12.665408', 'step': 25158, 'epoch': 3}
{'type': 'loss', 'content': 0.014271233230829239, 'timestamp': '2025-10-02 00:56:12.667711', 'step': 25159, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:12.721792', 'step': 25159, 'epoch': 3}
{'type': 'loss', 'content': 0.08896704763174057, 'timestamp': '2025-10-02 00:56:12.727704', 'step': 25160, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:12.781659', 'step': 25160, 'epoch': 3}
{'type': 'loss', 'content': 0.031147858127951622, 'timestamp': '2025-10-02 00:56:12.791340', 'step': 25161, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:56:12.854168', 'step': 25161, 'epoch': 3}
{'type': 'loss', 'content': 0.01774672605097294, 'timestamp': '2025-10-02 00:56:12.865102', 'step': 25162, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:12.919643', 'step': 25162, 'epoch': 3}
{'type': 'loss', 'content': 0.015792682766914368, 'timestamp': '2025-10-02 00:56:12.929105', 'step': 25163, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:12.983938', 'step': 25163, 'epoch': 3}
{'type': 'loss', 'content': 0.08508753776550293, 'timestamp': '2025-10-02 00:56:12.990212', 'step': 25164, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:13.044018', 'step': 25164, 'epoch': 3}
{'type': 'loss', 'content': 0.11638687551021576, 'timestamp': '2025-10-02 00:56:13.046687', 'step': 25165, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:13.101603', 'step': 25165, 'epoch': 3}
{'type': 'loss', 'content': 0.07523399591445923, 'timestamp': '2025-10-02 00:56:13.107674', 'step': 25166, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:13.162193', 'step': 25166, 'epoch': 3}
{'type': 'loss', 'content': 0.027641505002975464, 'timestamp': '2025-10-02 00:56:13.164835', 'step': 25167, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:13.219086', 'step': 25167, 'epoch': 3}
{'type': 'loss', 'content': 0.05105084925889969, 'timestamp': '2025-10-02 00:56:13.225278', 'step': 25168, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:13.279854', 'step': 25168, 'epoch': 3}
{'type': 'loss', 'content': 0.03445253148674965, 'timestamp': '2025-10-02 00:56:13.282341', 'step': 25169, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:13.336374', 'step': 25169, 'epoch': 3}
{'type': 'loss', 'content': 0.02650940977036953, 'timestamp': '2025-10-02 00:56:13.338528', 'step': 25170, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:13.393998', 'step': 25170, 'epoch': 3}
{'type': 'loss', 'content': 0.03921809419989586, 'timestamp': '2025-10-02 00:56:13.396455', 'step': 25171, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:13.455666', 'step': 25171, 'epoch': 3}
{'type': 'loss', 'content': 0.04268162325024605, 'timestamp': '2025-10-02 00:56:13.466647', 'step': 25172, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:13.520946', 'step': 25172, 'epoch': 3}
{'type': 'loss', 'content': 0.07653074711561203, 'timestamp': '2025-10-02 00:56:13.523760', 'step': 25173, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:56:13.592298', 'step': 25173, 'epoch': 3}
{'type': 'loss', 'content': 0.07806767523288727, 'timestamp': '2025-10-02 00:56:13.594687', 'step': 25174, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:13.650339', 'step': 25174, 'epoch': 3}
{'type': 'loss', 'content': 0.028592782095074654, 'timestamp': '2025-10-02 00:56:13.659602', 'step': 25175, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:13.714175', 'step': 25175, 'epoch': 3}
{'type': 'loss', 'content': 0.03710652142763138, 'timestamp': '2025-10-02 00:56:13.720549', 'step': 25176, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:13.775773', 'step': 25176, 'epoch': 3}
{'type': 'loss', 'content': 0.0013822638429701328, 'timestamp': '2025-10-02 00:56:13.782033', 'step': 25177, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:13.838023', 'step': 25177, 'epoch': 3}
{'type': 'loss', 'content': 0.00011580468708416447, 'timestamp': '2025-10-02 00:56:13.843857', 'step': 25178, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:13.899341', 'step': 25178, 'epoch': 3}
{'type': 'loss', 'content': 0.020855916664004326, 'timestamp': '2025-10-02 00:56:13.904928', 'step': 25179, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:13.959187', 'step': 25179, 'epoch': 3}
{'type': 'loss', 'content': 0.028209615498781204, 'timestamp': '2025-10-02 00:56:13.964666', 'step': 25180, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:14.019702', 'step': 25180, 'epoch': 3}
{'type': 'loss', 'content': 0.0245236586779356, 'timestamp': '2025-10-02 00:56:14.022778', 'step': 25181, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:14.076627', 'step': 25181, 'epoch': 3}
{'type': 'loss', 'content': 0.053629711270332336, 'timestamp': '2025-10-02 00:56:14.079322', 'step': 25182, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:14.133295', 'step': 25182, 'epoch': 3}
{'type': 'loss', 'content': 0.019643954932689667, 'timestamp': '2025-10-02 00:56:14.135907', 'step': 25183, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:14.192364', 'step': 25183, 'epoch': 3}
{'type': 'loss', 'content': 0.03358300402760506, 'timestamp': '2025-10-02 00:56:14.198213', 'step': 25184, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:14.252515', 'step': 25184, 'epoch': 3}
{'type': 'loss', 'content': 0.013614054769277573, 'timestamp': '2025-10-02 00:56:14.258601', 'step': 25185, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:14.314611', 'step': 25185, 'epoch': 3}
{'type': 'loss', 'content': 0.07934340089559555, 'timestamp': '2025-10-02 00:56:14.316777', 'step': 25186, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:14.371072', 'step': 25186, 'epoch': 3}
{'type': 'loss', 'content': 0.08340571075677872, 'timestamp': '2025-10-02 00:56:14.373046', 'step': 25187, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:14.427945', 'step': 25187, 'epoch': 3}
{'type': 'loss', 'content': 0.009977919049561024, 'timestamp': '2025-10-02 00:56:14.438148', 'step': 25188, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:56:14.492605', 'step': 25188, 'epoch': 3}
{'type': 'loss', 'content': 0.10361991077661514, 'timestamp': '2025-10-02 00:56:14.494937', 'step': 25189, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:14.550513', 'step': 25189, 'epoch': 3}
{'type': 'loss', 'content': 0.05047301575541496, 'timestamp': '2025-10-02 00:56:14.552830', 'step': 25190, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:14.607235', 'step': 25190, 'epoch': 3}
{'type': 'loss', 'content': 0.011891378089785576, 'timestamp': '2025-10-02 00:56:14.609892', 'step': 25191, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:14.664520', 'step': 25191, 'epoch': 3}
{'type': 'loss', 'content': 0.02976011112332344, 'timestamp': '2025-10-02 00:56:14.671082', 'step': 25192, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:14.726142', 'step': 25192, 'epoch': 3}
{'type': 'loss', 'content': 0.023522550240159035, 'timestamp': '2025-10-02 00:56:14.735712', 'step': 25193, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:14.793266', 'step': 25193, 'epoch': 3}
{'type': 'loss', 'content': 0.007579956669360399, 'timestamp': '2025-10-02 00:56:14.796321', 'step': 25194, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:14.851498', 'step': 25194, 'epoch': 3}
{'type': 'loss', 'content': 0.005027131177484989, 'timestamp': '2025-10-02 00:56:14.853793', 'step': 25195, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:14.914517', 'step': 25195, 'epoch': 3}
{'type': 'loss', 'content': 0.03090091049671173, 'timestamp': '2025-10-02 00:56:14.925511', 'step': 25196, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:14.979949', 'step': 25196, 'epoch': 3}
{'type': 'loss', 'content': 0.01972014456987381, 'timestamp': '2025-10-02 00:56:14.982319', 'step': 25197, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:15.040620', 'step': 25197, 'epoch': 3}
{'type': 'loss', 'content': 0.04241182655096054, 'timestamp': '2025-10-02 00:56:15.042785', 'step': 25198, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:56:15.111417', 'step': 25198, 'epoch': 3}
{'type': 'loss', 'content': 0.025281637907028198, 'timestamp': '2025-10-02 00:56:15.123740', 'step': 25199, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:15.178339', 'step': 25199, 'epoch': 3}
{'type': 'loss', 'content': 0.021924788132309914, 'timestamp': '2025-10-02 00:56:15.184255', 'step': 25200, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:15.238044', 'step': 25200, 'epoch': 3}
{'type': 'loss', 'content': 0.025801606476306915, 'timestamp': '2025-10-02 00:56:15.245552', 'step': 25201, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:15.299733', 'step': 25201, 'epoch': 3}
{'type': 'loss', 'content': 0.039398420602083206, 'timestamp': '2025-10-02 00:56:15.301488', 'step': 25202, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:15.356779', 'step': 25202, 'epoch': 3}
{'type': 'loss', 'content': 0.06287514418363571, 'timestamp': '2025-10-02 00:56:15.359185', 'step': 25203, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:15.413785', 'step': 25203, 'epoch': 3}
{'type': 'loss', 'content': 0.01907108724117279, 'timestamp': '2025-10-02 00:56:15.421903', 'step': 25204, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:15.476651', 'step': 25204, 'epoch': 3}
{'type': 'loss', 'content': 0.028732802718877792, 'timestamp': '2025-10-02 00:56:15.479381', 'step': 25205, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:15.537626', 'step': 25205, 'epoch': 3}
{'type': 'loss', 'content': 0.056425418704748154, 'timestamp': '2025-10-02 00:56:15.540385', 'step': 25206, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:15.595501', 'step': 25206, 'epoch': 3}
{'type': 'loss', 'content': 0.034197017550468445, 'timestamp': '2025-10-02 00:56:15.599076', 'step': 25207, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:15.663698', 'step': 25207, 'epoch': 3}
{'type': 'loss', 'content': 0.019026905298233032, 'timestamp': '2025-10-02 00:56:15.671960', 'step': 25208, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:56:15.725497', 'step': 25208, 'epoch': 3}
{'type': 'loss', 'content': 0.14983871579170227, 'timestamp': '2025-10-02 00:56:15.727668', 'step': 25209, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:15.781332', 'step': 25209, 'epoch': 3}
{'type': 'loss', 'content': 0.05380844697356224, 'timestamp': '2025-10-02 00:56:15.783979', 'step': 25210, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:15.837930', 'step': 25210, 'epoch': 3}
{'type': 'loss', 'content': 0.024917706847190857, 'timestamp': '2025-10-02 00:56:15.845400', 'step': 25211, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:15.901119', 'step': 25211, 'epoch': 3}
{'type': 'loss', 'content': 0.06761228293180466, 'timestamp': '2025-10-02 00:56:15.907069', 'step': 25212, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:15.960861', 'step': 25212, 'epoch': 3}
{'type': 'loss', 'content': 0.1030501276254654, 'timestamp': '2025-10-02 00:56:15.963150', 'step': 25213, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:16.017832', 'step': 25213, 'epoch': 3}
{'type': 'loss', 'content': 0.10813532024621964, 'timestamp': '2025-10-02 00:56:16.020435', 'step': 25214, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:16.076055', 'step': 25214, 'epoch': 3}
{'type': 'loss', 'content': 0.0389409177005291, 'timestamp': '2025-10-02 00:56:16.083598', 'step': 25215, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:16.138851', 'step': 25215, 'epoch': 3}
{'type': 'loss', 'content': 0.03228379413485527, 'timestamp': '2025-10-02 00:56:16.145096', 'step': 25216, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:56:16.205718', 'step': 25216, 'epoch': 3}
{'type': 'loss', 'content': 0.08200401067733765, 'timestamp': '2025-10-02 00:56:16.217029', 'step': 25217, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:16.272191', 'step': 25217, 'epoch': 3}
{'type': 'loss', 'content': 0.056933045387268066, 'timestamp': '2025-10-02 00:56:16.274695', 'step': 25218, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:16.329684', 'step': 25218, 'epoch': 3}
{'type': 'loss', 'content': 0.011897794902324677, 'timestamp': '2025-10-02 00:56:16.332319', 'step': 25219, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:56:16.386236', 'step': 25219, 'epoch': 3}
{'type': 'loss', 'content': 0.05377333238720894, 'timestamp': '2025-10-02 00:56:16.392327', 'step': 25220, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:16.446601', 'step': 25220, 'epoch': 3}
{'type': 'loss', 'content': 0.028800034895539284, 'timestamp': '2025-10-02 00:56:16.449354', 'step': 25221, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:16.504706', 'step': 25221, 'epoch': 3}
{'type': 'loss', 'content': 0.03974383324384689, 'timestamp': '2025-10-02 00:56:16.514243', 'step': 25222, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:16.572822', 'step': 25222, 'epoch': 3}
{'type': 'loss', 'content': 0.03592992573976517, 'timestamp': '2025-10-02 00:56:16.583015', 'step': 25223, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:16.637389', 'step': 25223, 'epoch': 3}
{'type': 'loss', 'content': 0.04282583296298981, 'timestamp': '2025-10-02 00:56:16.643478', 'step': 25224, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:16.697874', 'step': 25224, 'epoch': 3}
{'type': 'loss', 'content': 0.08444925397634506, 'timestamp': '2025-10-02 00:56:16.700353', 'step': 25225, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:16.754573', 'step': 25225, 'epoch': 3}
{'type': 'loss', 'content': 0.023156873881816864, 'timestamp': '2025-10-02 00:56:16.762118', 'step': 25226, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:16.816298', 'step': 25226, 'epoch': 3}
{'type': 'loss', 'content': 0.09158080816268921, 'timestamp': '2025-10-02 00:56:16.818516', 'step': 25227, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:16.876954', 'step': 25227, 'epoch': 3}
{'type': 'loss', 'content': 0.031159281730651855, 'timestamp': '2025-10-02 00:56:16.887974', 'step': 25228, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:16.941903', 'step': 25228, 'epoch': 3}
{'type': 'loss', 'content': 0.050142236053943634, 'timestamp': '2025-10-02 00:56:16.949532', 'step': 25229, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:17.003872', 'step': 25229, 'epoch': 3}
{'type': 'loss', 'content': 0.036081817001104355, 'timestamp': '2025-10-02 00:56:17.009856', 'step': 25230, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:56:17.073277', 'step': 25230, 'epoch': 3}
{'type': 'loss', 'content': 0.00772317498922348, 'timestamp': '2025-10-02 00:56:17.084144', 'step': 25231, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:17.138779', 'step': 25231, 'epoch': 3}
{'type': 'loss', 'content': 0.08189831674098969, 'timestamp': '2025-10-02 00:56:17.144706', 'step': 25232, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:17.198050', 'step': 25232, 'epoch': 3}
{'type': 'loss', 'content': 0.09596022963523865, 'timestamp': '2025-10-02 00:56:17.200894', 'step': 25233, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:56:17.262616', 'step': 25233, 'epoch': 3}
{'type': 'loss', 'content': 0.02403830923140049, 'timestamp': '2025-10-02 00:56:17.273024', 'step': 25234, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:17.327414', 'step': 25234, 'epoch': 3}
{'type': 'loss', 'content': 0.031245706602931023, 'timestamp': '2025-10-02 00:56:17.330126', 'step': 25235, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:17.386101', 'step': 25235, 'epoch': 3}
{'type': 'loss', 'content': 0.048942890018224716, 'timestamp': '2025-10-02 00:56:17.393335', 'step': 25236, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:17.448174', 'step': 25236, 'epoch': 3}
{'type': 'loss', 'content': 0.05905994772911072, 'timestamp': '2025-10-02 00:56:17.451329', 'step': 25237, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:17.506258', 'step': 25237, 'epoch': 3}
{'type': 'loss', 'content': 0.04233783110976219, 'timestamp': '2025-10-02 00:56:17.512011', 'step': 25238, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:17.568807', 'step': 25238, 'epoch': 3}
{'type': 'loss', 'content': 0.017963247373700142, 'timestamp': '2025-10-02 00:56:17.578352', 'step': 25239, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:17.632479', 'step': 25239, 'epoch': 3}
{'type': 'loss', 'content': 0.07367128133773804, 'timestamp': '2025-10-02 00:56:17.638572', 'step': 25240, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:56:17.693653', 'step': 25240, 'epoch': 3}
{'type': 'loss', 'content': 0.08524882793426514, 'timestamp': '2025-10-02 00:56:17.695930', 'step': 25241, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:17.750424', 'step': 25241, 'epoch': 3}
{'type': 'loss', 'content': 0.026081383228302002, 'timestamp': '2025-10-02 00:56:17.752852', 'step': 25242, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:56:17.806652', 'step': 25242, 'epoch': 3}
{'type': 'loss', 'content': 0.04212835803627968, 'timestamp': '2025-10-02 00:56:17.809216', 'step': 25243, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:17.863235', 'step': 25243, 'epoch': 3}
{'type': 'loss', 'content': 0.055150099098682404, 'timestamp': '2025-10-02 00:56:17.869267', 'step': 25244, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:56:17.937867', 'step': 25244, 'epoch': 3}
{'type': 'loss', 'content': 0.03909694030880928, 'timestamp': '2025-10-02 00:56:17.951427', 'step': 25245, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:18.005922', 'step': 25245, 'epoch': 3}
{'type': 'loss', 'content': 0.019552653655409813, 'timestamp': '2025-10-02 00:56:18.008104', 'step': 25246, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:56:18.063077', 'step': 25246, 'epoch': 3}
{'type': 'loss', 'content': 0.015967749059200287, 'timestamp': '2025-10-02 00:56:18.065338', 'step': 25247, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:18.118962', 'step': 25247, 'epoch': 3}
{'type': 'loss', 'content': 0.06849022209644318, 'timestamp': '2025-10-02 00:56:18.124911', 'step': 25248, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:18.178783', 'step': 25248, 'epoch': 3}
{'type': 'loss', 'content': 0.012893371284008026, 'timestamp': '2025-10-02 00:56:18.181244', 'step': 25249, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:56:18.235751', 'step': 25249, 'epoch': 3}
{'type': 'loss', 'content': 0.07830294966697693, 'timestamp': '2025-10-02 00:56:18.237907', 'step': 25250, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:18.292665', 'step': 25250, 'epoch': 3}
{'type': 'loss', 'content': 0.034070782363414764, 'timestamp': '2025-10-02 00:56:18.295055', 'step': 25251, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:18.348927', 'step': 25251, 'epoch': 3}
{'type': 'loss', 'content': 0.05857304111123085, 'timestamp': '2025-10-02 00:56:18.354879', 'step': 25252, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:18.408550', 'step': 25252, 'epoch': 3}
{'type': 'loss', 'content': 0.05560905486345291, 'timestamp': '2025-10-02 00:56:18.410940', 'step': 25253, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:56:18.473673', 'step': 25253, 'epoch': 3}
{'type': 'loss', 'content': 0.011596782132983208, 'timestamp': '2025-10-02 00:56:18.484481', 'step': 25254, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:18.540736', 'step': 25254, 'epoch': 3}
{'type': 'loss', 'content': 0.04820133000612259, 'timestamp': '2025-10-02 00:56:18.550238', 'step': 25255, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:18.609237', 'step': 25255, 'epoch': 3}
{'type': 'loss', 'content': 0.022620635107159615, 'timestamp': '2025-10-02 00:56:18.620184', 'step': 25256, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:18.674657', 'step': 25256, 'epoch': 3}
{'type': 'loss', 'content': 0.045273154973983765, 'timestamp': '2025-10-02 00:56:18.677077', 'step': 25257, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:18.730991', 'step': 25257, 'epoch': 3}
{'type': 'loss', 'content': 0.11686751991510391, 'timestamp': '2025-10-02 00:56:18.733439', 'step': 25258, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:18.788297', 'step': 25258, 'epoch': 3}
{'type': 'loss', 'content': 0.16321219503879547, 'timestamp': '2025-10-02 00:56:18.790613', 'step': 25259, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:18.844390', 'step': 25259, 'epoch': 3}
{'type': 'loss', 'content': 0.07536271959543228, 'timestamp': '2025-10-02 00:56:18.851642', 'step': 25260, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:18.905609', 'step': 25260, 'epoch': 3}
{'type': 'loss', 'content': 0.024583464488387108, 'timestamp': '2025-10-02 00:56:18.911562', 'step': 25261, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:18.965723', 'step': 25261, 'epoch': 3}
{'type': 'loss', 'content': 0.15865372121334076, 'timestamp': '2025-10-02 00:56:18.968124', 'step': 25262, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:19.023720', 'step': 25262, 'epoch': 3}
{'type': 'loss', 'content': 0.03518884256482124, 'timestamp': '2025-10-02 00:56:19.026315', 'step': 25263, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:19.080384', 'step': 25263, 'epoch': 3}
{'type': 'loss', 'content': 0.09982004016637802, 'timestamp': '2025-10-02 00:56:19.088312', 'step': 25264, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:19.141305', 'step': 25264, 'epoch': 3}
{'type': 'loss', 'content': 0.03472035005688667, 'timestamp': '2025-10-02 00:56:19.145281', 'step': 25265, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:19.199327', 'step': 25265, 'epoch': 3}
{'type': 'loss', 'content': 0.05087288096547127, 'timestamp': '2025-10-02 00:56:19.201482', 'step': 25266, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:19.255534', 'step': 25266, 'epoch': 3}
{'type': 'loss', 'content': 0.15127825736999512, 'timestamp': '2025-10-02 00:56:19.258000', 'step': 25267, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:19.313742', 'step': 25267, 'epoch': 3}
{'type': 'loss', 'content': 0.08832108974456787, 'timestamp': '2025-10-02 00:56:19.319392', 'step': 25268, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:19.373298', 'step': 25268, 'epoch': 3}
{'type': 'loss', 'content': 0.0123878363519907, 'timestamp': '2025-10-02 00:56:19.380949', 'step': 25269, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:56:19.436619', 'step': 25269, 'epoch': 3}
{'type': 'loss', 'content': 0.06393416970968246, 'timestamp': '2025-10-02 00:56:19.438795', 'step': 25270, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:19.493992', 'step': 25270, 'epoch': 3}
{'type': 'loss', 'content': 0.028210295364260674, 'timestamp': '2025-10-02 00:56:19.497641', 'step': 25271, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:19.554533', 'step': 25271, 'epoch': 3}
{'type': 'loss', 'content': 0.036630354821681976, 'timestamp': '2025-10-02 00:56:19.562068', 'step': 25272, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:56:19.626138', 'step': 25272, 'epoch': 3}
{'type': 'loss', 'content': 0.02681211568415165, 'timestamp': '2025-10-02 00:56:19.637456', 'step': 25273, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:19.697513', 'step': 25273, 'epoch': 3}
{'type': 'loss', 'content': 0.03608986362814903, 'timestamp': '2025-10-02 00:56:19.700267', 'step': 25274, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:19.763046', 'step': 25274, 'epoch': 3}
{'type': 'loss', 'content': 0.029942357912659645, 'timestamp': '2025-10-02 00:56:19.773236', 'step': 25275, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:19.831260', 'step': 25275, 'epoch': 3}
{'type': 'loss', 'content': 0.05913762375712395, 'timestamp': '2025-10-02 00:56:19.839604', 'step': 25276, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:19.895337', 'step': 25276, 'epoch': 3}
{'type': 'loss', 'content': 0.034307900816202164, 'timestamp': '2025-10-02 00:56:19.903024', 'step': 25277, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:19.960604', 'step': 25277, 'epoch': 3}
{'type': 'loss', 'content': 0.009215256199240685, 'timestamp': '2025-10-02 00:56:19.968196', 'step': 25278, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:20.024897', 'step': 25278, 'epoch': 3}
{'type': 'loss', 'content': 0.036093827337026596, 'timestamp': '2025-10-02 00:56:20.028239', 'step': 25279, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:20.087080', 'step': 25279, 'epoch': 3}
{'type': 'loss', 'content': 0.013151977211236954, 'timestamp': '2025-10-02 00:56:20.098065', 'step': 25280, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:20.154490', 'step': 25280, 'epoch': 3}
{'type': 'loss', 'content': 0.027988240122795105, 'timestamp': '2025-10-02 00:56:20.164796', 'step': 25281, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:20.222353', 'step': 25281, 'epoch': 3}
{'type': 'loss', 'content': 0.014818361029028893, 'timestamp': '2025-10-02 00:56:20.231714', 'step': 25282, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:20.287513', 'step': 25282, 'epoch': 3}
{'type': 'loss', 'content': 0.05156000331044197, 'timestamp': '2025-10-02 00:56:20.293489', 'step': 25283, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:20.350283', 'step': 25283, 'epoch': 3}
{'type': 'loss', 'content': 0.03871830925345421, 'timestamp': '2025-10-02 00:56:20.357110', 'step': 25284, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:20.416246', 'step': 25284, 'epoch': 3}
{'type': 'loss', 'content': 0.01586298458278179, 'timestamp': '2025-10-02 00:56:20.422265', 'step': 25285, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:20.483240', 'step': 25285, 'epoch': 3}
{'type': 'loss', 'content': 0.040593020617961884, 'timestamp': '2025-10-02 00:56:20.492761', 'step': 25286, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:20.549778', 'step': 25286, 'epoch': 3}
{'type': 'loss', 'content': 0.008730463683605194, 'timestamp': '2025-10-02 00:56:20.557318', 'step': 25287, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:20.614686', 'step': 25287, 'epoch': 3}
{'type': 'loss', 'content': 0.04279186204075813, 'timestamp': '2025-10-02 00:56:20.620570', 'step': 25288, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:56:20.691465', 'step': 25288, 'epoch': 3}
{'type': 'loss', 'content': 0.041961055248975754, 'timestamp': '2025-10-02 00:56:20.704877', 'step': 25289, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:56:20.771453', 'step': 25289, 'epoch': 3}
{'type': 'loss', 'content': 0.0052024913020431995, 'timestamp': '2025-10-02 00:56:20.782118', 'step': 25290, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:20.840103', 'step': 25290, 'epoch': 3}
{'type': 'loss', 'content': 0.02987184002995491, 'timestamp': '2025-10-02 00:56:20.843593', 'step': 25291, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:56:20.909016', 'step': 25291, 'epoch': 3}
{'type': 'loss', 'content': 0.02766854129731655, 'timestamp': '2025-10-02 00:56:20.920413', 'step': 25292, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:20.977961', 'step': 25292, 'epoch': 3}
{'type': 'loss', 'content': 0.11615113168954849, 'timestamp': '2025-10-02 00:56:20.982748', 'step': 25293, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:21.039498', 'step': 25293, 'epoch': 3}
{'type': 'loss', 'content': 0.1680726557970047, 'timestamp': '2025-10-02 00:56:21.042239', 'step': 25294, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:21.097227', 'step': 25294, 'epoch': 3}
{'type': 'loss', 'content': 0.06946040689945221, 'timestamp': '2025-10-02 00:56:21.103230', 'step': 25295, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:21.158616', 'step': 25295, 'epoch': 3}
{'type': 'loss', 'content': 0.016522275283932686, 'timestamp': '2025-10-02 00:56:21.165123', 'step': 25296, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:21.219176', 'step': 25296, 'epoch': 3}
{'type': 'loss', 'content': 0.09138929843902588, 'timestamp': '2025-10-02 00:56:21.222784', 'step': 25297, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:21.280129', 'step': 25297, 'epoch': 3}
{'type': 'loss', 'content': 0.06468778848648071, 'timestamp': '2025-10-02 00:56:21.283410', 'step': 25298, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:56:21.347365', 'step': 25298, 'epoch': 3}
{'type': 'loss', 'content': 0.025696024298667908, 'timestamp': '2025-10-02 00:56:21.358212', 'step': 25299, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:56:21.422657', 'step': 25299, 'epoch': 3}
{'type': 'loss', 'content': 0.07671579718589783, 'timestamp': '2025-10-02 00:56:21.434087', 'step': 25300, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:21.491632', 'step': 25300, 'epoch': 3}
{'type': 'loss', 'content': 0.05105273425579071, 'timestamp': '2025-10-02 00:56:21.494447', 'step': 25301, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:21.549924', 'step': 25301, 'epoch': 3}
{'type': 'loss', 'content': 0.027620954439044, 'timestamp': '2025-10-02 00:56:21.552238', 'step': 25302, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:21.607891', 'step': 25302, 'epoch': 3}
{'type': 'loss', 'content': 0.03402233496308327, 'timestamp': '2025-10-02 00:56:21.611482', 'step': 25303, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:21.667151', 'step': 25303, 'epoch': 3}
{'type': 'loss', 'content': 0.02639811299741268, 'timestamp': '2025-10-02 00:56:21.672977', 'step': 25304, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:56:21.734331', 'step': 25304, 'epoch': 3}
{'type': 'loss', 'content': 0.035782940685749054, 'timestamp': '2025-10-02 00:56:21.746084', 'step': 25305, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:21.801763', 'step': 25305, 'epoch': 3}
{'type': 'loss', 'content': 0.0731286108493805, 'timestamp': '2025-10-02 00:56:21.804623', 'step': 25306, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:56:21.859609', 'step': 25306, 'epoch': 3}
{'type': 'loss', 'content': 0.047063492238521576, 'timestamp': '2025-10-02 00:56:21.863001', 'step': 25307, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:21.917823', 'step': 25307, 'epoch': 3}
{'type': 'loss', 'content': 0.05614296719431877, 'timestamp': '2025-10-02 00:56:21.927979', 'step': 25308, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:21.982368', 'step': 25308, 'epoch': 3}
{'type': 'loss', 'content': 0.07698819041252136, 'timestamp': '2025-10-02 00:56:21.984792', 'step': 25309, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:22.038907', 'step': 25309, 'epoch': 3}
{'type': 'loss', 'content': 0.047609150409698486, 'timestamp': '2025-10-02 00:56:22.041690', 'step': 25310, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:22.102123', 'step': 25310, 'epoch': 3}
{'type': 'loss', 'content': 0.01815062388777733, 'timestamp': '2025-10-02 00:56:22.112345', 'step': 25311, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:22.169251', 'step': 25311, 'epoch': 3}
{'type': 'loss', 'content': 0.038937944918870926, 'timestamp': '2025-10-02 00:56:22.177565', 'step': 25312, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:22.231155', 'step': 25312, 'epoch': 3}
{'type': 'loss', 'content': 0.0906553864479065, 'timestamp': '2025-10-02 00:56:22.233607', 'step': 25313, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:22.288487', 'step': 25313, 'epoch': 3}
{'type': 'loss', 'content': 0.035181835293769836, 'timestamp': '2025-10-02 00:56:22.290931', 'step': 25314, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:22.346684', 'step': 25314, 'epoch': 3}
{'type': 'loss', 'content': 0.05224242061376572, 'timestamp': '2025-10-02 00:56:22.349120', 'step': 25315, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:22.403886', 'step': 25315, 'epoch': 3}
{'type': 'loss', 'content': 0.12440457940101624, 'timestamp': '2025-10-02 00:56:22.409843', 'step': 25316, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:22.465995', 'step': 25316, 'epoch': 3}
{'type': 'loss', 'content': 0.014485781081020832, 'timestamp': '2025-10-02 00:56:22.476274', 'step': 25317, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:56:22.539132', 'step': 25317, 'epoch': 3}
{'type': 'loss', 'content': 0.041358333081007004, 'timestamp': '2025-10-02 00:56:22.549763', 'step': 25318, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:22.605437', 'step': 25318, 'epoch': 3}
{'type': 'loss', 'content': 0.045954301953315735, 'timestamp': '2025-10-02 00:56:22.607633', 'step': 25319, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:22.662292', 'step': 25319, 'epoch': 3}
{'type': 'loss', 'content': 0.08477356284856796, 'timestamp': '2025-10-02 00:56:22.668380', 'step': 25320, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:22.724382', 'step': 25320, 'epoch': 3}
{'type': 'loss', 'content': 0.034751180559396744, 'timestamp': '2025-10-02 00:56:22.726954', 'step': 25321, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:22.781059', 'step': 25321, 'epoch': 3}
{'type': 'loss', 'content': 0.024755829945206642, 'timestamp': '2025-10-02 00:56:22.788700', 'step': 25322, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:56:22.851231', 'step': 25322, 'epoch': 3}
{'type': 'loss', 'content': 0.023776941001415253, 'timestamp': '2025-10-02 00:56:22.862076', 'step': 25323, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:22.918207', 'step': 25323, 'epoch': 3}
{'type': 'loss', 'content': 0.08073984086513519, 'timestamp': '2025-10-02 00:56:22.924194', 'step': 25324, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:22.977615', 'step': 25324, 'epoch': 3}
{'type': 'loss', 'content': 0.06784429401159286, 'timestamp': '2025-10-02 00:56:22.985323', 'step': 25325, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:23.039818', 'step': 25325, 'epoch': 3}
{'type': 'loss', 'content': 0.009732591919600964, 'timestamp': '2025-10-02 00:56:23.042802', 'step': 25326, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:56:23.117200', 'step': 25326, 'epoch': 3}
{'type': 'loss', 'content': 0.026670310646295547, 'timestamp': '2025-10-02 00:56:23.130382', 'step': 25327, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:23.184986', 'step': 25327, 'epoch': 3}
{'type': 'loss', 'content': 0.00701623922213912, 'timestamp': '2025-10-02 00:56:23.191143', 'step': 25328, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:56:23.253090', 'step': 25328, 'epoch': 3}
{'type': 'loss', 'content': 0.003918871749192476, 'timestamp': '2025-10-02 00:56:23.264841', 'step': 25329, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:23.320255', 'step': 25329, 'epoch': 3}
{'type': 'loss', 'content': 0.07244866341352463, 'timestamp': '2025-10-02 00:56:23.329587', 'step': 25330, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:23.384226', 'step': 25330, 'epoch': 3}
{'type': 'loss', 'content': 0.046700820326805115, 'timestamp': '2025-10-02 00:56:23.390343', 'step': 25331, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:23.445427', 'step': 25331, 'epoch': 3}
{'type': 'loss', 'content': 0.026331814005970955, 'timestamp': '2025-10-02 00:56:23.451472', 'step': 25332, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:23.505679', 'step': 25332, 'epoch': 3}
{'type': 'loss', 'content': 0.021037107333540916, 'timestamp': '2025-10-02 00:56:23.511663', 'step': 25333, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:23.567492', 'step': 25333, 'epoch': 3}
{'type': 'loss', 'content': 0.04432026669383049, 'timestamp': '2025-10-02 00:56:23.572989', 'step': 25334, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:23.627741', 'step': 25334, 'epoch': 3}
{'type': 'loss', 'content': 0.01572047360241413, 'timestamp': '2025-10-02 00:56:23.630507', 'step': 25335, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:56:23.692206', 'step': 25335, 'epoch': 3}
{'type': 'loss', 'content': 0.050379909574985504, 'timestamp': '2025-10-02 00:56:23.703455', 'step': 25336, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:23.757146', 'step': 25336, 'epoch': 3}
{'type': 'loss', 'content': 0.07137696444988251, 'timestamp': '2025-10-02 00:56:23.759568', 'step': 25337, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:23.815916', 'step': 25337, 'epoch': 3}
{'type': 'loss', 'content': 0.1052609458565712, 'timestamp': '2025-10-02 00:56:23.818448', 'step': 25338, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:23.874425', 'step': 25338, 'epoch': 3}
{'type': 'loss', 'content': 0.026973851025104523, 'timestamp': '2025-10-02 00:56:23.876674', 'step': 25339, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:56:23.946009', 'step': 25339, 'epoch': 3}
{'type': 'loss', 'content': 0.0027534354012459517, 'timestamp': '2025-10-02 00:56:23.959082', 'step': 25340, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:24.014202', 'step': 25340, 'epoch': 3}
{'type': 'loss', 'content': 0.015972841531038284, 'timestamp': '2025-10-02 00:56:24.023555', 'step': 25341, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:24.078285', 'step': 25341, 'epoch': 3}
{'type': 'loss', 'content': 0.08123386651277542, 'timestamp': '2025-10-02 00:56:24.080842', 'step': 25342, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:24.136500', 'step': 25342, 'epoch': 3}
{'type': 'loss', 'content': 0.06193176284432411, 'timestamp': '2025-10-02 00:56:24.138601', 'step': 25343, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:24.193964', 'step': 25343, 'epoch': 3}
{'type': 'loss', 'content': 0.024536389857530594, 'timestamp': '2025-10-02 00:56:24.200175', 'step': 25344, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:24.254240', 'step': 25344, 'epoch': 3}
{'type': 'loss', 'content': 0.061882179230451584, 'timestamp': '2025-10-02 00:56:24.256630', 'step': 25345, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:24.316377', 'step': 25345, 'epoch': 3}
{'type': 'loss', 'content': 0.02919379435479641, 'timestamp': '2025-10-02 00:56:24.326577', 'step': 25346, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:24.380448', 'step': 25346, 'epoch': 3}
{'type': 'loss', 'content': 0.08239417523145676, 'timestamp': '2025-10-02 00:56:24.382762', 'step': 25347, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:56:24.444990', 'step': 25347, 'epoch': 3}
{'type': 'loss', 'content': 0.037351515144109726, 'timestamp': '2025-10-02 00:56:24.456200', 'step': 25348, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:24.510808', 'step': 25348, 'epoch': 3}
{'type': 'loss', 'content': 0.005117729306221008, 'timestamp': '2025-10-02 00:56:24.518285', 'step': 25349, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:24.575872', 'step': 25349, 'epoch': 3}
{'type': 'loss', 'content': 0.07906437665224075, 'timestamp': '2025-10-02 00:56:24.578555', 'step': 25350, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:24.633325', 'step': 25350, 'epoch': 3}
{'type': 'loss', 'content': 0.04837929829955101, 'timestamp': '2025-10-02 00:56:24.635363', 'step': 25351, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:24.689848', 'step': 25351, 'epoch': 3}
{'type': 'loss', 'content': 0.07180812209844589, 'timestamp': '2025-10-02 00:56:24.695915', 'step': 25352, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:24.751109', 'step': 25352, 'epoch': 3}
{'type': 'loss', 'content': 0.02833493985235691, 'timestamp': '2025-10-02 00:56:24.761389', 'step': 25353, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:24.816182', 'step': 25353, 'epoch': 3}
{'type': 'loss', 'content': 0.02461962215602398, 'timestamp': '2025-10-02 00:56:24.825527', 'step': 25354, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:24.881083', 'step': 25354, 'epoch': 3}
{'type': 'loss', 'content': 0.04331352561712265, 'timestamp': '2025-10-02 00:56:24.883702', 'step': 25355, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:24.938513', 'step': 25355, 'epoch': 3}
{'type': 'loss', 'content': 0.014871111139655113, 'timestamp': '2025-10-02 00:56:24.944485', 'step': 25356, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:24.998339', 'step': 25356, 'epoch': 3}
{'type': 'loss', 'content': 0.039738621562719345, 'timestamp': '2025-10-02 00:56:25.000600', 'step': 25357, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:25.055098', 'step': 25357, 'epoch': 3}
{'type': 'loss', 'content': 0.08177930861711502, 'timestamp': '2025-10-02 00:56:25.057518', 'step': 25358, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:25.113834', 'step': 25358, 'epoch': 3}
{'type': 'loss', 'content': 0.021029310300946236, 'timestamp': '2025-10-02 00:56:25.123320', 'step': 25359, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:56:25.192038', 'step': 25359, 'epoch': 3}
{'type': 'loss', 'content': 0.08231949061155319, 'timestamp': '2025-10-02 00:56:25.204753', 'step': 25360, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:25.259058', 'step': 25360, 'epoch': 3}
{'type': 'loss', 'content': 0.03638303652405739, 'timestamp': '2025-10-02 00:56:25.261265', 'step': 25361, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:56:25.334440', 'step': 25361, 'epoch': 3}
{'type': 'loss', 'content': 0.026241900399327278, 'timestamp': '2025-10-02 00:56:25.347093', 'step': 25362, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:56:25.409216', 'step': 25362, 'epoch': 3}
{'type': 'loss', 'content': 0.01478695310652256, 'timestamp': '2025-10-02 00:56:25.419696', 'step': 25363, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:25.473942', 'step': 25363, 'epoch': 3}
{'type': 'loss', 'content': 0.030115222558379173, 'timestamp': '2025-10-02 00:56:25.480126', 'step': 25364, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:56:25.533741', 'step': 25364, 'epoch': 3}
{'type': 'loss', 'content': 0.052915409207344055, 'timestamp': '2025-10-02 00:56:25.536142', 'step': 25365, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:25.590335', 'step': 25365, 'epoch': 3}
{'type': 'loss', 'content': 0.04953049495816231, 'timestamp': '2025-10-02 00:56:25.599634', 'step': 25366, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:25.654508', 'step': 25366, 'epoch': 3}
{'type': 'loss', 'content': 0.07459833472967148, 'timestamp': '2025-10-02 00:56:25.660281', 'step': 25367, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:25.715431', 'step': 25367, 'epoch': 3}
{'type': 'loss', 'content': 0.033204853534698486, 'timestamp': '2025-10-02 00:56:25.721258', 'step': 25368, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:56:25.788369', 'step': 25368, 'epoch': 3}
{'type': 'loss', 'content': 0.01327773742377758, 'timestamp': '2025-10-02 00:56:25.801348', 'step': 25369, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:56:25.862847', 'step': 25369, 'epoch': 3}
{'type': 'loss', 'content': 0.020721012726426125, 'timestamp': '2025-10-02 00:56:25.873364', 'step': 25370, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:25.928508', 'step': 25370, 'epoch': 3}
{'type': 'loss', 'content': 0.04007357731461525, 'timestamp': '2025-10-02 00:56:25.936026', 'step': 25371, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:25.991331', 'step': 25371, 'epoch': 3}
{'type': 'loss', 'content': 0.0690467581152916, 'timestamp': '2025-10-02 00:56:25.997888', 'step': 25372, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:26.061488', 'step': 25372, 'epoch': 3}
{'type': 'loss', 'content': 0.008125172927975655, 'timestamp': '2025-10-02 00:56:26.067293', 'step': 25373, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:26.121390', 'step': 25373, 'epoch': 3}
{'type': 'loss', 'content': 0.06617186963558197, 'timestamp': '2025-10-02 00:56:26.123833', 'step': 25374, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:56:26.178466', 'step': 25374, 'epoch': 3}
{'type': 'loss', 'content': 0.08071213215589523, 'timestamp': '2025-10-02 00:56:26.180731', 'step': 25375, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:26.234866', 'step': 25375, 'epoch': 3}
{'type': 'loss', 'content': 0.03055964969098568, 'timestamp': '2025-10-02 00:56:26.241648', 'step': 25376, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:26.295096', 'step': 25376, 'epoch': 3}
{'type': 'loss', 'content': 0.10087505728006363, 'timestamp': '2025-10-02 00:56:26.302516', 'step': 25377, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:26.356515', 'step': 25377, 'epoch': 3}
{'type': 'loss', 'content': 0.03590364381670952, 'timestamp': '2025-10-02 00:56:26.362571', 'step': 25378, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:26.417228', 'step': 25378, 'epoch': 3}
{'type': 'loss', 'content': 0.03127312287688255, 'timestamp': '2025-10-02 00:56:26.420108', 'step': 25379, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:26.474831', 'step': 25379, 'epoch': 3}
{'type': 'loss', 'content': 0.02377641201019287, 'timestamp': '2025-10-02 00:56:26.481278', 'step': 25380, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:26.535995', 'step': 25380, 'epoch': 3}
{'type': 'loss', 'content': 0.03627568855881691, 'timestamp': '2025-10-02 00:56:26.538293', 'step': 25381, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:26.592766', 'step': 25381, 'epoch': 3}
{'type': 'loss', 'content': 0.038729432970285416, 'timestamp': '2025-10-02 00:56:26.595191', 'step': 25382, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:26.649851', 'step': 25382, 'epoch': 3}
{'type': 'loss', 'content': 0.016017718240618706, 'timestamp': '2025-10-02 00:56:26.652334', 'step': 25383, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:26.706240', 'step': 25383, 'epoch': 3}
{'type': 'loss', 'content': 0.13144470751285553, 'timestamp': '2025-10-02 00:56:26.712234', 'step': 25384, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:26.768164', 'step': 25384, 'epoch': 3}
{'type': 'loss', 'content': 0.07201708853244781, 'timestamp': '2025-10-02 00:56:26.774123', 'step': 25385, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:26.828501', 'step': 25385, 'epoch': 3}
{'type': 'loss', 'content': 0.1725623607635498, 'timestamp': '2025-10-02 00:56:26.830753', 'step': 25386, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:26.887205', 'step': 25386, 'epoch': 3}
{'type': 'loss', 'content': 0.044960588216781616, 'timestamp': '2025-10-02 00:56:26.889522', 'step': 25387, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:56:26.950531', 'step': 25387, 'epoch': 3}
{'type': 'loss', 'content': 0.023137368261814117, 'timestamp': '2025-10-02 00:56:26.961736', 'step': 25388, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:56:27.015055', 'step': 25388, 'epoch': 3}
{'type': 'loss', 'content': 0.05297717824578285, 'timestamp': '2025-10-02 00:56:27.017406', 'step': 25389, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:27.077643', 'step': 25389, 'epoch': 3}
{'type': 'loss', 'content': 0.02577936090528965, 'timestamp': '2025-10-02 00:56:27.087806', 'step': 25390, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:27.143646', 'step': 25390, 'epoch': 3}
{'type': 'loss', 'content': 0.060410089790821075, 'timestamp': '2025-10-02 00:56:27.153106', 'step': 25391, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:27.208424', 'step': 25391, 'epoch': 3}
{'type': 'loss', 'content': 0.05173416808247566, 'timestamp': '2025-10-02 00:56:27.215174', 'step': 25392, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:56:27.269750', 'step': 25392, 'epoch': 3}
{'type': 'loss', 'content': 0.08727334439754486, 'timestamp': '2025-10-02 00:56:27.272248', 'step': 25393, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:27.327278', 'step': 25393, 'epoch': 3}
{'type': 'loss', 'content': 0.12986427545547485, 'timestamp': '2025-10-02 00:56:27.329062', 'step': 25394, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:27.385204', 'step': 25394, 'epoch': 3}
{'type': 'loss', 'content': 0.05117707699537277, 'timestamp': '2025-10-02 00:56:27.391617', 'step': 25395, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:27.447291', 'step': 25395, 'epoch': 3}
{'type': 'loss', 'content': 0.021062424406409264, 'timestamp': '2025-10-02 00:56:27.457595', 'step': 25396, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:56:27.530295', 'step': 25396, 'epoch': 3}
{'type': 'loss', 'content': 0.007919859141111374, 'timestamp': '2025-10-02 00:56:27.544830', 'step': 25397, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:27.601074', 'step': 25397, 'epoch': 3}
{'type': 'loss', 'content': 0.051754940301179886, 'timestamp': '2025-10-02 00:56:27.603449', 'step': 25398, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:27.658521', 'step': 25398, 'epoch': 3}
{'type': 'loss', 'content': 0.04443598911166191, 'timestamp': '2025-10-02 00:56:27.660863', 'step': 25399, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:27.717327', 'step': 25399, 'epoch': 3}
{'type': 'loss', 'content': 0.01981947012245655, 'timestamp': '2025-10-02 00:56:27.727646', 'step': 25400, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:27.781755', 'step': 25400, 'epoch': 3}
{'type': 'loss', 'content': 0.0671999379992485, 'timestamp': '2025-10-02 00:56:27.784027', 'step': 25401, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:27.838366', 'step': 25401, 'epoch': 3}
{'type': 'loss', 'content': 0.10411194711923599, 'timestamp': '2025-10-02 00:56:27.841423', 'step': 25402, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:27.897100', 'step': 25402, 'epoch': 3}
{'type': 'loss', 'content': 0.06030642241239548, 'timestamp': '2025-10-02 00:56:27.906604', 'step': 25403, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:56:27.969302', 'step': 25403, 'epoch': 3}
{'type': 'loss', 'content': 0.015465734526515007, 'timestamp': '2025-10-02 00:56:27.980881', 'step': 25404, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:28.035286', 'step': 25404, 'epoch': 3}
{'type': 'loss', 'content': 0.03865741565823555, 'timestamp': '2025-10-02 00:56:28.037710', 'step': 25405, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:28.094951', 'step': 25405, 'epoch': 3}
{'type': 'loss', 'content': 0.026393746957182884, 'timestamp': '2025-10-02 00:56:28.097465', 'step': 25406, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:28.153115', 'step': 25406, 'epoch': 3}
{'type': 'loss', 'content': 0.05069826915860176, 'timestamp': '2025-10-02 00:56:28.155490', 'step': 25407, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:28.214990', 'step': 25407, 'epoch': 3}
{'type': 'loss', 'content': 0.00993835274130106, 'timestamp': '2025-10-02 00:56:28.225914', 'step': 25408, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:28.281908', 'step': 25408, 'epoch': 3}
{'type': 'loss', 'content': 0.020755348727107048, 'timestamp': '2025-10-02 00:56:28.284336', 'step': 25409, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:28.339707', 'step': 25409, 'epoch': 3}
{'type': 'loss', 'content': 0.021211707964539528, 'timestamp': '2025-10-02 00:56:28.341685', 'step': 25410, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:28.397700', 'step': 25410, 'epoch': 3}
{'type': 'loss', 'content': 0.04693125933408737, 'timestamp': '2025-10-02 00:56:28.399702', 'step': 25411, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:28.455718', 'step': 25411, 'epoch': 3}
{'type': 'loss', 'content': 0.01747380942106247, 'timestamp': '2025-10-02 00:56:28.462335', 'step': 25412, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:28.516361', 'step': 25412, 'epoch': 3}
{'type': 'loss', 'content': 0.0586884580552578, 'timestamp': '2025-10-02 00:56:28.520247', 'step': 25413, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:28.575721', 'step': 25413, 'epoch': 3}
{'type': 'loss', 'content': 0.022140180692076683, 'timestamp': '2025-10-02 00:56:28.580504', 'step': 25414, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:28.636803', 'step': 25414, 'epoch': 3}
{'type': 'loss', 'content': 0.08885850757360458, 'timestamp': '2025-10-02 00:56:28.642344', 'step': 25415, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:28.697312', 'step': 25415, 'epoch': 3}
{'type': 'loss', 'content': 0.020581865683197975, 'timestamp': '2025-10-02 00:56:28.707067', 'step': 25416, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:56:28.760871', 'step': 25416, 'epoch': 3}
{'type': 'loss', 'content': 0.04990999028086662, 'timestamp': '2025-10-02 00:56:28.763931', 'step': 25417, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:28.819428', 'step': 25417, 'epoch': 3}
{'type': 'loss', 'content': 0.03579242154955864, 'timestamp': '2025-10-02 00:56:28.828319', 'step': 25418, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:28.883238', 'step': 25418, 'epoch': 3}
{'type': 'loss', 'content': 0.017921648919582367, 'timestamp': '2025-10-02 00:56:28.885343', 'step': 25419, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:28.939860', 'step': 25419, 'epoch': 3}
{'type': 'loss', 'content': 0.049866121262311935, 'timestamp': '2025-10-02 00:56:28.946244', 'step': 25420, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:29.001776', 'step': 25420, 'epoch': 3}
{'type': 'loss', 'content': 0.03790922835469246, 'timestamp': '2025-10-02 00:56:29.004150', 'step': 25421, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:29.061432', 'step': 25421, 'epoch': 3}
{'type': 'loss', 'content': 0.013091991655528545, 'timestamp': '2025-10-02 00:56:29.067212', 'step': 25422, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:29.124041', 'step': 25422, 'epoch': 3}
{'type': 'loss', 'content': 0.048380643129348755, 'timestamp': '2025-10-02 00:56:29.129788', 'step': 25423, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:29.186145', 'step': 25423, 'epoch': 3}
{'type': 'loss', 'content': 0.03826211765408516, 'timestamp': '2025-10-02 00:56:29.194378', 'step': 25424, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:29.250330', 'step': 25424, 'epoch': 3}
{'type': 'loss', 'content': 0.011009559966623783, 'timestamp': '2025-10-02 00:56:29.255636', 'step': 25425, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:56:29.320444', 'step': 25425, 'epoch': 3}
{'type': 'loss', 'content': 0.05442938581109047, 'timestamp': '2025-10-02 00:56:29.331124', 'step': 25426, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:29.389221', 'step': 25426, 'epoch': 3}
{'type': 'loss', 'content': 0.06763580441474915, 'timestamp': '2025-10-02 00:56:29.392944', 'step': 25427, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:29.449782', 'step': 25427, 'epoch': 3}
{'type': 'loss', 'content': 0.047347910702228546, 'timestamp': '2025-10-02 00:56:29.456539', 'step': 25428, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:29.516257', 'step': 25428, 'epoch': 3}
{'type': 'loss', 'content': 0.00870638806372881, 'timestamp': '2025-10-02 00:56:29.518609', 'step': 25429, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:29.577163', 'step': 25429, 'epoch': 3}
{'type': 'loss', 'content': 0.014704229310154915, 'timestamp': '2025-10-02 00:56:29.586389', 'step': 25430, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:29.642644', 'step': 25430, 'epoch': 3}
{'type': 'loss', 'content': 0.0640619695186615, 'timestamp': '2025-10-02 00:56:29.651730', 'step': 25431, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:29.709839', 'step': 25431, 'epoch': 3}
{'type': 'loss', 'content': 0.20619209110736847, 'timestamp': '2025-10-02 00:56:29.716554', 'step': 25432, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:29.772099', 'step': 25432, 'epoch': 3}
{'type': 'loss', 'content': 0.0023183708544820547, 'timestamp': '2025-10-02 00:56:29.779719', 'step': 25433, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:29.835853', 'step': 25433, 'epoch': 3}
{'type': 'loss', 'content': 0.02452775277197361, 'timestamp': '2025-10-02 00:56:29.839401', 'step': 25434, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:29.897759', 'step': 25434, 'epoch': 3}
{'type': 'loss', 'content': 0.06644035130739212, 'timestamp': '2025-10-02 00:56:29.903166', 'step': 25435, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:29.973126', 'step': 25435, 'epoch': 3}
{'type': 'loss', 'content': 0.05852833017706871, 'timestamp': '2025-10-02 00:56:29.979656', 'step': 25436, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:30.036755', 'step': 25436, 'epoch': 3}
{'type': 'loss', 'content': 0.014552940614521503, 'timestamp': '2025-10-02 00:56:30.045988', 'step': 25437, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:30.105653', 'step': 25437, 'epoch': 3}
{'type': 'loss', 'content': 0.029920537024736404, 'timestamp': '2025-10-02 00:56:30.108383', 'step': 25438, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:30.165366', 'step': 25438, 'epoch': 3}
{'type': 'loss', 'content': 0.0349402017891407, 'timestamp': '2025-10-02 00:56:30.171188', 'step': 25439, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:30.228421', 'step': 25439, 'epoch': 3}
{'type': 'loss', 'content': 0.017698343843221664, 'timestamp': '2025-10-02 00:56:30.234476', 'step': 25440, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:30.288702', 'step': 25440, 'epoch': 3}
{'type': 'loss', 'content': 0.09131558984518051, 'timestamp': '2025-10-02 00:56:30.290893', 'step': 25441, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:30.345323', 'step': 25441, 'epoch': 3}
{'type': 'loss', 'content': 0.08094565570354462, 'timestamp': '2025-10-02 00:56:30.347713', 'step': 25442, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:30.402422', 'step': 25442, 'epoch': 3}
{'type': 'loss', 'content': 0.05467292666435242, 'timestamp': '2025-10-02 00:56:30.404719', 'step': 25443, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:30.460258', 'step': 25443, 'epoch': 3}
{'type': 'loss', 'content': 0.03355727717280388, 'timestamp': '2025-10-02 00:56:30.466734', 'step': 25444, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:56:30.522288', 'step': 25444, 'epoch': 3}
{'type': 'loss', 'content': 0.05574162304401398, 'timestamp': '2025-10-02 00:56:30.525096', 'step': 25445, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:30.580317', 'step': 25445, 'epoch': 3}
{'type': 'loss', 'content': 0.054480280727148056, 'timestamp': '2025-10-02 00:56:30.582757', 'step': 25446, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:30.637229', 'step': 25446, 'epoch': 3}
{'type': 'loss', 'content': 0.04878335818648338, 'timestamp': '2025-10-02 00:56:30.642895', 'step': 25447, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:30.698636', 'step': 25447, 'epoch': 3}
{'type': 'loss', 'content': 0.07270044833421707, 'timestamp': '2025-10-02 00:56:30.708322', 'step': 25448, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:30.762876', 'step': 25448, 'epoch': 3}
{'type': 'loss', 'content': 0.1147765964269638, 'timestamp': '2025-10-02 00:56:30.765112', 'step': 25449, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:30.819362', 'step': 25449, 'epoch': 3}
{'type': 'loss', 'content': 0.044118862599134445, 'timestamp': '2025-10-02 00:56:30.821637', 'step': 25450, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:30.876870', 'step': 25450, 'epoch': 3}
{'type': 'loss', 'content': 0.05828909948468208, 'timestamp': '2025-10-02 00:56:30.879765', 'step': 25451, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:30.938933', 'step': 25451, 'epoch': 3}
{'type': 'loss', 'content': 0.02619442716240883, 'timestamp': '2025-10-02 00:56:30.949900', 'step': 25452, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:31.004643', 'step': 25452, 'epoch': 3}
{'type': 'loss', 'content': 0.15908534824848175, 'timestamp': '2025-10-02 00:56:31.006930', 'step': 25453, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:31.062179', 'step': 25453, 'epoch': 3}
{'type': 'loss', 'content': 0.034764666110277176, 'timestamp': '2025-10-02 00:56:31.064626', 'step': 25454, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:56:31.133297', 'step': 25454, 'epoch': 3}
{'type': 'loss', 'content': 0.03649723902344704, 'timestamp': '2025-10-02 00:56:31.145710', 'step': 25455, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:56:31.201767', 'step': 25455, 'epoch': 3}
{'type': 'loss', 'content': 0.025956880301237106, 'timestamp': '2025-10-02 00:56:31.207848', 'step': 25456, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:31.264100', 'step': 25456, 'epoch': 3}
{'type': 'loss', 'content': 0.09046310186386108, 'timestamp': '2025-10-02 00:56:31.266986', 'step': 25457, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:31.322706', 'step': 25457, 'epoch': 3}
{'type': 'loss', 'content': 0.07501812279224396, 'timestamp': '2025-10-02 00:56:31.325654', 'step': 25458, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:31.381081', 'step': 25458, 'epoch': 3}
{'type': 'loss', 'content': 0.06104730814695358, 'timestamp': '2025-10-02 00:56:31.383765', 'step': 25459, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:31.440129', 'step': 25459, 'epoch': 3}
{'type': 'loss', 'content': 0.05282164365053177, 'timestamp': '2025-10-02 00:56:31.446302', 'step': 25460, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:31.500609', 'step': 25460, 'epoch': 3}
{'type': 'loss', 'content': 0.026899531483650208, 'timestamp': '2025-10-02 00:56:31.506313', 'step': 25461, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:31.560216', 'step': 25461, 'epoch': 3}
{'type': 'loss', 'content': 0.08049677312374115, 'timestamp': '2025-10-02 00:56:31.562567', 'step': 25462, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:31.616947', 'step': 25462, 'epoch': 3}
{'type': 'loss', 'content': 0.05596836283802986, 'timestamp': '2025-10-02 00:56:31.619506', 'step': 25463, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:31.674003', 'step': 25463, 'epoch': 3}
{'type': 'loss', 'content': 0.07396014034748077, 'timestamp': '2025-10-02 00:56:31.679924', 'step': 25464, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:31.734372', 'step': 25464, 'epoch': 3}
{'type': 'loss', 'content': 0.09429650008678436, 'timestamp': '2025-10-02 00:56:31.736561', 'step': 25465, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:31.791357', 'step': 25465, 'epoch': 3}
{'type': 'loss', 'content': 0.016175106167793274, 'timestamp': '2025-10-02 00:56:31.798624', 'step': 25466, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:31.853933', 'step': 25466, 'epoch': 3}
{'type': 'loss', 'content': 0.0016773630632087588, 'timestamp': '2025-10-02 00:56:31.856458', 'step': 25467, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:31.911675', 'step': 25467, 'epoch': 3}
{'type': 'loss', 'content': 0.03439764305949211, 'timestamp': '2025-10-02 00:56:31.918169', 'step': 25468, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:56:31.978823', 'step': 25468, 'epoch': 3}
{'type': 'loss', 'content': 0.025197550654411316, 'timestamp': '2025-10-02 00:56:31.990435', 'step': 25469, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:32.045846', 'step': 25469, 'epoch': 3}
{'type': 'loss', 'content': 0.11997785419225693, 'timestamp': '2025-10-02 00:56:32.048631', 'step': 25470, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:32.103182', 'step': 25470, 'epoch': 3}
{'type': 'loss', 'content': 0.015756234526634216, 'timestamp': '2025-10-02 00:56:32.105842', 'step': 25471, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:32.160354', 'step': 25471, 'epoch': 3}
{'type': 'loss', 'content': 0.07782714813947678, 'timestamp': '2025-10-02 00:56:32.167721', 'step': 25472, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:32.222024', 'step': 25472, 'epoch': 3}
{'type': 'loss', 'content': 0.06239210069179535, 'timestamp': '2025-10-02 00:56:32.224344', 'step': 25473, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:32.278656', 'step': 25473, 'epoch': 3}
{'type': 'loss', 'content': 0.02675386145710945, 'timestamp': '2025-10-02 00:56:32.281116', 'step': 25474, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:32.335697', 'step': 25474, 'epoch': 3}
{'type': 'loss', 'content': 0.12480415403842926, 'timestamp': '2025-10-02 00:56:32.338235', 'step': 25475, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:32.394052', 'step': 25475, 'epoch': 3}
{'type': 'loss', 'content': 0.046093668788671494, 'timestamp': '2025-10-02 00:56:32.400013', 'step': 25476, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:32.455247', 'step': 25476, 'epoch': 3}
{'type': 'loss', 'content': 0.03473363071680069, 'timestamp': '2025-10-02 00:56:32.457675', 'step': 25477, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:32.512568', 'step': 25477, 'epoch': 3}
{'type': 'loss', 'content': 0.0707276314496994, 'timestamp': '2025-10-02 00:56:32.515016', 'step': 25478, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:32.569318', 'step': 25478, 'epoch': 3}
{'type': 'loss', 'content': 0.02776307426393032, 'timestamp': '2025-10-02 00:56:32.578368', 'step': 25479, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:32.634285', 'step': 25479, 'epoch': 3}
{'type': 'loss', 'content': 0.10606077313423157, 'timestamp': '2025-10-02 00:56:32.641308', 'step': 25480, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:32.696023', 'step': 25480, 'epoch': 3}
{'type': 'loss', 'content': 0.02385745942592621, 'timestamp': '2025-10-02 00:56:32.698404', 'step': 25481, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:32.754063', 'step': 25481, 'epoch': 3}
{'type': 'loss', 'content': 0.022223303094506264, 'timestamp': '2025-10-02 00:56:32.763306', 'step': 25482, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:32.819680', 'step': 25482, 'epoch': 3}
{'type': 'loss', 'content': 0.11016059666872025, 'timestamp': '2025-10-02 00:56:32.822269', 'step': 25483, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:32.878033', 'step': 25483, 'epoch': 3}
{'type': 'loss', 'content': 0.018789149820804596, 'timestamp': '2025-10-02 00:56:32.885380', 'step': 25484, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:32.940347', 'step': 25484, 'epoch': 3}
{'type': 'loss', 'content': 0.02481437474489212, 'timestamp': '2025-10-02 00:56:32.943132', 'step': 25485, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:32.997403', 'step': 25485, 'epoch': 3}
{'type': 'loss', 'content': 0.08313044160604477, 'timestamp': '2025-10-02 00:56:32.999815', 'step': 25486, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:33.056959', 'step': 25486, 'epoch': 3}
{'type': 'loss', 'content': 0.07466664165258408, 'timestamp': '2025-10-02 00:56:33.062714', 'step': 25487, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:33.119028', 'step': 25487, 'epoch': 3}
{'type': 'loss', 'content': 0.0029634376987814903, 'timestamp': '2025-10-02 00:56:33.126584', 'step': 25488, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:33.180073', 'step': 25488, 'epoch': 3}
{'type': 'loss', 'content': 0.08892952650785446, 'timestamp': '2025-10-02 00:56:33.182238', 'step': 25489, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:33.238097', 'step': 25489, 'epoch': 3}
{'type': 'loss', 'content': 0.018001116812229156, 'timestamp': '2025-10-02 00:56:33.247650', 'step': 25490, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:33.303330', 'step': 25490, 'epoch': 3}
{'type': 'loss', 'content': 0.035843249410390854, 'timestamp': '2025-10-02 00:56:33.305843', 'step': 25491, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:33.365810', 'step': 25491, 'epoch': 3}
{'type': 'loss', 'content': 0.07625538855791092, 'timestamp': '2025-10-02 00:56:33.376677', 'step': 25492, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:33.431238', 'step': 25492, 'epoch': 3}
{'type': 'loss', 'content': 0.029325494542717934, 'timestamp': '2025-10-02 00:56:33.433884', 'step': 25493, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:56:33.489785', 'step': 25493, 'epoch': 3}
{'type': 'loss', 'content': 0.10893110930919647, 'timestamp': '2025-10-02 00:56:33.492367', 'step': 25494, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:33.546320', 'step': 25494, 'epoch': 3}
{'type': 'loss', 'content': 0.0667354017496109, 'timestamp': '2025-10-02 00:56:33.548878', 'step': 25495, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:56:33.610598', 'step': 25495, 'epoch': 3}
{'type': 'loss', 'content': 0.013682805001735687, 'timestamp': '2025-10-02 00:56:33.621952', 'step': 25496, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:33.675835', 'step': 25496, 'epoch': 3}
{'type': 'loss', 'content': 0.08810915052890778, 'timestamp': '2025-10-02 00:56:33.678404', 'step': 25497, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:33.733363', 'step': 25497, 'epoch': 3}
{'type': 'loss', 'content': 0.023535633459687233, 'timestamp': '2025-10-02 00:56:33.736153', 'step': 25498, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:33.791004', 'step': 25498, 'epoch': 3}
{'type': 'loss', 'content': 0.04605823755264282, 'timestamp': '2025-10-02 00:56:33.794343', 'step': 25499, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:33.851024', 'step': 25499, 'epoch': 3}
{'type': 'loss', 'content': 0.06065661087632179, 'timestamp': '2025-10-02 00:56:33.858708', 'step': 25500, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 25500', 'timestamp': '2025-10-02 00:56:34.318850', 'step': 25500, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:34.385542', 'step': 25500, 'epoch': 3}
{'type': 'loss', 'content': 0.022808706387877464, 'timestamp': '2025-10-02 00:56:34.390173', 'step': 25501, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:34.445952', 'step': 25501, 'epoch': 3}
{'type': 'loss', 'content': 0.11829004436731339, 'timestamp': '2025-10-02 00:56:34.448282', 'step': 25502, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:34.502929', 'step': 25502, 'epoch': 3}
{'type': 'loss', 'content': 0.025060441344976425, 'timestamp': '2025-10-02 00:56:34.505382', 'step': 25503, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:34.561311', 'step': 25503, 'epoch': 3}
{'type': 'loss', 'content': 0.009243414737284184, 'timestamp': '2025-10-02 00:56:34.572775', 'step': 25504, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:56:34.632327', 'step': 25504, 'epoch': 3}
{'type': 'loss', 'content': 0.08293136954307556, 'timestamp': '2025-10-02 00:56:34.634680', 'step': 25505, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:34.689685', 'step': 25505, 'epoch': 3}
{'type': 'loss', 'content': 0.05318191275000572, 'timestamp': '2025-10-02 00:56:34.692206', 'step': 25506, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:34.747470', 'step': 25506, 'epoch': 3}
{'type': 'loss', 'content': 0.05933481827378273, 'timestamp': '2025-10-02 00:56:34.749909', 'step': 25507, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:34.804640', 'step': 25507, 'epoch': 3}
{'type': 'loss', 'content': 0.03723646700382233, 'timestamp': '2025-10-02 00:56:34.810794', 'step': 25508, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:56:34.871346', 'step': 25508, 'epoch': 3}
{'type': 'loss', 'content': 0.1260892003774643, 'timestamp': '2025-10-02 00:56:34.873744', 'step': 25509, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:34.928732', 'step': 25509, 'epoch': 3}
{'type': 'loss', 'content': 0.003909936174750328, 'timestamp': '2025-10-02 00:56:34.930930', 'step': 25510, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:34.987194', 'step': 25510, 'epoch': 3}
{'type': 'loss', 'content': 0.050993986427783966, 'timestamp': '2025-10-02 00:56:34.989588', 'step': 25511, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:35.047361', 'step': 25511, 'epoch': 3}
{'type': 'loss', 'content': 0.03437026962637901, 'timestamp': '2025-10-02 00:56:35.053059', 'step': 25512, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:35.106725', 'step': 25512, 'epoch': 3}
{'type': 'loss', 'content': 0.12431097030639648, 'timestamp': '2025-10-02 00:56:35.109172', 'step': 25513, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:56:35.164072', 'step': 25513, 'epoch': 3}
{'type': 'loss', 'content': 0.10228559374809265, 'timestamp': '2025-10-02 00:56:35.166500', 'step': 25514, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:35.220945', 'step': 25514, 'epoch': 3}
{'type': 'loss', 'content': 0.09383773058652878, 'timestamp': '2025-10-02 00:56:35.227431', 'step': 25515, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:35.282322', 'step': 25515, 'epoch': 3}
{'type': 'loss', 'content': 0.10081125795841217, 'timestamp': '2025-10-02 00:56:35.288396', 'step': 25516, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:35.342775', 'step': 25516, 'epoch': 3}
{'type': 'loss', 'content': 0.02776246704161167, 'timestamp': '2025-10-02 00:56:35.344944', 'step': 25517, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:35.400636', 'step': 25517, 'epoch': 3}
{'type': 'loss', 'content': 0.03859495744109154, 'timestamp': '2025-10-02 00:56:35.409332', 'step': 25518, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:35.464191', 'step': 25518, 'epoch': 3}
{'type': 'loss', 'content': 0.14385837316513062, 'timestamp': '2025-10-02 00:56:35.466507', 'step': 25519, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:35.521835', 'step': 25519, 'epoch': 3}
{'type': 'loss', 'content': 0.024963192641735077, 'timestamp': '2025-10-02 00:56:35.528126', 'step': 25520, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:35.585241', 'step': 25520, 'epoch': 3}
{'type': 'loss', 'content': 0.06766006350517273, 'timestamp': '2025-10-02 00:56:35.596157', 'step': 25521, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:35.651305', 'step': 25521, 'epoch': 3}
{'type': 'loss', 'content': 0.02763756550848484, 'timestamp': '2025-10-02 00:56:35.653289', 'step': 25522, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:35.709923', 'step': 25522, 'epoch': 3}
{'type': 'loss', 'content': 0.017027903348207474, 'timestamp': '2025-10-02 00:56:35.712238', 'step': 25523, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:56:35.774162', 'step': 25523, 'epoch': 3}
{'type': 'loss', 'content': 0.018997900187969208, 'timestamp': '2025-10-02 00:56:35.785551', 'step': 25524, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:35.839743', 'step': 25524, 'epoch': 3}
{'type': 'loss', 'content': 0.1848314106464386, 'timestamp': '2025-10-02 00:56:35.842065', 'step': 25525, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:35.898207', 'step': 25525, 'epoch': 3}
{'type': 'loss', 'content': 0.006954742129892111, 'timestamp': '2025-10-02 00:56:35.907732', 'step': 25526, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:35.964780', 'step': 25526, 'epoch': 3}
{'type': 'loss', 'content': 0.052537377923727036, 'timestamp': '2025-10-02 00:56:35.969887', 'step': 25527, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:56:36.037462', 'step': 25527, 'epoch': 3}
{'type': 'loss', 'content': 0.029766004532575607, 'timestamp': '2025-10-02 00:56:36.050172', 'step': 25528, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:56:36.125743', 'step': 25528, 'epoch': 3}
{'type': 'loss', 'content': 0.0075735533609986305, 'timestamp': '2025-10-02 00:56:36.140865', 'step': 25529, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:36.197104', 'step': 25529, 'epoch': 3}
{'type': 'loss', 'content': 0.029399940744042397, 'timestamp': '2025-10-02 00:56:36.204088', 'step': 25530, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:36.259638', 'step': 25530, 'epoch': 3}
{'type': 'loss', 'content': 0.11621173471212387, 'timestamp': '2025-10-02 00:56:36.262161', 'step': 25531, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:36.317188', 'step': 25531, 'epoch': 3}
{'type': 'loss', 'content': 0.0557820200920105, 'timestamp': '2025-10-02 00:56:36.323221', 'step': 25532, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:36.378060', 'step': 25532, 'epoch': 3}
{'type': 'loss', 'content': 0.07399245351552963, 'timestamp': '2025-10-02 00:56:36.380494', 'step': 25533, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:36.435598', 'step': 25533, 'epoch': 3}
{'type': 'loss', 'content': 0.052033163607120514, 'timestamp': '2025-10-02 00:56:36.440832', 'step': 25534, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:36.496647', 'step': 25534, 'epoch': 3}
{'type': 'loss', 'content': 0.02744886837899685, 'timestamp': '2025-10-02 00:56:36.503368', 'step': 25535, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:36.558303', 'step': 25535, 'epoch': 3}
{'type': 'loss', 'content': 0.08813680708408356, 'timestamp': '2025-10-02 00:56:36.563912', 'step': 25536, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:36.618380', 'step': 25536, 'epoch': 3}
{'type': 'loss', 'content': 0.01357156690210104, 'timestamp': '2025-10-02 00:56:36.627333', 'step': 25537, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:36.682686', 'step': 25537, 'epoch': 3}
{'type': 'loss', 'content': 0.050168365240097046, 'timestamp': '2025-10-02 00:56:36.691335', 'step': 25538, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:36.747875', 'step': 25538, 'epoch': 3}
{'type': 'loss', 'content': 0.03556244075298309, 'timestamp': '2025-10-02 00:56:36.753225', 'step': 25539, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:36.813329', 'step': 25539, 'epoch': 3}
{'type': 'loss', 'content': 0.024081822484731674, 'timestamp': '2025-10-02 00:56:36.824271', 'step': 25540, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:36.880387', 'step': 25540, 'epoch': 3}
{'type': 'loss', 'content': 0.03679370880126953, 'timestamp': '2025-10-02 00:56:36.882612', 'step': 25541, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:36.939538', 'step': 25541, 'epoch': 3}
{'type': 'loss', 'content': 0.06808880716562271, 'timestamp': '2025-10-02 00:56:36.941912', 'step': 25542, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:36.997445', 'step': 25542, 'epoch': 3}
{'type': 'loss', 'content': 0.05075546354055405, 'timestamp': '2025-10-02 00:56:36.999743', 'step': 25543, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:37.055409', 'step': 25543, 'epoch': 3}
{'type': 'loss', 'content': 0.02767784334719181, 'timestamp': '2025-10-02 00:56:37.063158', 'step': 25544, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:56:37.124000', 'step': 25544, 'epoch': 3}
{'type': 'loss', 'content': 0.03348080813884735, 'timestamp': '2025-10-02 00:56:37.135277', 'step': 25545, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:37.191021', 'step': 25545, 'epoch': 3}
{'type': 'loss', 'content': 0.028258847072720528, 'timestamp': '2025-10-02 00:56:37.193257', 'step': 25546, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:56:37.263944', 'step': 25546, 'epoch': 3}
{'type': 'loss', 'content': 0.018940620124340057, 'timestamp': '2025-10-02 00:56:37.276366', 'step': 25547, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:37.340089', 'step': 25547, 'epoch': 3}
{'type': 'loss', 'content': 0.0491371788084507, 'timestamp': '2025-10-02 00:56:37.346203', 'step': 25548, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:37.400480', 'step': 25548, 'epoch': 3}
{'type': 'loss', 'content': 0.09202142804861069, 'timestamp': '2025-10-02 00:56:37.403437', 'step': 25549, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:37.460718', 'step': 25549, 'epoch': 3}
{'type': 'loss', 'content': 0.09356855601072311, 'timestamp': '2025-10-02 00:56:37.462707', 'step': 25550, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:37.519423', 'step': 25550, 'epoch': 3}
{'type': 'loss', 'content': 0.010809391736984253, 'timestamp': '2025-10-02 00:56:37.526236', 'step': 25551, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:37.584183', 'step': 25551, 'epoch': 3}
{'type': 'loss', 'content': 0.045941971242427826, 'timestamp': '2025-10-02 00:56:37.590454', 'step': 25552, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:56:37.651760', 'step': 25552, 'epoch': 3}
{'type': 'loss', 'content': 0.015469195321202278, 'timestamp': '2025-10-02 00:56:37.663399', 'step': 25553, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:37.721761', 'step': 25553, 'epoch': 3}
{'type': 'loss', 'content': 0.023178964853286743, 'timestamp': '2025-10-02 00:56:37.728790', 'step': 25554, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:37.788053', 'step': 25554, 'epoch': 3}
{'type': 'loss', 'content': 0.04739820957183838, 'timestamp': '2025-10-02 00:56:37.790589', 'step': 25555, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:37.847655', 'step': 25555, 'epoch': 3}
{'type': 'loss', 'content': 0.0615120567381382, 'timestamp': '2025-10-02 00:56:37.858061', 'step': 25556, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:37.915523', 'step': 25556, 'epoch': 3}
{'type': 'loss', 'content': 0.019229000434279442, 'timestamp': '2025-10-02 00:56:37.918409', 'step': 25557, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:56:37.990729', 'step': 25557, 'epoch': 3}
{'type': 'loss', 'content': 0.021298669278621674, 'timestamp': '2025-10-02 00:56:38.003159', 'step': 25558, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:38.061683', 'step': 25558, 'epoch': 3}
{'type': 'loss', 'content': 0.012282797135412693, 'timestamp': '2025-10-02 00:56:38.068565', 'step': 25559, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:38.128370', 'step': 25559, 'epoch': 3}
{'type': 'loss', 'content': 0.10342571139335632, 'timestamp': '2025-10-02 00:56:38.134312', 'step': 25560, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:38.191450', 'step': 25560, 'epoch': 3}
{'type': 'loss', 'content': 0.04635424166917801, 'timestamp': '2025-10-02 00:56:38.195162', 'step': 25561, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:38.252078', 'step': 25561, 'epoch': 3}
{'type': 'loss', 'content': 0.03143720328807831, 'timestamp': '2025-10-02 00:56:38.255175', 'step': 25562, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:38.312396', 'step': 25562, 'epoch': 3}
{'type': 'loss', 'content': 0.07229448854923248, 'timestamp': '2025-10-02 00:56:38.315209', 'step': 25563, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:38.374535', 'step': 25563, 'epoch': 3}
{'type': 'loss', 'content': 0.03166234865784645, 'timestamp': '2025-10-02 00:56:38.385427', 'step': 25564, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:38.443792', 'step': 25564, 'epoch': 3}
{'type': 'loss', 'content': 0.054409246891736984, 'timestamp': '2025-10-02 00:56:38.449131', 'step': 25565, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:38.507777', 'step': 25565, 'epoch': 3}
{'type': 'loss', 'content': 0.012680944986641407, 'timestamp': '2025-10-02 00:56:38.510406', 'step': 25566, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:56:38.573875', 'step': 25566, 'epoch': 3}
{'type': 'loss', 'content': 0.06468649953603745, 'timestamp': '2025-10-02 00:56:38.584304', 'step': 25567, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:38.642604', 'step': 25567, 'epoch': 3}
{'type': 'loss', 'content': 0.03154822438955307, 'timestamp': '2025-10-02 00:56:38.648637', 'step': 25568, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:38.705508', 'step': 25568, 'epoch': 3}
{'type': 'loss', 'content': 0.021330570802092552, 'timestamp': '2025-10-02 00:56:38.712543', 'step': 25569, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:38.771833', 'step': 25569, 'epoch': 3}
{'type': 'loss', 'content': 0.015572493895888329, 'timestamp': '2025-10-02 00:56:38.776721', 'step': 25570, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:38.834790', 'step': 25570, 'epoch': 3}
{'type': 'loss', 'content': 0.0940544605255127, 'timestamp': '2025-10-02 00:56:38.837490', 'step': 25571, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:38.895767', 'step': 25571, 'epoch': 3}
{'type': 'loss', 'content': 0.07297922670841217, 'timestamp': '2025-10-02 00:56:38.901707', 'step': 25572, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:38.957180', 'step': 25572, 'epoch': 3}
{'type': 'loss', 'content': 0.003878273768350482, 'timestamp': '2025-10-02 00:56:38.965994', 'step': 25573, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:39.022665', 'step': 25573, 'epoch': 3}
{'type': 'loss', 'content': 0.047527093440294266, 'timestamp': '2025-10-02 00:56:39.028041', 'step': 25574, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:39.086029', 'step': 25574, 'epoch': 3}
{'type': 'loss', 'content': 0.07149548083543777, 'timestamp': '2025-10-02 00:56:39.090294', 'step': 25575, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:39.150331', 'step': 25575, 'epoch': 3}
{'type': 'loss', 'content': 0.08422520756721497, 'timestamp': '2025-10-02 00:56:39.161381', 'step': 25576, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:39.217149', 'step': 25576, 'epoch': 3}
{'type': 'loss', 'content': 0.06226380541920662, 'timestamp': '2025-10-02 00:56:39.220588', 'step': 25577, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:39.278854', 'step': 25577, 'epoch': 3}
{'type': 'loss', 'content': 0.10826697200536728, 'timestamp': '2025-10-02 00:56:39.281542', 'step': 25578, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:39.336871', 'step': 25578, 'epoch': 3}
{'type': 'loss', 'content': 0.1219245120882988, 'timestamp': '2025-10-02 00:56:39.339118', 'step': 25579, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:39.393924', 'step': 25579, 'epoch': 3}
{'type': 'loss', 'content': 0.09340666234493256, 'timestamp': '2025-10-02 00:56:39.399992', 'step': 25580, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:39.455200', 'step': 25580, 'epoch': 3}
{'type': 'loss', 'content': 0.0773392841219902, 'timestamp': '2025-10-02 00:56:39.457455', 'step': 25581, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:56:39.518964', 'step': 25581, 'epoch': 3}
{'type': 'loss', 'content': 0.02805200405418873, 'timestamp': '2025-10-02 00:56:39.529426', 'step': 25582, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:39.586845', 'step': 25582, 'epoch': 3}
{'type': 'loss', 'content': 0.07180904597043991, 'timestamp': '2025-10-02 00:56:39.591127', 'step': 25583, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:56:39.653069', 'step': 25583, 'epoch': 3}
{'type': 'loss', 'content': 0.02512550726532936, 'timestamp': '2025-10-02 00:56:39.664265', 'step': 25584, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:39.718900', 'step': 25584, 'epoch': 3}
{'type': 'loss', 'content': 0.05145353451371193, 'timestamp': '2025-10-02 00:56:39.721399', 'step': 25585, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:39.777751', 'step': 25585, 'epoch': 3}
{'type': 'loss', 'content': 0.11615650355815887, 'timestamp': '2025-10-02 00:56:39.780254', 'step': 25586, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:39.835920', 'step': 25586, 'epoch': 3}
{'type': 'loss', 'content': 0.03862222284078598, 'timestamp': '2025-10-02 00:56:39.842850', 'step': 25587, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:39.898174', 'step': 25587, 'epoch': 3}
{'type': 'loss', 'content': 0.018264897167682648, 'timestamp': '2025-10-02 00:56:39.904455', 'step': 25588, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:39.959067', 'step': 25588, 'epoch': 3}
{'type': 'loss', 'content': 0.02728610672056675, 'timestamp': '2025-10-02 00:56:39.961370', 'step': 25589, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:56:40.023329', 'step': 25589, 'epoch': 3}
{'type': 'loss', 'content': 0.032511670142412186, 'timestamp': '2025-10-02 00:56:40.034124', 'step': 25590, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:40.090296', 'step': 25590, 'epoch': 3}
{'type': 'loss', 'content': 0.018465250730514526, 'timestamp': '2025-10-02 00:56:40.095392', 'step': 25591, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:40.152628', 'step': 25591, 'epoch': 3}
{'type': 'loss', 'content': 0.009006834588944912, 'timestamp': '2025-10-02 00:56:40.160220', 'step': 25592, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:40.214895', 'step': 25592, 'epoch': 3}
{'type': 'loss', 'content': 0.02573675476014614, 'timestamp': '2025-10-02 00:56:40.223991', 'step': 25593, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:40.280327', 'step': 25593, 'epoch': 3}
{'type': 'loss', 'content': 0.0612274669110775, 'timestamp': '2025-10-02 00:56:40.282623', 'step': 25594, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:40.338348', 'step': 25594, 'epoch': 3}
{'type': 'loss', 'content': 0.04472026228904724, 'timestamp': '2025-10-02 00:56:40.340586', 'step': 25595, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:40.395713', 'step': 25595, 'epoch': 3}
{'type': 'loss', 'content': 0.13436438143253326, 'timestamp': '2025-10-02 00:56:40.401979', 'step': 25596, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:40.457200', 'step': 25596, 'epoch': 3}
{'type': 'loss', 'content': 0.02622069977223873, 'timestamp': '2025-10-02 00:56:40.464199', 'step': 25597, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:40.519430', 'step': 25597, 'epoch': 3}
{'type': 'loss', 'content': 0.05614711344242096, 'timestamp': '2025-10-02 00:56:40.521748', 'step': 25598, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:40.576920', 'step': 25598, 'epoch': 3}
{'type': 'loss', 'content': 0.012260963208973408, 'timestamp': '2025-10-02 00:56:40.579241', 'step': 25599, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:40.633869', 'step': 25599, 'epoch': 3}
{'type': 'loss', 'content': 0.027093414217233658, 'timestamp': '2025-10-02 00:56:40.640180', 'step': 25600, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:40.695209', 'step': 25600, 'epoch': 3}
{'type': 'loss', 'content': 0.06808014959096909, 'timestamp': '2025-10-02 00:56:40.697522', 'step': 25601, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:40.752080', 'step': 25601, 'epoch': 3}
{'type': 'loss', 'content': 0.06087928265333176, 'timestamp': '2025-10-02 00:56:40.754693', 'step': 25602, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:40.809738', 'step': 25602, 'epoch': 3}
{'type': 'loss', 'content': 0.09314029663801193, 'timestamp': '2025-10-02 00:56:40.811978', 'step': 25603, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:40.866519', 'step': 25603, 'epoch': 3}
{'type': 'loss', 'content': 0.02325226366519928, 'timestamp': '2025-10-02 00:56:40.874343', 'step': 25604, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:56:40.936343', 'step': 25604, 'epoch': 3}
{'type': 'loss', 'content': 0.011677705682814121, 'timestamp': '2025-10-02 00:56:40.948070', 'step': 25605, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:41.003677', 'step': 25605, 'epoch': 3}
{'type': 'loss', 'content': 0.04751050844788551, 'timestamp': '2025-10-02 00:56:41.013127', 'step': 25606, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:41.069441', 'step': 25606, 'epoch': 3}
{'type': 'loss', 'content': 0.03675714507699013, 'timestamp': '2025-10-02 00:56:41.071947', 'step': 25607, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:41.128802', 'step': 25607, 'epoch': 3}
{'type': 'loss', 'content': 0.02198575623333454, 'timestamp': '2025-10-02 00:56:41.138568', 'step': 25608, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:41.193523', 'step': 25608, 'epoch': 3}
{'type': 'loss', 'content': 0.030059870332479477, 'timestamp': '2025-10-02 00:56:41.196329', 'step': 25609, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:41.251858', 'step': 25609, 'epoch': 3}
{'type': 'loss', 'content': 0.0125722112134099, 'timestamp': '2025-10-02 00:56:41.260709', 'step': 25610, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:56:41.317044', 'step': 25610, 'epoch': 3}
{'type': 'loss', 'content': 0.03944498300552368, 'timestamp': '2025-10-02 00:56:41.321043', 'step': 25611, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:41.383076', 'step': 25611, 'epoch': 3}
{'type': 'loss', 'content': 0.025865934789180756, 'timestamp': '2025-10-02 00:56:41.389146', 'step': 25612, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:41.444495', 'step': 25612, 'epoch': 3}
{'type': 'loss', 'content': 0.05382242798805237, 'timestamp': '2025-10-02 00:56:41.446901', 'step': 25613, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:41.504466', 'step': 25613, 'epoch': 3}
{'type': 'loss', 'content': 0.01811673305928707, 'timestamp': '2025-10-02 00:56:41.513997', 'step': 25614, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:41.571082', 'step': 25614, 'epoch': 3}
{'type': 'loss', 'content': 0.0507972277700901, 'timestamp': '2025-10-02 00:56:41.579763', 'step': 25615, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:41.635654', 'step': 25615, 'epoch': 3}
{'type': 'loss', 'content': 0.002902010688558221, 'timestamp': '2025-10-02 00:56:41.645513', 'step': 25616, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:41.700402', 'step': 25616, 'epoch': 3}
{'type': 'loss', 'content': 0.07725218683481216, 'timestamp': '2025-10-02 00:56:41.702853', 'step': 25617, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:41.757322', 'step': 25617, 'epoch': 3}
{'type': 'loss', 'content': 0.061372801661491394, 'timestamp': '2025-10-02 00:56:41.759812', 'step': 25618, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:41.814402', 'step': 25618, 'epoch': 3}
{'type': 'loss', 'content': 0.016084687784314156, 'timestamp': '2025-10-02 00:56:41.816682', 'step': 25619, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:41.874877', 'step': 25619, 'epoch': 3}
{'type': 'loss', 'content': 0.08475344628095627, 'timestamp': '2025-10-02 00:56:41.881148', 'step': 25620, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:56:41.940333', 'step': 25620, 'epoch': 3}
{'type': 'loss', 'content': 0.04748275876045227, 'timestamp': '2025-10-02 00:56:41.948154', 'step': 25621, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:56:42.004107', 'step': 25621, 'epoch': 3}
{'type': 'loss', 'content': 0.030586058273911476, 'timestamp': '2025-10-02 00:56:42.006433', 'step': 25622, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:42.064140', 'step': 25622, 'epoch': 3}
{'type': 'loss', 'content': 0.05327766761183739, 'timestamp': '2025-10-02 00:56:42.066230', 'step': 25623, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:56:42.124306', 'step': 25623, 'epoch': 3}
{'type': 'loss', 'content': 0.040500469505786896, 'timestamp': '2025-10-02 00:56:42.135839', 'step': 25624, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:42.196027', 'step': 25624, 'epoch': 3}
{'type': 'loss', 'content': 0.025448793545365334, 'timestamp': '2025-10-02 00:56:42.198418', 'step': 25625, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:56:42.266373', 'step': 25625, 'epoch': 3}
{'type': 'loss', 'content': 0.0009366041049361229, 'timestamp': '2025-10-02 00:56:42.278423', 'step': 25626, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:42.340085', 'step': 25626, 'epoch': 3}
{'type': 'loss', 'content': 0.026191651821136475, 'timestamp': '2025-10-02 00:56:42.342411', 'step': 25627, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:42.397483', 'step': 25627, 'epoch': 3}
{'type': 'loss', 'content': 0.04253927990794182, 'timestamp': '2025-10-02 00:56:42.403657', 'step': 25628, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:56:42.465958', 'step': 25628, 'epoch': 3}
{'type': 'loss', 'content': 0.01990882121026516, 'timestamp': '2025-10-02 00:56:42.477310', 'step': 25629, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:42.532771', 'step': 25629, 'epoch': 3}
{'type': 'loss', 'content': 0.06830619275569916, 'timestamp': '2025-10-02 00:56:42.535027', 'step': 25630, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:42.590807', 'step': 25630, 'epoch': 3}
{'type': 'loss', 'content': 0.05097711831331253, 'timestamp': '2025-10-02 00:56:42.593542', 'step': 25631, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:42.649038', 'step': 25631, 'epoch': 3}
{'type': 'loss', 'content': 0.015281863510608673, 'timestamp': '2025-10-02 00:56:42.655055', 'step': 25632, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:42.708663', 'step': 25632, 'epoch': 3}
{'type': 'loss', 'content': 0.0726732537150383, 'timestamp': '2025-10-02 00:56:42.710753', 'step': 25633, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:42.764748', 'step': 25633, 'epoch': 3}
{'type': 'loss', 'content': 0.05693751201033592, 'timestamp': '2025-10-02 00:56:42.766974', 'step': 25634, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:42.821904', 'step': 25634, 'epoch': 3}
{'type': 'loss', 'content': 0.04312872514128685, 'timestamp': '2025-10-02 00:56:42.827251', 'step': 25635, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:42.883378', 'step': 25635, 'epoch': 3}
{'type': 'loss', 'content': 0.0010735159739851952, 'timestamp': '2025-10-02 00:56:42.890667', 'step': 25636, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:42.945717', 'step': 25636, 'epoch': 3}
{'type': 'loss', 'content': 0.011485377326607704, 'timestamp': '2025-10-02 00:56:42.948365', 'step': 25637, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:56:43.015795', 'step': 25637, 'epoch': 3}
{'type': 'loss', 'content': 0.027313854545354843, 'timestamp': '2025-10-02 00:56:43.027800', 'step': 25638, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:43.083512', 'step': 25638, 'epoch': 3}
{'type': 'loss', 'content': 0.057323772460222244, 'timestamp': '2025-10-02 00:56:43.089964', 'step': 25639, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:43.146120', 'step': 25639, 'epoch': 3}
{'type': 'loss', 'content': 0.021712470799684525, 'timestamp': '2025-10-02 00:56:43.152083', 'step': 25640, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:43.205939', 'step': 25640, 'epoch': 3}
{'type': 'loss', 'content': 0.09955618530511856, 'timestamp': '2025-10-02 00:56:43.208099', 'step': 25641, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:43.263319', 'step': 25641, 'epoch': 3}
{'type': 'loss', 'content': 0.010435507632791996, 'timestamp': '2025-10-02 00:56:43.265647', 'step': 25642, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:43.321029', 'step': 25642, 'epoch': 3}
{'type': 'loss', 'content': 0.007091077510267496, 'timestamp': '2025-10-02 00:56:43.323264', 'step': 25643, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:43.377150', 'step': 25643, 'epoch': 3}
{'type': 'loss', 'content': 0.11099197715520859, 'timestamp': '2025-10-02 00:56:43.383120', 'step': 25644, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:43.437739', 'step': 25644, 'epoch': 3}
{'type': 'loss', 'content': 0.057707883417606354, 'timestamp': '2025-10-02 00:56:43.440593', 'step': 25645, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:43.497488', 'step': 25645, 'epoch': 3}
{'type': 'loss', 'content': 0.044019244611263275, 'timestamp': '2025-10-02 00:56:43.499855', 'step': 25646, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:43.557048', 'step': 25646, 'epoch': 3}
{'type': 'loss', 'content': 0.09864103049039841, 'timestamp': '2025-10-02 00:56:43.559208', 'step': 25647, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:43.619286', 'step': 25647, 'epoch': 3}
{'type': 'loss', 'content': 0.020636683329939842, 'timestamp': '2025-10-02 00:56:43.625241', 'step': 25648, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:43.679838', 'step': 25648, 'epoch': 3}
{'type': 'loss', 'content': 0.011554227210581303, 'timestamp': '2025-10-02 00:56:43.682357', 'step': 25649, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:43.748071', 'step': 25649, 'epoch': 3}
{'type': 'loss', 'content': 0.06357668340206146, 'timestamp': '2025-10-02 00:56:43.750600', 'step': 25650, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:43.805987', 'step': 25650, 'epoch': 3}
{'type': 'loss', 'content': 0.1191527247428894, 'timestamp': '2025-10-02 00:56:43.808913', 'step': 25651, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:43.864227', 'step': 25651, 'epoch': 3}
{'type': 'loss', 'content': 0.09318014979362488, 'timestamp': '2025-10-02 00:56:43.871870', 'step': 25652, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:43.926759', 'step': 25652, 'epoch': 3}
{'type': 'loss', 'content': 0.02251039445400238, 'timestamp': '2025-10-02 00:56:43.935714', 'step': 25653, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:44.001724', 'step': 25653, 'epoch': 3}
{'type': 'loss', 'content': 0.08526065200567245, 'timestamp': '2025-10-02 00:56:44.010587', 'step': 25654, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:44.064789', 'step': 25654, 'epoch': 3}
{'type': 'loss', 'content': 0.10964857786893845, 'timestamp': '2025-10-02 00:56:44.067044', 'step': 25655, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:56:44.121632', 'step': 25655, 'epoch': 3}
{'type': 'loss', 'content': 0.037631407380104065, 'timestamp': '2025-10-02 00:56:44.128034', 'step': 25656, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:44.183003', 'step': 25656, 'epoch': 3}
{'type': 'loss', 'content': 0.0414314903318882, 'timestamp': '2025-10-02 00:56:44.188034', 'step': 25657, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:44.242678', 'step': 25657, 'epoch': 3}
{'type': 'loss', 'content': 0.020284105092287064, 'timestamp': '2025-10-02 00:56:44.247886', 'step': 25658, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:44.305254', 'step': 25658, 'epoch': 3}
{'type': 'loss', 'content': 0.010683869943022728, 'timestamp': '2025-10-02 00:56:44.307457', 'step': 25659, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:44.361898', 'step': 25659, 'epoch': 3}
{'type': 'loss', 'content': 0.057724639773368835, 'timestamp': '2025-10-02 00:56:44.371882', 'step': 25660, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:44.430019', 'step': 25660, 'epoch': 3}
{'type': 'loss', 'content': 0.11315789818763733, 'timestamp': '2025-10-02 00:56:44.437649', 'step': 25661, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:44.493581', 'step': 25661, 'epoch': 3}
{'type': 'loss', 'content': 0.06839076429605484, 'timestamp': '2025-10-02 00:56:44.495575', 'step': 25662, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:44.550197', 'step': 25662, 'epoch': 3}
{'type': 'loss', 'content': 0.019353676587343216, 'timestamp': '2025-10-02 00:56:44.552797', 'step': 25663, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:44.609251', 'step': 25663, 'epoch': 3}
{'type': 'loss', 'content': 0.04694712907075882, 'timestamp': '2025-10-02 00:56:44.615514', 'step': 25664, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:44.669205', 'step': 25664, 'epoch': 3}
{'type': 'loss', 'content': 0.13533398509025574, 'timestamp': '2025-10-02 00:56:44.671039', 'step': 25665, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:44.726291', 'step': 25665, 'epoch': 3}
{'type': 'loss', 'content': 0.015629593282938004, 'timestamp': '2025-10-02 00:56:44.735825', 'step': 25666, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:44.791429', 'step': 25666, 'epoch': 3}
{'type': 'loss', 'content': 0.0917242020368576, 'timestamp': '2025-10-02 00:56:44.793791', 'step': 25667, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:44.849047', 'step': 25667, 'epoch': 3}
{'type': 'loss', 'content': 0.028596391901373863, 'timestamp': '2025-10-02 00:56:44.855082', 'step': 25668, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:44.911698', 'step': 25668, 'epoch': 3}
{'type': 'loss', 'content': 0.07007874548435211, 'timestamp': '2025-10-02 00:56:44.913949', 'step': 25669, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:44.968176', 'step': 25669, 'epoch': 3}
{'type': 'loss', 'content': 0.016623472794890404, 'timestamp': '2025-10-02 00:56:44.973565', 'step': 25670, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:45.030160', 'step': 25670, 'epoch': 3}
{'type': 'loss', 'content': 0.08189442753791809, 'timestamp': '2025-10-02 00:56:45.038891', 'step': 25671, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:45.094408', 'step': 25671, 'epoch': 3}
{'type': 'loss', 'content': 0.09334272891283035, 'timestamp': '2025-10-02 00:56:45.100361', 'step': 25672, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:45.154132', 'step': 25672, 'epoch': 3}
{'type': 'loss', 'content': 0.10701951384544373, 'timestamp': '2025-10-02 00:56:45.156414', 'step': 25673, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:45.210385', 'step': 25673, 'epoch': 3}
{'type': 'loss', 'content': 0.11976637691259384, 'timestamp': '2025-10-02 00:56:45.212950', 'step': 25674, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:45.267617', 'step': 25674, 'epoch': 3}
{'type': 'loss', 'content': 0.09403325617313385, 'timestamp': '2025-10-02 00:56:45.270238', 'step': 25675, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:45.324829', 'step': 25675, 'epoch': 3}
{'type': 'loss', 'content': 0.12523311376571655, 'timestamp': '2025-10-02 00:56:45.330827', 'step': 25676, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:45.385341', 'step': 25676, 'epoch': 3}
{'type': 'loss', 'content': 0.00039550737710669637, 'timestamp': '2025-10-02 00:56:45.387937', 'step': 25677, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:45.443794', 'step': 25677, 'epoch': 3}
{'type': 'loss', 'content': 0.06251787394285202, 'timestamp': '2025-10-02 00:56:45.446024', 'step': 25678, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:45.503004', 'step': 25678, 'epoch': 3}
{'type': 'loss', 'content': 0.029661815613508224, 'timestamp': '2025-10-02 00:56:45.508214', 'step': 25679, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:45.562644', 'step': 25679, 'epoch': 3}
{'type': 'loss', 'content': 0.03170020505785942, 'timestamp': '2025-10-02 00:56:45.568412', 'step': 25680, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:45.626220', 'step': 25680, 'epoch': 3}
{'type': 'loss', 'content': 0.0016556056216359138, 'timestamp': '2025-10-02 00:56:45.636435', 'step': 25681, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:45.691263', 'step': 25681, 'epoch': 3}
{'type': 'loss', 'content': 0.12546052038669586, 'timestamp': '2025-10-02 00:56:45.693745', 'step': 25682, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:45.749060', 'step': 25682, 'epoch': 3}
{'type': 'loss', 'content': 0.07976784557104111, 'timestamp': '2025-10-02 00:56:45.758571', 'step': 25683, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:45.812890', 'step': 25683, 'epoch': 3}
{'type': 'loss', 'content': 0.12080171704292297, 'timestamp': '2025-10-02 00:56:45.818979', 'step': 25684, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:56:45.872439', 'step': 25684, 'epoch': 3}
{'type': 'loss', 'content': 0.08303806185722351, 'timestamp': '2025-10-02 00:56:45.875347', 'step': 25685, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:45.930856', 'step': 25685, 'epoch': 3}
{'type': 'loss', 'content': 0.06813795119524002, 'timestamp': '2025-10-02 00:56:45.933233', 'step': 25686, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:45.987902', 'step': 25686, 'epoch': 3}
{'type': 'loss', 'content': 0.1518786996603012, 'timestamp': '2025-10-02 00:56:45.990272', 'step': 25687, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:46.045919', 'step': 25687, 'epoch': 3}
{'type': 'loss', 'content': 0.02256048657000065, 'timestamp': '2025-10-02 00:56:46.055539', 'step': 25688, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:46.110358', 'step': 25688, 'epoch': 3}
{'type': 'loss', 'content': 0.018063407391309738, 'timestamp': '2025-10-02 00:56:46.112566', 'step': 25689, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:46.166901', 'step': 25689, 'epoch': 3}
{'type': 'loss', 'content': 0.03327419236302376, 'timestamp': '2025-10-02 00:56:46.169379', 'step': 25690, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:46.225698', 'step': 25690, 'epoch': 3}
{'type': 'loss', 'content': 0.04372076690196991, 'timestamp': '2025-10-02 00:56:46.228064', 'step': 25691, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:46.283286', 'step': 25691, 'epoch': 3}
{'type': 'loss', 'content': 0.03230656310915947, 'timestamp': '2025-10-02 00:56:46.289491', 'step': 25692, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:46.347310', 'step': 25692, 'epoch': 3}
{'type': 'loss', 'content': 0.016460372135043144, 'timestamp': '2025-10-02 00:56:46.358207', 'step': 25693, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:56:46.413254', 'step': 25693, 'epoch': 3}
{'type': 'loss', 'content': 0.07530422508716583, 'timestamp': '2025-10-02 00:56:46.415523', 'step': 25694, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:46.470467', 'step': 25694, 'epoch': 3}
{'type': 'loss', 'content': 0.056425560265779495, 'timestamp': '2025-10-02 00:56:46.472795', 'step': 25695, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:46.527379', 'step': 25695, 'epoch': 3}
{'type': 'loss', 'content': 0.012655269354581833, 'timestamp': '2025-10-02 00:56:46.533212', 'step': 25696, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:46.587956', 'step': 25696, 'epoch': 3}
{'type': 'loss', 'content': 0.06217329949140549, 'timestamp': '2025-10-02 00:56:46.590365', 'step': 25697, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:46.645135', 'step': 25697, 'epoch': 3}
{'type': 'loss', 'content': 0.056389156728982925, 'timestamp': '2025-10-02 00:56:46.651928', 'step': 25698, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:56:46.707283', 'step': 25698, 'epoch': 3}
{'type': 'loss', 'content': 0.10769965499639511, 'timestamp': '2025-10-02 00:56:46.709478', 'step': 25699, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:46.764156', 'step': 25699, 'epoch': 3}
{'type': 'loss', 'content': 0.006900216452777386, 'timestamp': '2025-10-02 00:56:46.770498', 'step': 25700, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:46.825036', 'step': 25700, 'epoch': 3}
{'type': 'loss', 'content': 0.05186459794640541, 'timestamp': '2025-10-02 00:56:46.828047', 'step': 25701, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:56:46.892800', 'step': 25701, 'epoch': 3}
{'type': 'loss', 'content': 0.05392051488161087, 'timestamp': '2025-10-02 00:56:46.903662', 'step': 25702, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:46.961892', 'step': 25702, 'epoch': 3}
{'type': 'loss', 'content': 0.0057749515399336815, 'timestamp': '2025-10-02 00:56:46.964446', 'step': 25703, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:47.020019', 'step': 25703, 'epoch': 3}
{'type': 'loss', 'content': 0.01448079477995634, 'timestamp': '2025-10-02 00:56:47.026522', 'step': 25704, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:56:47.087651', 'step': 25704, 'epoch': 3}
{'type': 'loss', 'content': 0.044461388140916824, 'timestamp': '2025-10-02 00:56:47.098962', 'step': 25705, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:47.158151', 'step': 25705, 'epoch': 3}
{'type': 'loss', 'content': 0.09752510488033295, 'timestamp': '2025-10-02 00:56:47.167677', 'step': 25706, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:47.225392', 'step': 25706, 'epoch': 3}
{'type': 'loss', 'content': 0.01311829686164856, 'timestamp': '2025-10-02 00:56:47.228075', 'step': 25707, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:47.284817', 'step': 25707, 'epoch': 3}
{'type': 'loss', 'content': 0.03206464648246765, 'timestamp': '2025-10-02 00:56:47.291412', 'step': 25708, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:56:47.348005', 'step': 25708, 'epoch': 3}
{'type': 'loss', 'content': 0.05531765893101692, 'timestamp': '2025-10-02 00:56:47.350576', 'step': 25709, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:47.408692', 'step': 25709, 'epoch': 3}
{'type': 'loss', 'content': 0.005107310134917498, 'timestamp': '2025-10-02 00:56:47.415621', 'step': 25710, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:47.471373', 'step': 25710, 'epoch': 3}
{'type': 'loss', 'content': 0.006961216684430838, 'timestamp': '2025-10-02 00:56:47.480176', 'step': 25711, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:47.538710', 'step': 25711, 'epoch': 3}
{'type': 'loss', 'content': 0.026432301849126816, 'timestamp': '2025-10-02 00:56:47.545042', 'step': 25712, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:47.602237', 'step': 25712, 'epoch': 3}
{'type': 'loss', 'content': 0.006647590547800064, 'timestamp': '2025-10-02 00:56:47.611817', 'step': 25713, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:47.668316', 'step': 25713, 'epoch': 3}
{'type': 'loss', 'content': 0.09940563142299652, 'timestamp': '2025-10-02 00:56:47.670643', 'step': 25714, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:56:47.733552', 'step': 25714, 'epoch': 3}
{'type': 'loss', 'content': 0.009146574884653091, 'timestamp': '2025-10-02 00:56:47.743692', 'step': 25715, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:47.800637', 'step': 25715, 'epoch': 3}
{'type': 'loss', 'content': 0.011571886949241161, 'timestamp': '2025-10-02 00:56:47.809949', 'step': 25716, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:47.866938', 'step': 25716, 'epoch': 3}
{'type': 'loss', 'content': 0.020885122939944267, 'timestamp': '2025-10-02 00:56:47.872174', 'step': 25717, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:56:47.951629', 'step': 25717, 'epoch': 3}
{'type': 'loss', 'content': 0.018952637910842896, 'timestamp': '2025-10-02 00:56:47.965451', 'step': 25718, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:48.021179', 'step': 25718, 'epoch': 3}
{'type': 'loss', 'content': 0.08769901841878891, 'timestamp': '2025-10-02 00:56:48.023764', 'step': 25719, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:48.080597', 'step': 25719, 'epoch': 3}
{'type': 'loss', 'content': 0.10205498337745667, 'timestamp': '2025-10-02 00:56:48.087744', 'step': 25720, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:48.143810', 'step': 25720, 'epoch': 3}
{'type': 'loss', 'content': 0.029089389368891716, 'timestamp': '2025-10-02 00:56:48.149141', 'step': 25721, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:48.205004', 'step': 25721, 'epoch': 3}
{'type': 'loss', 'content': 0.06280562281608582, 'timestamp': '2025-10-02 00:56:48.214488', 'step': 25722, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:56:48.270110', 'step': 25722, 'epoch': 3}
{'type': 'loss', 'content': 0.08360517770051956, 'timestamp': '2025-10-02 00:56:48.272473', 'step': 25723, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:48.326823', 'step': 25723, 'epoch': 3}
{'type': 'loss', 'content': 0.06460432708263397, 'timestamp': '2025-10-02 00:56:48.332645', 'step': 25724, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:48.386757', 'step': 25724, 'epoch': 3}
{'type': 'loss', 'content': 0.07222224771976471, 'timestamp': '2025-10-02 00:56:48.393624', 'step': 25725, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:48.448674', 'step': 25725, 'epoch': 3}
{'type': 'loss', 'content': 0.034323640167713165, 'timestamp': '2025-10-02 00:56:48.451110', 'step': 25726, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:48.507631', 'step': 25726, 'epoch': 3}
{'type': 'loss', 'content': 0.009178699925541878, 'timestamp': '2025-10-02 00:56:48.511352', 'step': 25727, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:48.567459', 'step': 25727, 'epoch': 3}
{'type': 'loss', 'content': 0.0630471482872963, 'timestamp': '2025-10-02 00:56:48.573138', 'step': 25728, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:48.628145', 'step': 25728, 'epoch': 3}
{'type': 'loss', 'content': 0.0654831975698471, 'timestamp': '2025-10-02 00:56:48.630614', 'step': 25729, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:48.686264', 'step': 25729, 'epoch': 3}
{'type': 'loss', 'content': 0.03502679616212845, 'timestamp': '2025-10-02 00:56:48.695233', 'step': 25730, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:56:48.750887', 'step': 25730, 'epoch': 3}
{'type': 'loss', 'content': 0.02411874756217003, 'timestamp': '2025-10-02 00:56:48.753385', 'step': 25731, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:48.807982', 'step': 25731, 'epoch': 3}
{'type': 'loss', 'content': 0.1317834109067917, 'timestamp': '2025-10-02 00:56:48.814101', 'step': 25732, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:56:48.869756', 'step': 25732, 'epoch': 3}
{'type': 'loss', 'content': 0.06238919496536255, 'timestamp': '2025-10-02 00:56:48.871613', 'step': 25733, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:48.933931', 'step': 25733, 'epoch': 3}
{'type': 'loss', 'content': 0.041906725615262985, 'timestamp': '2025-10-02 00:56:48.939358', 'step': 25734, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:56:48.995644', 'step': 25734, 'epoch': 3}
{'type': 'loss', 'content': 0.053346410393714905, 'timestamp': '2025-10-02 00:56:48.998492', 'step': 25735, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:49.054978', 'step': 25735, 'epoch': 3}
{'type': 'loss', 'content': 0.08996235579252243, 'timestamp': '2025-10-02 00:56:49.065239', 'step': 25736, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:56:49.120082', 'step': 25736, 'epoch': 3}
{'type': 'loss', 'content': 0.10958018153905869, 'timestamp': '2025-10-02 00:56:49.122650', 'step': 25737, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:49.178328', 'step': 25737, 'epoch': 3}
{'type': 'loss', 'content': 0.019316963851451874, 'timestamp': '2025-10-02 00:56:49.183675', 'step': 25738, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:56:49.239264', 'step': 25738, 'epoch': 3}
{'type': 'loss', 'content': 0.026197729632258415, 'timestamp': '2025-10-02 00:56:49.246057', 'step': 25739, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:56:49.301362', 'step': 25739, 'epoch': 3}
{'type': 'loss', 'content': 0.08528704196214676, 'timestamp': '2025-10-02 00:56:49.311468', 'step': 25740, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:49.366893', 'step': 25740, 'epoch': 3}
{'type': 'loss', 'content': 0.07240289449691772, 'timestamp': '2025-10-02 00:56:49.369211', 'step': 25741, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:56:49.424384', 'step': 25741, 'epoch': 3}
{'type': 'loss', 'content': 0.03920833021402359, 'timestamp': '2025-10-02 00:56:49.427690', 'step': 25742, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:49.484535', 'step': 25742, 'epoch': 3}
{'type': 'loss', 'content': 0.02042560465633869, 'timestamp': '2025-10-02 00:56:49.489697', 'step': 25743, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:56:49.544875', 'step': 25743, 'epoch': 3}
{'type': 'loss', 'content': 0.0366443395614624, 'timestamp': '2025-10-02 00:56:49.551660', 'step': 25744, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:56:49.611398', 'step': 25744, 'epoch': 3}
{'type': 'loss', 'content': 0.020839137956500053, 'timestamp': '2025-10-02 00:56:49.622686', 'step': 25745, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:56:49.678008', 'step': 25745, 'epoch': 3}
{'type': 'loss', 'content': 0.013181806541979313, 'timestamp': '2025-10-02 00:56:49.687139', 'step': 25746, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:49.742753', 'step': 25746, 'epoch': 3}
{'type': 'loss', 'content': 0.050669148564338684, 'timestamp': '2025-10-02 00:56:49.745288', 'step': 25747, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:56:49.800210', 'step': 25747, 'epoch': 3}
{'type': 'loss', 'content': 0.08345892280340195, 'timestamp': '2025-10-02 00:56:49.805999', 'step': 25748, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:56:49.860804', 'step': 25748, 'epoch': 3}
{'type': 'loss', 'content': 0.06608033180236816, 'timestamp': '2025-10-02 00:56:49.863230', 'step': 25749, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:56:49.917682', 'step': 25749, 'epoch': 3}
{'type': 'loss', 'content': 0.10274800658226013, 'timestamp': '2025-10-02 00:56:49.920467', 'step': 25750, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:56:49.975702', 'step': 25750, 'epoch': 3}
{'type': 'loss', 'content': 0.02949175238609314, 'timestamp': '2025-10-02 00:56:49.978497', 'step': 25751, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:56:50.033759', 'step': 25751, 'epoch': 3}
{'type': 'loss', 'content': 0.007980452850461006, 'timestamp': '2025-10-02 00:56:50.039819', 'step': 25752, 'epoch': 3}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:57:16.804620', 'step': 25752, 'epoch': 3}
{'type': 'pplx', 'content': 96.85950468681466, 'timestamp': '2025-10-02 00:57:16.808059', 'step': 25752, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:16.863232', 'step': 25752, 'epoch': 3}
{'type': 'loss', 'content': 0.055987853556871414, 'timestamp': '2025-10-02 00:57:16.868897', 'step': 25753, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:16.924642', 'step': 25753, 'epoch': 3}
{'type': 'loss', 'content': 0.11611161381006241, 'timestamp': '2025-10-02 00:57:16.926638', 'step': 25754, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:16.981872', 'step': 25754, 'epoch': 3}
{'type': 'loss', 'content': 0.031110214069485664, 'timestamp': '2025-10-02 00:57:16.984301', 'step': 25755, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:17.037998', 'step': 25755, 'epoch': 3}
{'type': 'loss', 'content': 0.19308340549468994, 'timestamp': '2025-10-02 00:57:17.044477', 'step': 25756, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:17.098339', 'step': 25756, 'epoch': 3}
{'type': 'loss', 'content': 0.08448679745197296, 'timestamp': '2025-10-02 00:57:17.100681', 'step': 25757, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:17.156459', 'step': 25757, 'epoch': 3}
{'type': 'loss', 'content': 0.023839566856622696, 'timestamp': '2025-10-02 00:57:17.158979', 'step': 25758, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:17.214950', 'step': 25758, 'epoch': 3}
{'type': 'loss', 'content': 0.040090639144182205, 'timestamp': '2025-10-02 00:57:17.216955', 'step': 25759, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:17.271064', 'step': 25759, 'epoch': 3}
{'type': 'loss', 'content': 0.06634257733821869, 'timestamp': '2025-10-02 00:57:17.276859', 'step': 25760, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:17.333209', 'step': 25760, 'epoch': 3}
{'type': 'loss', 'content': 0.019727623090147972, 'timestamp': '2025-10-02 00:57:17.342267', 'step': 25761, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:17.397055', 'step': 25761, 'epoch': 3}
{'type': 'loss', 'content': 0.03108236752450466, 'timestamp': '2025-10-02 00:57:17.403887', 'step': 25762, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:17.460309', 'step': 25762, 'epoch': 3}
{'type': 'loss', 'content': 0.08439335972070694, 'timestamp': '2025-10-02 00:57:17.462740', 'step': 25763, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:17.518655', 'step': 25763, 'epoch': 3}
{'type': 'loss', 'content': 0.015122883021831512, 'timestamp': '2025-10-02 00:57:17.528918', 'step': 25764, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:17.582815', 'step': 25764, 'epoch': 3}
{'type': 'loss', 'content': 0.023994846269488335, 'timestamp': '2025-10-02 00:57:17.589882', 'step': 25765, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:17.644755', 'step': 25765, 'epoch': 3}
{'type': 'loss', 'content': 0.04719102010130882, 'timestamp': '2025-10-02 00:57:17.653928', 'step': 25766, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:17.709036', 'step': 25766, 'epoch': 3}
{'type': 'loss', 'content': 0.07129703462123871, 'timestamp': '2025-10-02 00:57:17.711626', 'step': 25767, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:17.766048', 'step': 25767, 'epoch': 3}
{'type': 'loss', 'content': 0.03493804484605789, 'timestamp': '2025-10-02 00:57:17.771775', 'step': 25768, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:17.825430', 'step': 25768, 'epoch': 3}
{'type': 'loss', 'content': 0.07366689294576645, 'timestamp': '2025-10-02 00:57:17.827288', 'step': 25769, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:17.882127', 'step': 25769, 'epoch': 3}
{'type': 'loss', 'content': 0.10726200044155121, 'timestamp': '2025-10-02 00:57:17.884283', 'step': 25770, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:17.940538', 'step': 25770, 'epoch': 3}
{'type': 'loss', 'content': 0.02277897484600544, 'timestamp': '2025-10-02 00:57:17.945982', 'step': 25771, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:57:18.007353', 'step': 25771, 'epoch': 3}
{'type': 'loss', 'content': 0.042972076684236526, 'timestamp': '2025-10-02 00:57:18.018610', 'step': 25772, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:18.072330', 'step': 25772, 'epoch': 3}
{'type': 'loss', 'content': 0.07990791648626328, 'timestamp': '2025-10-02 00:57:18.074611', 'step': 25773, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:18.128377', 'step': 25773, 'epoch': 3}
{'type': 'loss', 'content': 0.1171160638332367, 'timestamp': '2025-10-02 00:57:18.130445', 'step': 25774, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:18.184062', 'step': 25774, 'epoch': 3}
{'type': 'loss', 'content': 0.10258852690458298, 'timestamp': '2025-10-02 00:57:18.185828', 'step': 25775, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:18.241099', 'step': 25775, 'epoch': 3}
{'type': 'loss', 'content': 0.04662685841321945, 'timestamp': '2025-10-02 00:57:18.251512', 'step': 25776, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:18.305268', 'step': 25776, 'epoch': 3}
{'type': 'loss', 'content': 0.0789128839969635, 'timestamp': '2025-10-02 00:57:18.307127', 'step': 25777, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:18.361878', 'step': 25777, 'epoch': 3}
{'type': 'loss', 'content': 0.07839666306972504, 'timestamp': '2025-10-02 00:57:18.368925', 'step': 25778, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:18.424092', 'step': 25778, 'epoch': 3}
{'type': 'loss', 'content': 0.008104194886982441, 'timestamp': '2025-10-02 00:57:18.429408', 'step': 25779, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:18.483583', 'step': 25779, 'epoch': 3}
{'type': 'loss', 'content': 0.05947047472000122, 'timestamp': '2025-10-02 00:57:18.493505', 'step': 25780, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:57:18.553234', 'step': 25780, 'epoch': 3}
{'type': 'loss', 'content': 0.032476648688316345, 'timestamp': '2025-10-02 00:57:18.564481', 'step': 25781, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:18.619437', 'step': 25781, 'epoch': 3}
{'type': 'loss', 'content': 0.016177712008357048, 'timestamp': '2025-10-02 00:57:18.624897', 'step': 25782, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:18.683687', 'step': 25782, 'epoch': 3}
{'type': 'loss', 'content': 0.017066510394215584, 'timestamp': '2025-10-02 00:57:18.693961', 'step': 25783, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:18.748427', 'step': 25783, 'epoch': 3}
{'type': 'loss', 'content': 0.011920438148081303, 'timestamp': '2025-10-02 00:57:18.758228', 'step': 25784, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:57:18.811799', 'step': 25784, 'epoch': 3}
{'type': 'loss', 'content': 0.08129216730594635, 'timestamp': '2025-10-02 00:57:18.814266', 'step': 25785, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:18.870054', 'step': 25785, 'epoch': 3}
{'type': 'loss', 'content': 0.009864956140518188, 'timestamp': '2025-10-02 00:57:18.872185', 'step': 25786, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:18.926954', 'step': 25786, 'epoch': 3}
{'type': 'loss', 'content': 0.006754220463335514, 'timestamp': '2025-10-02 00:57:18.933818', 'step': 25787, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:18.988847', 'step': 25787, 'epoch': 3}
{'type': 'loss', 'content': 0.06606551259756088, 'timestamp': '2025-10-02 00:57:18.994592', 'step': 25788, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:19.049675', 'step': 25788, 'epoch': 3}
{'type': 'loss', 'content': 0.13836708664894104, 'timestamp': '2025-10-02 00:57:19.059889', 'step': 25789, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:19.114366', 'step': 25789, 'epoch': 3}
{'type': 'loss', 'content': 0.028060538694262505, 'timestamp': '2025-10-02 00:57:19.121394', 'step': 25790, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:19.176998', 'step': 25790, 'epoch': 3}
{'type': 'loss', 'content': 0.08176302164793015, 'timestamp': '2025-10-02 00:57:19.186503', 'step': 25791, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:19.242143', 'step': 25791, 'epoch': 3}
{'type': 'loss', 'content': 0.02233586274087429, 'timestamp': '2025-10-02 00:57:19.249851', 'step': 25792, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:19.303984', 'step': 25792, 'epoch': 3}
{'type': 'loss', 'content': 0.020645393058657646, 'timestamp': '2025-10-02 00:57:19.306456', 'step': 25793, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:19.360766', 'step': 25793, 'epoch': 3}
{'type': 'loss', 'content': 0.06429626047611237, 'timestamp': '2025-10-02 00:57:19.363812', 'step': 25794, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:19.419501', 'step': 25794, 'epoch': 3}
{'type': 'loss', 'content': 0.02805083803832531, 'timestamp': '2025-10-02 00:57:19.424844', 'step': 25795, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:19.480635', 'step': 25795, 'epoch': 3}
{'type': 'loss', 'content': 0.039033401757478714, 'timestamp': '2025-10-02 00:57:19.490530', 'step': 25796, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:57:19.544655', 'step': 25796, 'epoch': 3}
{'type': 'loss', 'content': 0.09475625306367874, 'timestamp': '2025-10-02 00:57:19.547003', 'step': 25797, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:19.602357', 'step': 25797, 'epoch': 3}
{'type': 'loss', 'content': 0.028017420321702957, 'timestamp': '2025-10-02 00:57:19.604534', 'step': 25798, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:19.658799', 'step': 25798, 'epoch': 3}
{'type': 'loss', 'content': 0.056903909891843796, 'timestamp': '2025-10-02 00:57:19.660983', 'step': 25799, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:19.715368', 'step': 25799, 'epoch': 3}
{'type': 'loss', 'content': 0.01272972859442234, 'timestamp': '2025-10-02 00:57:19.720947', 'step': 25800, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:19.774433', 'step': 25800, 'epoch': 3}
{'type': 'loss', 'content': 0.07976116240024567, 'timestamp': '2025-10-02 00:57:19.776314', 'step': 25801, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:19.830679', 'step': 25801, 'epoch': 3}
{'type': 'loss', 'content': 0.09784834831953049, 'timestamp': '2025-10-02 00:57:19.832562', 'step': 25802, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:57:19.886520', 'step': 25802, 'epoch': 3}
{'type': 'loss', 'content': 0.09892015904188156, 'timestamp': '2025-10-02 00:57:19.890745', 'step': 25803, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:57:19.953620', 'step': 25803, 'epoch': 3}
{'type': 'loss', 'content': 0.009438689798116684, 'timestamp': '2025-10-02 00:57:19.965032', 'step': 25804, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:20.019502', 'step': 25804, 'epoch': 3}
{'type': 'loss', 'content': 0.04790569469332695, 'timestamp': '2025-10-02 00:57:20.028683', 'step': 25805, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:20.085626', 'step': 25805, 'epoch': 3}
{'type': 'loss', 'content': 0.02267235890030861, 'timestamp': '2025-10-02 00:57:20.094979', 'step': 25806, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:20.149662', 'step': 25806, 'epoch': 3}
{'type': 'loss', 'content': 0.039826855063438416, 'timestamp': '2025-10-02 00:57:20.151563', 'step': 25807, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:20.206575', 'step': 25807, 'epoch': 3}
{'type': 'loss', 'content': 0.01441067736595869, 'timestamp': '2025-10-02 00:57:20.212125', 'step': 25808, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:20.268343', 'step': 25808, 'epoch': 3}
{'type': 'loss', 'content': 0.04317201301455498, 'timestamp': '2025-10-02 00:57:20.270707', 'step': 25809, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:20.325182', 'step': 25809, 'epoch': 3}
{'type': 'loss', 'content': 0.01531762070953846, 'timestamp': '2025-10-02 00:57:20.327120', 'step': 25810, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:20.381829', 'step': 25810, 'epoch': 3}
{'type': 'loss', 'content': 0.05971580743789673, 'timestamp': '2025-10-02 00:57:20.389111', 'step': 25811, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:20.444418', 'step': 25811, 'epoch': 3}
{'type': 'loss', 'content': 0.01863950490951538, 'timestamp': '2025-10-02 00:57:20.452819', 'step': 25812, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:57:20.517925', 'step': 25812, 'epoch': 3}
{'type': 'loss', 'content': 0.10067863762378693, 'timestamp': '2025-10-02 00:57:20.520654', 'step': 25813, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:20.575320', 'step': 25813, 'epoch': 3}
{'type': 'loss', 'content': 0.028265872970223427, 'timestamp': '2025-10-02 00:57:20.577421', 'step': 25814, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:20.633675', 'step': 25814, 'epoch': 3}
{'type': 'loss', 'content': 0.04031992331147194, 'timestamp': '2025-10-02 00:57:20.635454', 'step': 25815, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:20.691342', 'step': 25815, 'epoch': 3}
{'type': 'loss', 'content': 0.0295212734490633, 'timestamp': '2025-10-02 00:57:20.697435', 'step': 25816, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:57:20.751207', 'step': 25816, 'epoch': 3}
{'type': 'loss', 'content': 0.04482636973261833, 'timestamp': '2025-10-02 00:57:20.753570', 'step': 25817, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:20.808049', 'step': 25817, 'epoch': 3}
{'type': 'loss', 'content': 0.03395436331629753, 'timestamp': '2025-10-02 00:57:20.813555', 'step': 25818, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:20.869217', 'step': 25818, 'epoch': 3}
{'type': 'loss', 'content': 0.05240744724869728, 'timestamp': '2025-10-02 00:57:20.878690', 'step': 25819, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:20.934394', 'step': 25819, 'epoch': 3}
{'type': 'loss', 'content': 0.027056263759732246, 'timestamp': '2025-10-02 00:57:20.944726', 'step': 25820, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:21.002083', 'step': 25820, 'epoch': 3}
{'type': 'loss', 'content': 0.04123584181070328, 'timestamp': '2025-10-02 00:57:21.013044', 'step': 25821, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:21.068364', 'step': 25821, 'epoch': 3}
{'type': 'loss', 'content': 0.05354957655072212, 'timestamp': '2025-10-02 00:57:21.070353', 'step': 25822, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:21.125397', 'step': 25822, 'epoch': 3}
{'type': 'loss', 'content': 0.03592007979750633, 'timestamp': '2025-10-02 00:57:21.134396', 'step': 25823, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:21.190740', 'step': 25823, 'epoch': 3}
{'type': 'loss', 'content': 0.008686017245054245, 'timestamp': '2025-10-02 00:57:21.196728', 'step': 25824, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:21.250765', 'step': 25824, 'epoch': 3}
{'type': 'loss', 'content': 0.10062588006258011, 'timestamp': '2025-10-02 00:57:21.256133', 'step': 25825, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:57:21.311458', 'step': 25825, 'epoch': 3}
{'type': 'loss', 'content': 0.09264519065618515, 'timestamp': '2025-10-02 00:57:21.313853', 'step': 25826, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:21.369200', 'step': 25826, 'epoch': 3}
{'type': 'loss', 'content': 0.10563637316226959, 'timestamp': '2025-10-02 00:57:21.372563', 'step': 25827, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:21.427883', 'step': 25827, 'epoch': 3}
{'type': 'loss', 'content': 0.012849689461290836, 'timestamp': '2025-10-02 00:57:21.434038', 'step': 25828, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:57:21.488638', 'step': 25828, 'epoch': 3}
{'type': 'loss', 'content': 0.022694461047649384, 'timestamp': '2025-10-02 00:57:21.491399', 'step': 25829, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:21.545912', 'step': 25829, 'epoch': 3}
{'type': 'loss', 'content': 0.050412703305482864, 'timestamp': '2025-10-02 00:57:21.551256', 'step': 25830, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:21.607588', 'step': 25830, 'epoch': 3}
{'type': 'loss', 'content': 0.07354114949703217, 'timestamp': '2025-10-02 00:57:21.609901', 'step': 25831, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:21.664636', 'step': 25831, 'epoch': 3}
{'type': 'loss', 'content': 0.047458138316869736, 'timestamp': '2025-10-02 00:57:21.674373', 'step': 25832, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:21.730199', 'step': 25832, 'epoch': 3}
{'type': 'loss', 'content': 0.07082594931125641, 'timestamp': '2025-10-02 00:57:21.732577', 'step': 25833, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:21.788420', 'step': 25833, 'epoch': 3}
{'type': 'loss', 'content': 0.023417387157678604, 'timestamp': '2025-10-02 00:57:21.795704', 'step': 25834, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:21.851168', 'step': 25834, 'epoch': 3}
{'type': 'loss', 'content': 0.07353640347719193, 'timestamp': '2025-10-02 00:57:21.853291', 'step': 25835, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:21.907942', 'step': 25835, 'epoch': 3}
{'type': 'loss', 'content': 0.118826724588871, 'timestamp': '2025-10-02 00:57:21.914236', 'step': 25836, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:21.968841', 'step': 25836, 'epoch': 3}
{'type': 'loss', 'content': 0.04586612805724144, 'timestamp': '2025-10-02 00:57:21.971220', 'step': 25837, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:22.026004', 'step': 25837, 'epoch': 3}
{'type': 'loss', 'content': 0.01546456664800644, 'timestamp': '2025-10-02 00:57:22.028539', 'step': 25838, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:22.083146', 'step': 25838, 'epoch': 3}
{'type': 'loss', 'content': 0.11813245713710785, 'timestamp': '2025-10-02 00:57:22.085396', 'step': 25839, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:57:22.145991', 'step': 25839, 'epoch': 3}
{'type': 'loss', 'content': 0.015090898610651493, 'timestamp': '2025-10-02 00:57:22.152276', 'step': 25840, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:22.206667', 'step': 25840, 'epoch': 3}
{'type': 'loss', 'content': 0.019059093669056892, 'timestamp': '2025-10-02 00:57:22.216864', 'step': 25841, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:22.272191', 'step': 25841, 'epoch': 3}
{'type': 'loss', 'content': 0.07106877118349075, 'timestamp': '2025-10-02 00:57:22.274801', 'step': 25842, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:22.334547', 'step': 25842, 'epoch': 3}
{'type': 'loss', 'content': 0.02943723089993, 'timestamp': '2025-10-02 00:57:22.344742', 'step': 25843, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:22.400158', 'step': 25843, 'epoch': 3}
{'type': 'loss', 'content': 0.01909383200109005, 'timestamp': '2025-10-02 00:57:22.410503', 'step': 25844, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:22.464818', 'step': 25844, 'epoch': 3}
{'type': 'loss', 'content': 0.011386732570827007, 'timestamp': '2025-10-02 00:57:22.467460', 'step': 25845, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:22.521816', 'step': 25845, 'epoch': 3}
{'type': 'loss', 'content': 0.07876546680927277, 'timestamp': '2025-10-02 00:57:22.527324', 'step': 25846, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:22.582713', 'step': 25846, 'epoch': 3}
{'type': 'loss', 'content': 0.08059372007846832, 'timestamp': '2025-10-02 00:57:22.585054', 'step': 25847, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:22.639835', 'step': 25847, 'epoch': 3}
{'type': 'loss', 'content': 0.05273335427045822, 'timestamp': '2025-10-02 00:57:22.646157', 'step': 25848, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:22.702231', 'step': 25848, 'epoch': 3}
{'type': 'loss', 'content': 0.048412472009658813, 'timestamp': '2025-10-02 00:57:22.707805', 'step': 25849, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:22.763072', 'step': 25849, 'epoch': 3}
{'type': 'loss', 'content': 0.06839560717344284, 'timestamp': '2025-10-02 00:57:22.765460', 'step': 25850, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:57:22.835341', 'step': 25850, 'epoch': 3}
{'type': 'loss', 'content': 0.00010860887414310127, 'timestamp': '2025-10-02 00:57:22.847635', 'step': 25851, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:22.903644', 'step': 25851, 'epoch': 3}
{'type': 'loss', 'content': 0.04333753511309624, 'timestamp': '2025-10-02 00:57:22.909468', 'step': 25852, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:22.963708', 'step': 25852, 'epoch': 3}
{'type': 'loss', 'content': 0.04518032819032669, 'timestamp': '2025-10-02 00:57:22.966123', 'step': 25853, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:57:23.027794', 'step': 25853, 'epoch': 3}
{'type': 'loss', 'content': 0.01445323321968317, 'timestamp': '2025-10-02 00:57:23.038304', 'step': 25854, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:23.093593', 'step': 25854, 'epoch': 3}
{'type': 'loss', 'content': 0.014927821233868599, 'timestamp': '2025-10-02 00:57:23.095842', 'step': 25855, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:23.151105', 'step': 25855, 'epoch': 3}
{'type': 'loss', 'content': 0.007289361208677292, 'timestamp': '2025-10-02 00:57:23.161432', 'step': 25856, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:23.215066', 'step': 25856, 'epoch': 3}
{'type': 'loss', 'content': 0.06508226692676544, 'timestamp': '2025-10-02 00:57:23.217887', 'step': 25857, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:23.274240', 'step': 25857, 'epoch': 3}
{'type': 'loss', 'content': 0.05947038531303406, 'timestamp': '2025-10-02 00:57:23.277110', 'step': 25858, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:23.334674', 'step': 25858, 'epoch': 3}
{'type': 'loss', 'content': 0.12378902733325958, 'timestamp': '2025-10-02 00:57:23.337922', 'step': 25859, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:23.395634', 'step': 25859, 'epoch': 3}
{'type': 'loss', 'content': 0.047405537217855453, 'timestamp': '2025-10-02 00:57:23.405889', 'step': 25860, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:23.467641', 'step': 25860, 'epoch': 3}
{'type': 'loss', 'content': 0.03262627124786377, 'timestamp': '2025-10-02 00:57:23.474938', 'step': 25861, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:57:23.540011', 'step': 25861, 'epoch': 3}
{'type': 'loss', 'content': 0.038265660405159, 'timestamp': '2025-10-02 00:57:23.550822', 'step': 25862, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:23.610881', 'step': 25862, 'epoch': 3}
{'type': 'loss', 'content': 0.042697563767433167, 'timestamp': '2025-10-02 00:57:23.617849', 'step': 25863, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:23.676914', 'step': 25863, 'epoch': 3}
{'type': 'loss', 'content': 0.019475722685456276, 'timestamp': '2025-10-02 00:57:23.687243', 'step': 25864, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:23.743483', 'step': 25864, 'epoch': 3}
{'type': 'loss', 'content': 0.006861092988401651, 'timestamp': '2025-10-02 00:57:23.749067', 'step': 25865, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:23.805667', 'step': 25865, 'epoch': 3}
{'type': 'loss', 'content': 0.04442570358514786, 'timestamp': '2025-10-02 00:57:23.808626', 'step': 25866, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:23.864711', 'step': 25866, 'epoch': 3}
{'type': 'loss', 'content': 0.05899881571531296, 'timestamp': '2025-10-02 00:57:23.867213', 'step': 25867, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:23.922715', 'step': 25867, 'epoch': 3}
{'type': 'loss', 'content': 0.0866575613617897, 'timestamp': '2025-10-02 00:57:23.929328', 'step': 25868, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:23.985099', 'step': 25868, 'epoch': 3}
{'type': 'loss', 'content': 0.03125523775815964, 'timestamp': '2025-10-02 00:57:23.987513', 'step': 25869, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:24.043606', 'step': 25869, 'epoch': 3}
{'type': 'loss', 'content': 0.07253797352313995, 'timestamp': '2025-10-02 00:57:24.046103', 'step': 25870, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:24.102982', 'step': 25870, 'epoch': 3}
{'type': 'loss', 'content': 0.04489773139357567, 'timestamp': '2025-10-02 00:57:24.105581', 'step': 25871, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:24.160846', 'step': 25871, 'epoch': 3}
{'type': 'loss', 'content': 0.05799001455307007, 'timestamp': '2025-10-02 00:57:24.167123', 'step': 25872, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:24.224665', 'step': 25872, 'epoch': 3}
{'type': 'loss', 'content': 0.061277441680431366, 'timestamp': '2025-10-02 00:57:24.231966', 'step': 25873, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:57:24.288538', 'step': 25873, 'epoch': 3}
{'type': 'loss', 'content': 0.055393315851688385, 'timestamp': '2025-10-02 00:57:24.291275', 'step': 25874, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:24.348878', 'step': 25874, 'epoch': 3}
{'type': 'loss', 'content': 0.05272958427667618, 'timestamp': '2025-10-02 00:57:24.358389', 'step': 25875, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:24.413843', 'step': 25875, 'epoch': 3}
{'type': 'loss', 'content': 0.030420884490013123, 'timestamp': '2025-10-02 00:57:24.420092', 'step': 25876, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:24.475052', 'step': 25876, 'epoch': 3}
{'type': 'loss', 'content': 0.0786992609500885, 'timestamp': '2025-10-02 00:57:24.477758', 'step': 25877, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:24.533744', 'step': 25877, 'epoch': 3}
{'type': 'loss', 'content': 0.08839473128318787, 'timestamp': '2025-10-02 00:57:24.536040', 'step': 25878, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:24.590672', 'step': 25878, 'epoch': 3}
{'type': 'loss', 'content': 0.04175802320241928, 'timestamp': '2025-10-02 00:57:24.596169', 'step': 25879, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:24.652122', 'step': 25879, 'epoch': 3}
{'type': 'loss', 'content': 0.020240988582372665, 'timestamp': '2025-10-02 00:57:24.658476', 'step': 25880, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:24.713504', 'step': 25880, 'epoch': 3}
{'type': 'loss', 'content': 0.0853017270565033, 'timestamp': '2025-10-02 00:57:24.715869', 'step': 25881, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:24.770193', 'step': 25881, 'epoch': 3}
{'type': 'loss', 'content': 0.05582781881093979, 'timestamp': '2025-10-02 00:57:24.772451', 'step': 25882, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:24.828397', 'step': 25882, 'epoch': 3}
{'type': 'loss', 'content': 0.033477671444416046, 'timestamp': '2025-10-02 00:57:24.833816', 'step': 25883, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:24.888261', 'step': 25883, 'epoch': 3}
{'type': 'loss', 'content': 0.0909525603055954, 'timestamp': '2025-10-02 00:57:24.894136', 'step': 25884, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:24.949776', 'step': 25884, 'epoch': 3}
{'type': 'loss', 'content': 0.055927012115716934, 'timestamp': '2025-10-02 00:57:24.952107', 'step': 25885, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:25.006513', 'step': 25885, 'epoch': 3}
{'type': 'loss', 'content': 0.010014434345066547, 'timestamp': '2025-10-02 00:57:25.008477', 'step': 25886, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:25.063154', 'step': 25886, 'epoch': 3}
{'type': 'loss', 'content': 0.043975040316581726, 'timestamp': '2025-10-02 00:57:25.065531', 'step': 25887, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:25.120502', 'step': 25887, 'epoch': 3}
{'type': 'loss', 'content': 0.03331676125526428, 'timestamp': '2025-10-02 00:57:25.128401', 'step': 25888, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:25.183417', 'step': 25888, 'epoch': 3}
{'type': 'loss', 'content': 0.011850384995341301, 'timestamp': '2025-10-02 00:57:25.192715', 'step': 25889, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:25.248570', 'step': 25889, 'epoch': 3}
{'type': 'loss', 'content': 0.0011912448098883033, 'timestamp': '2025-10-02 00:57:25.251648', 'step': 25890, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:25.308199', 'step': 25890, 'epoch': 3}
{'type': 'loss', 'content': 0.02885451726615429, 'timestamp': '2025-10-02 00:57:25.317718', 'step': 25891, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:25.372278', 'step': 25891, 'epoch': 3}
{'type': 'loss', 'content': 0.06573302298784256, 'timestamp': '2025-10-02 00:57:25.378560', 'step': 25892, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:57:25.440037', 'step': 25892, 'epoch': 3}
{'type': 'loss', 'content': 0.016921352595090866, 'timestamp': '2025-10-02 00:57:25.451366', 'step': 25893, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:25.507737', 'step': 25893, 'epoch': 3}
{'type': 'loss', 'content': 0.026944506913423538, 'timestamp': '2025-10-02 00:57:25.510307', 'step': 25894, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:25.564910', 'step': 25894, 'epoch': 3}
{'type': 'loss', 'content': 0.027920417487621307, 'timestamp': '2025-10-02 00:57:25.570507', 'step': 25895, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:25.626767', 'step': 25895, 'epoch': 3}
{'type': 'loss', 'content': 0.04555799812078476, 'timestamp': '2025-10-02 00:57:25.633087', 'step': 25896, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:25.688038', 'step': 25896, 'epoch': 3}
{'type': 'loss', 'content': 0.04219076782464981, 'timestamp': '2025-10-02 00:57:25.693702', 'step': 25897, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:25.748429', 'step': 25897, 'epoch': 3}
{'type': 'loss', 'content': 0.11485898494720459, 'timestamp': '2025-10-02 00:57:25.750786', 'step': 25898, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:57:25.812486', 'step': 25898, 'epoch': 3}
{'type': 'loss', 'content': 0.02067251317203045, 'timestamp': '2025-10-02 00:57:25.822949', 'step': 25899, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:25.877236', 'step': 25899, 'epoch': 3}
{'type': 'loss', 'content': 0.04740524664521217, 'timestamp': '2025-10-02 00:57:25.883567', 'step': 25900, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:57:25.944721', 'step': 25900, 'epoch': 3}
{'type': 'loss', 'content': 0.005049331113696098, 'timestamp': '2025-10-02 00:57:25.956232', 'step': 25901, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:26.012078', 'step': 25901, 'epoch': 3}
{'type': 'loss', 'content': 0.03287510946393013, 'timestamp': '2025-10-02 00:57:26.021595', 'step': 25902, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:26.076707', 'step': 25902, 'epoch': 3}
{'type': 'loss', 'content': 0.05668732896447182, 'timestamp': '2025-10-02 00:57:26.079105', 'step': 25903, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:26.133938', 'step': 25903, 'epoch': 3}
{'type': 'loss', 'content': 0.07772456854581833, 'timestamp': '2025-10-02 00:57:26.140058', 'step': 25904, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:57:26.200487', 'step': 25904, 'epoch': 3}
{'type': 'loss', 'content': 0.04306722432374954, 'timestamp': '2025-10-02 00:57:26.211807', 'step': 25905, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:26.268023', 'step': 25905, 'epoch': 3}
{'type': 'loss', 'content': 0.010401299223303795, 'timestamp': '2025-10-02 00:57:26.270454', 'step': 25906, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:26.324826', 'step': 25906, 'epoch': 3}
{'type': 'loss', 'content': 0.048149678856134415, 'timestamp': '2025-10-02 00:57:26.331934', 'step': 25907, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:26.386432', 'step': 25907, 'epoch': 3}
{'type': 'loss', 'content': 0.05492718145251274, 'timestamp': '2025-10-02 00:57:26.392331', 'step': 25908, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:26.447815', 'step': 25908, 'epoch': 3}
{'type': 'loss', 'content': 0.037072017788887024, 'timestamp': '2025-10-02 00:57:26.450133', 'step': 25909, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:26.504152', 'step': 25909, 'epoch': 3}
{'type': 'loss', 'content': 0.0663551613688469, 'timestamp': '2025-10-02 00:57:26.506732', 'step': 25910, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:26.561667', 'step': 25910, 'epoch': 3}
{'type': 'loss', 'content': 0.017692938446998596, 'timestamp': '2025-10-02 00:57:26.571007', 'step': 25911, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:26.625759', 'step': 25911, 'epoch': 3}
{'type': 'loss', 'content': 0.08407141268253326, 'timestamp': '2025-10-02 00:57:26.631777', 'step': 25912, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:26.689937', 'step': 25912, 'epoch': 3}
{'type': 'loss', 'content': 0.05848969519138336, 'timestamp': '2025-10-02 00:57:26.700945', 'step': 25913, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:57:26.776431', 'step': 25913, 'epoch': 3}
{'type': 'loss', 'content': 0.04622603952884674, 'timestamp': '2025-10-02 00:57:26.787080', 'step': 25914, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:26.842109', 'step': 25914, 'epoch': 3}
{'type': 'loss', 'content': 0.09175318479537964, 'timestamp': '2025-10-02 00:57:26.844490', 'step': 25915, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:26.899173', 'step': 25915, 'epoch': 3}
{'type': 'loss', 'content': 0.0575626865029335, 'timestamp': '2025-10-02 00:57:26.906522', 'step': 25916, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:57:26.963846', 'step': 25916, 'epoch': 3}
{'type': 'loss', 'content': 0.08887946605682373, 'timestamp': '2025-10-02 00:57:26.966804', 'step': 25917, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:27.026133', 'step': 25917, 'epoch': 3}
{'type': 'loss', 'content': 0.018748968839645386, 'timestamp': '2025-10-02 00:57:27.036308', 'step': 25918, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:27.093412', 'step': 25918, 'epoch': 3}
{'type': 'loss', 'content': 0.1447782814502716, 'timestamp': '2025-10-02 00:57:27.099456', 'step': 25919, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:27.155813', 'step': 25919, 'epoch': 3}
{'type': 'loss', 'content': 0.011346825398504734, 'timestamp': '2025-10-02 00:57:27.161917', 'step': 25920, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:27.219790', 'step': 25920, 'epoch': 3}
{'type': 'loss', 'content': 0.0372437983751297, 'timestamp': '2025-10-02 00:57:27.230767', 'step': 25921, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:27.289887', 'step': 25921, 'epoch': 3}
{'type': 'loss', 'content': 0.029290001839399338, 'timestamp': '2025-10-02 00:57:27.292248', 'step': 25922, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:27.346122', 'step': 25922, 'epoch': 3}
{'type': 'loss', 'content': 0.04406297579407692, 'timestamp': '2025-10-02 00:57:27.348350', 'step': 25923, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:27.411005', 'step': 25923, 'epoch': 3}
{'type': 'loss', 'content': 0.08796823024749756, 'timestamp': '2025-10-02 00:57:27.417046', 'step': 25924, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:27.471894', 'step': 25924, 'epoch': 3}
{'type': 'loss', 'content': 0.02168668620288372, 'timestamp': '2025-10-02 00:57:27.479158', 'step': 25925, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:27.534878', 'step': 25925, 'epoch': 3}
{'type': 'loss', 'content': 0.048216693103313446, 'timestamp': '2025-10-02 00:57:27.536959', 'step': 25926, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:27.592084', 'step': 25926, 'epoch': 3}
{'type': 'loss', 'content': 0.07318920642137527, 'timestamp': '2025-10-02 00:57:27.594482', 'step': 25927, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:27.649121', 'step': 25927, 'epoch': 3}
{'type': 'loss', 'content': 0.009584903717041016, 'timestamp': '2025-10-02 00:57:27.658880', 'step': 25928, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:27.714700', 'step': 25928, 'epoch': 3}
{'type': 'loss', 'content': 0.06731464713811874, 'timestamp': '2025-10-02 00:57:27.717400', 'step': 25929, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:27.772463', 'step': 25929, 'epoch': 3}
{'type': 'loss', 'content': 0.08249460160732269, 'timestamp': '2025-10-02 00:57:27.774800', 'step': 25930, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:27.830580', 'step': 25930, 'epoch': 3}
{'type': 'loss', 'content': 0.01986057497560978, 'timestamp': '2025-10-02 00:57:27.835076', 'step': 25931, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:57:27.903379', 'step': 25931, 'epoch': 3}
{'type': 'loss', 'content': 0.005387595854699612, 'timestamp': '2025-10-02 00:57:27.916200', 'step': 25932, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:27.970856', 'step': 25932, 'epoch': 3}
{'type': 'loss', 'content': 0.032415855675935745, 'timestamp': '2025-10-02 00:57:27.973353', 'step': 25933, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:28.030384', 'step': 25933, 'epoch': 3}
{'type': 'loss', 'content': 0.14610441029071808, 'timestamp': '2025-10-02 00:57:28.032804', 'step': 25934, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:28.090762', 'step': 25934, 'epoch': 3}
{'type': 'loss', 'content': 0.06032004579901695, 'timestamp': '2025-10-02 00:57:28.093224', 'step': 25935, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:28.148340', 'step': 25935, 'epoch': 3}
{'type': 'loss', 'content': 0.06537739932537079, 'timestamp': '2025-10-02 00:57:28.154403', 'step': 25936, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:28.208088', 'step': 25936, 'epoch': 3}
{'type': 'loss', 'content': 0.005079631693661213, 'timestamp': '2025-10-02 00:57:28.210300', 'step': 25937, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:28.264112', 'step': 25937, 'epoch': 3}
{'type': 'loss', 'content': 0.1368476003408432, 'timestamp': '2025-10-02 00:57:28.266663', 'step': 25938, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:28.323803', 'step': 25938, 'epoch': 3}
{'type': 'loss', 'content': 0.05809598043560982, 'timestamp': '2025-10-02 00:57:28.327132', 'step': 25939, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:28.382695', 'step': 25939, 'epoch': 3}
{'type': 'loss', 'content': 0.02869161032140255, 'timestamp': '2025-10-02 00:57:28.388844', 'step': 25940, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:28.443281', 'step': 25940, 'epoch': 3}
{'type': 'loss', 'content': 0.03288179636001587, 'timestamp': '2025-10-02 00:57:28.450523', 'step': 25941, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:28.508362', 'step': 25941, 'epoch': 3}
{'type': 'loss', 'content': 0.022822273895144463, 'timestamp': '2025-10-02 00:57:28.517691', 'step': 25942, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:57:28.585199', 'step': 25942, 'epoch': 3}
{'type': 'loss', 'content': 0.00011610989895416424, 'timestamp': '2025-10-02 00:57:28.597159', 'step': 25943, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:57:28.651502', 'step': 25943, 'epoch': 3}
{'type': 'loss', 'content': 0.14014960825443268, 'timestamp': '2025-10-02 00:57:28.657370', 'step': 25944, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:57:28.712737', 'step': 25944, 'epoch': 3}
{'type': 'loss', 'content': 0.03646877780556679, 'timestamp': '2025-10-02 00:57:28.715438', 'step': 25945, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:28.770446', 'step': 25945, 'epoch': 3}
{'type': 'loss', 'content': 0.06554844230413437, 'timestamp': '2025-10-02 00:57:28.777571', 'step': 25946, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:28.833873', 'step': 25946, 'epoch': 3}
{'type': 'loss', 'content': 0.023787783458828926, 'timestamp': '2025-10-02 00:57:28.843428', 'step': 25947, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:57:28.899626', 'step': 25947, 'epoch': 3}
{'type': 'loss', 'content': 0.09999670088291168, 'timestamp': '2025-10-02 00:57:28.905578', 'step': 25948, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:28.961245', 'step': 25948, 'epoch': 3}
{'type': 'loss', 'content': 0.0381978377699852, 'timestamp': '2025-10-02 00:57:28.963924', 'step': 25949, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:29.019102', 'step': 25949, 'epoch': 3}
{'type': 'loss', 'content': 0.1010470986366272, 'timestamp': '2025-10-02 00:57:29.021526', 'step': 25950, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:29.075960', 'step': 25950, 'epoch': 3}
{'type': 'loss', 'content': 0.05624902620911598, 'timestamp': '2025-10-02 00:57:29.085008', 'step': 25951, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:29.139844', 'step': 25951, 'epoch': 3}
{'type': 'loss', 'content': 0.03727208822965622, 'timestamp': '2025-10-02 00:57:29.147653', 'step': 25952, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:29.202178', 'step': 25952, 'epoch': 3}
{'type': 'loss', 'content': 0.10091470181941986, 'timestamp': '2025-10-02 00:57:29.204656', 'step': 25953, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:29.259578', 'step': 25953, 'epoch': 3}
{'type': 'loss', 'content': 0.01549297384917736, 'timestamp': '2025-10-02 00:57:29.261766', 'step': 25954, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:29.317195', 'step': 25954, 'epoch': 3}
{'type': 'loss', 'content': 0.048808034509420395, 'timestamp': '2025-10-02 00:57:29.319629', 'step': 25955, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:29.374187', 'step': 25955, 'epoch': 3}
{'type': 'loss', 'content': 0.06207440048456192, 'timestamp': '2025-10-02 00:57:29.380483', 'step': 25956, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:29.435650', 'step': 25956, 'epoch': 3}
{'type': 'loss', 'content': 0.019248880445957184, 'timestamp': '2025-10-02 00:57:29.442845', 'step': 25957, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:29.499467', 'step': 25957, 'epoch': 3}
{'type': 'loss', 'content': 0.028551513329148293, 'timestamp': '2025-10-02 00:57:29.506549', 'step': 25958, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:29.561445', 'step': 25958, 'epoch': 3}
{'type': 'loss', 'content': 0.14543786644935608, 'timestamp': '2025-10-02 00:57:29.567054', 'step': 25959, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:29.622293', 'step': 25959, 'epoch': 3}
{'type': 'loss', 'content': 0.03757133707404137, 'timestamp': '2025-10-02 00:57:29.628355', 'step': 25960, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:29.682488', 'step': 25960, 'epoch': 3}
{'type': 'loss', 'content': 0.013886390253901482, 'timestamp': '2025-10-02 00:57:29.685238', 'step': 25961, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:29.740222', 'step': 25961, 'epoch': 3}
{'type': 'loss', 'content': 0.04348902776837349, 'timestamp': '2025-10-02 00:57:29.743023', 'step': 25962, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:57:29.806181', 'step': 25962, 'epoch': 3}
{'type': 'loss', 'content': 0.032516252249479294, 'timestamp': '2025-10-02 00:57:29.817015', 'step': 25963, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:29.871660', 'step': 25963, 'epoch': 3}
{'type': 'loss', 'content': 0.04056999459862709, 'timestamp': '2025-10-02 00:57:29.878283', 'step': 25964, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:29.932879', 'step': 25964, 'epoch': 3}
{'type': 'loss', 'content': 0.019281376153230667, 'timestamp': '2025-10-02 00:57:29.935337', 'step': 25965, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:29.991208', 'step': 25965, 'epoch': 3}
{'type': 'loss', 'content': 0.041761573404073715, 'timestamp': '2025-10-02 00:57:29.993898', 'step': 25966, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:30.049445', 'step': 25966, 'epoch': 3}
{'type': 'loss', 'content': 0.0030861294362694025, 'timestamp': '2025-10-02 00:57:30.053735', 'step': 25967, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:30.108421', 'step': 25967, 'epoch': 3}
{'type': 'loss', 'content': 0.011747121810913086, 'timestamp': '2025-10-02 00:57:30.114836', 'step': 25968, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:30.168410', 'step': 25968, 'epoch': 3}
{'type': 'loss', 'content': 0.01617945358157158, 'timestamp': '2025-10-02 00:57:30.177695', 'step': 25969, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:30.233086', 'step': 25969, 'epoch': 3}
{'type': 'loss', 'content': 0.03523515537381172, 'timestamp': '2025-10-02 00:57:30.235547', 'step': 25970, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:30.291662', 'step': 25970, 'epoch': 3}
{'type': 'loss', 'content': 0.045664723962545395, 'timestamp': '2025-10-02 00:57:30.294952', 'step': 25971, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:30.350655', 'step': 25971, 'epoch': 3}
{'type': 'loss', 'content': 0.00017294353165198117, 'timestamp': '2025-10-02 00:57:30.360793', 'step': 25972, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:30.415757', 'step': 25972, 'epoch': 3}
{'type': 'loss', 'content': 0.022870806977152824, 'timestamp': '2025-10-02 00:57:30.418351', 'step': 25973, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:30.472260', 'step': 25973, 'epoch': 3}
{'type': 'loss', 'content': 0.11565233767032623, 'timestamp': '2025-10-02 00:57:30.475042', 'step': 25974, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:30.530304', 'step': 25974, 'epoch': 3}
{'type': 'loss', 'content': 0.07621681690216064, 'timestamp': '2025-10-02 00:57:30.532907', 'step': 25975, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:30.587882', 'step': 25975, 'epoch': 3}
{'type': 'loss', 'content': 0.062127966433763504, 'timestamp': '2025-10-02 00:57:30.594107', 'step': 25976, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:30.649987', 'step': 25976, 'epoch': 3}
{'type': 'loss', 'content': 0.03113779053092003, 'timestamp': '2025-10-02 00:57:30.659458', 'step': 25977, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:30.714482', 'step': 25977, 'epoch': 3}
{'type': 'loss', 'content': 0.035939764231443405, 'timestamp': '2025-10-02 00:57:30.716670', 'step': 25978, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:30.771397', 'step': 25978, 'epoch': 3}
{'type': 'loss', 'content': 0.06142851710319519, 'timestamp': '2025-10-02 00:57:30.778566', 'step': 25979, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:30.833913', 'step': 25979, 'epoch': 3}
{'type': 'loss', 'content': 0.0034584091044962406, 'timestamp': '2025-10-02 00:57:30.844019', 'step': 25980, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:30.897800', 'step': 25980, 'epoch': 3}
{'type': 'loss', 'content': 0.0773308277130127, 'timestamp': '2025-10-02 00:57:30.900042', 'step': 25981, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:30.959557', 'step': 25981, 'epoch': 3}
{'type': 'loss', 'content': 0.025752466171979904, 'timestamp': '2025-10-02 00:57:30.969656', 'step': 25982, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:57:31.031687', 'step': 25982, 'epoch': 3}
{'type': 'loss', 'content': 0.01791975274682045, 'timestamp': '2025-10-02 00:57:31.042269', 'step': 25983, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:31.097725', 'step': 25983, 'epoch': 3}
{'type': 'loss', 'content': 0.05668753758072853, 'timestamp': '2025-10-02 00:57:31.103898', 'step': 25984, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:31.157792', 'step': 25984, 'epoch': 3}
{'type': 'loss', 'content': 0.11010418087244034, 'timestamp': '2025-10-02 00:57:31.160235', 'step': 25985, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:31.214645', 'step': 25985, 'epoch': 3}
{'type': 'loss', 'content': 0.10599302500486374, 'timestamp': '2025-10-02 00:57:31.217050', 'step': 25986, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:31.270864', 'step': 25986, 'epoch': 3}
{'type': 'loss', 'content': 0.08116377890110016, 'timestamp': '2025-10-02 00:57:31.273989', 'step': 25987, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:31.329508', 'step': 25987, 'epoch': 3}
{'type': 'loss', 'content': 0.0572853609919548, 'timestamp': '2025-10-02 00:57:31.335655', 'step': 25988, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:57:31.400384', 'step': 25988, 'epoch': 3}
{'type': 'loss', 'content': 0.008052784018218517, 'timestamp': '2025-10-02 00:57:31.411909', 'step': 25989, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:31.467048', 'step': 25989, 'epoch': 3}
{'type': 'loss', 'content': 0.041336916387081146, 'timestamp': '2025-10-02 00:57:31.476599', 'step': 25990, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:31.531784', 'step': 25990, 'epoch': 3}
{'type': 'loss', 'content': 0.08084851503372192, 'timestamp': '2025-10-02 00:57:31.534521', 'step': 25991, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:31.589061', 'step': 25991, 'epoch': 3}
{'type': 'loss', 'content': 0.06030651926994324, 'timestamp': '2025-10-02 00:57:31.595088', 'step': 25992, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:31.650663', 'step': 25992, 'epoch': 3}
{'type': 'loss', 'content': 0.016767306253314018, 'timestamp': '2025-10-02 00:57:31.657947', 'step': 25993, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:31.713229', 'step': 25993, 'epoch': 3}
{'type': 'loss', 'content': 0.1599545031785965, 'timestamp': '2025-10-02 00:57:31.715753', 'step': 25994, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:31.775392', 'step': 25994, 'epoch': 3}
{'type': 'loss', 'content': 0.04321487620472908, 'timestamp': '2025-10-02 00:57:31.785565', 'step': 25995, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:31.840729', 'step': 25995, 'epoch': 3}
{'type': 'loss', 'content': 0.08763837814331055, 'timestamp': '2025-10-02 00:57:31.847254', 'step': 25996, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:31.901418', 'step': 25996, 'epoch': 3}
{'type': 'loss', 'content': 0.05084206163883209, 'timestamp': '2025-10-02 00:57:31.907001', 'step': 25997, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:31.961857', 'step': 25997, 'epoch': 3}
{'type': 'loss', 'content': 0.0637565478682518, 'timestamp': '2025-10-02 00:57:31.964404', 'step': 25998, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:32.019661', 'step': 25998, 'epoch': 3}
{'type': 'loss', 'content': 0.12207847833633423, 'timestamp': '2025-10-02 00:57:32.024965', 'step': 25999, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:32.080086', 'step': 25999, 'epoch': 3}
{'type': 'loss', 'content': 0.15095166862010956, 'timestamp': '2025-10-02 00:57:32.086256', 'step': 26000, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 26000', 'timestamp': '2025-10-02 00:57:32.477989', 'step': 26000, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:57:32.534368', 'step': 26000, 'epoch': 3}
{'type': 'loss', 'content': 0.08579874783754349, 'timestamp': '2025-10-02 00:57:32.537365', 'step': 26001, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:32.597549', 'step': 26001, 'epoch': 3}
{'type': 'loss', 'content': 0.02193528786301613, 'timestamp': '2025-10-02 00:57:32.600380', 'step': 26002, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:32.656756', 'step': 26002, 'epoch': 3}
{'type': 'loss', 'content': 0.07228019088506699, 'timestamp': '2025-10-02 00:57:32.660080', 'step': 26003, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:32.718868', 'step': 26003, 'epoch': 3}
{'type': 'loss', 'content': 0.014859515242278576, 'timestamp': '2025-10-02 00:57:32.726193', 'step': 26004, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:57:32.781625', 'step': 26004, 'epoch': 3}
{'type': 'loss', 'content': 0.06521037220954895, 'timestamp': '2025-10-02 00:57:32.784255', 'step': 26005, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:57:32.840407', 'step': 26005, 'epoch': 3}
{'type': 'loss', 'content': 0.06183243542909622, 'timestamp': '2025-10-02 00:57:32.844050', 'step': 26006, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:32.901284', 'step': 26006, 'epoch': 3}
{'type': 'loss', 'content': 0.02436264045536518, 'timestamp': '2025-10-02 00:57:32.910793', 'step': 26007, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:32.966513', 'step': 26007, 'epoch': 3}
{'type': 'loss', 'content': 0.029248084872961044, 'timestamp': '2025-10-02 00:57:32.972953', 'step': 26008, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:33.028873', 'step': 26008, 'epoch': 3}
{'type': 'loss', 'content': 0.04081964120268822, 'timestamp': '2025-10-02 00:57:33.034496', 'step': 26009, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:33.099260', 'step': 26009, 'epoch': 3}
{'type': 'loss', 'content': 0.016743283718824387, 'timestamp': '2025-10-02 00:57:33.102236', 'step': 26010, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:33.160036', 'step': 26010, 'epoch': 3}
{'type': 'loss', 'content': 0.04854043573141098, 'timestamp': '2025-10-02 00:57:33.163355', 'step': 26011, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:33.220748', 'step': 26011, 'epoch': 3}
{'type': 'loss', 'content': 0.013928818516433239, 'timestamp': '2025-10-02 00:57:33.227446', 'step': 26012, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:33.286909', 'step': 26012, 'epoch': 3}
{'type': 'loss', 'content': 0.030680766329169273, 'timestamp': '2025-10-02 00:57:33.297909', 'step': 26013, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:57:33.354428', 'step': 26013, 'epoch': 3}
{'type': 'loss', 'content': 0.036914706230163574, 'timestamp': '2025-10-02 00:57:33.356876', 'step': 26014, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:33.413680', 'step': 26014, 'epoch': 3}
{'type': 'loss', 'content': 0.02229161374270916, 'timestamp': '2025-10-02 00:57:33.422719', 'step': 26015, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:57:33.487359', 'step': 26015, 'epoch': 3}
{'type': 'loss', 'content': 0.02913052961230278, 'timestamp': '2025-10-02 00:57:33.498767', 'step': 26016, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:33.557072', 'step': 26016, 'epoch': 3}
{'type': 'loss', 'content': 0.011701473966240883, 'timestamp': '2025-10-02 00:57:33.560391', 'step': 26017, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:33.616863', 'step': 26017, 'epoch': 3}
{'type': 'loss', 'content': 0.01496255025267601, 'timestamp': '2025-10-02 00:57:33.622312', 'step': 26018, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:33.678632', 'step': 26018, 'epoch': 3}
{'type': 'loss', 'content': 0.03823189064860344, 'timestamp': '2025-10-02 00:57:33.683971', 'step': 26019, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:33.741428', 'step': 26019, 'epoch': 3}
{'type': 'loss', 'content': 0.011685752309858799, 'timestamp': '2025-10-02 00:57:33.749336', 'step': 26020, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:33.805941', 'step': 26020, 'epoch': 3}
{'type': 'loss', 'content': 0.06897182762622833, 'timestamp': '2025-10-02 00:57:33.813229', 'step': 26021, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:33.871368', 'step': 26021, 'epoch': 3}
{'type': 'loss', 'content': 0.1958140879869461, 'timestamp': '2025-10-02 00:57:33.874529', 'step': 26022, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:33.931772', 'step': 26022, 'epoch': 3}
{'type': 'loss', 'content': 0.07013442367315292, 'timestamp': '2025-10-02 00:57:33.934747', 'step': 26023, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:33.991530', 'step': 26023, 'epoch': 3}
{'type': 'loss', 'content': 0.059611015021800995, 'timestamp': '2025-10-02 00:57:33.998075', 'step': 26024, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:34.054045', 'step': 26024, 'epoch': 3}
{'type': 'loss', 'content': 0.024302879348397255, 'timestamp': '2025-10-02 00:57:34.059664', 'step': 26025, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:34.116167', 'step': 26025, 'epoch': 3}
{'type': 'loss', 'content': 0.015172923915088177, 'timestamp': '2025-10-02 00:57:34.118473', 'step': 26026, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:34.173499', 'step': 26026, 'epoch': 3}
{'type': 'loss', 'content': 0.026756979525089264, 'timestamp': '2025-10-02 00:57:34.175909', 'step': 26027, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:34.230676', 'step': 26027, 'epoch': 3}
{'type': 'loss', 'content': 0.08334622532129288, 'timestamp': '2025-10-02 00:57:34.236621', 'step': 26028, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:34.290899', 'step': 26028, 'epoch': 3}
{'type': 'loss', 'content': 0.05971963331103325, 'timestamp': '2025-10-02 00:57:34.293514', 'step': 26029, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:34.354055', 'step': 26029, 'epoch': 3}
{'type': 'loss', 'content': 0.040671490132808685, 'timestamp': '2025-10-02 00:57:34.356278', 'step': 26030, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:34.410876', 'step': 26030, 'epoch': 3}
{'type': 'loss', 'content': 0.0758683830499649, 'timestamp': '2025-10-02 00:57:34.413230', 'step': 26031, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:34.467780', 'step': 26031, 'epoch': 3}
{'type': 'loss', 'content': 0.013113785535097122, 'timestamp': '2025-10-02 00:57:34.473854', 'step': 26032, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:34.528159', 'step': 26032, 'epoch': 3}
{'type': 'loss', 'content': 0.018565669655799866, 'timestamp': '2025-10-02 00:57:34.533787', 'step': 26033, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:57:34.589090', 'step': 26033, 'epoch': 3}
{'type': 'loss', 'content': 0.08094607293605804, 'timestamp': '2025-10-02 00:57:34.591929', 'step': 26034, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:34.647056', 'step': 26034, 'epoch': 3}
{'type': 'loss', 'content': 0.05645547807216644, 'timestamp': '2025-10-02 00:57:34.649816', 'step': 26035, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:34.705190', 'step': 26035, 'epoch': 3}
{'type': 'loss', 'content': 0.05543358623981476, 'timestamp': '2025-10-02 00:57:34.711175', 'step': 26036, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:34.765669', 'step': 26036, 'epoch': 3}
{'type': 'loss', 'content': 0.11177265644073486, 'timestamp': '2025-10-02 00:57:34.768050', 'step': 26037, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:34.822836', 'step': 26037, 'epoch': 3}
{'type': 'loss', 'content': 0.13914839923381805, 'timestamp': '2025-10-02 00:57:34.825526', 'step': 26038, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:34.880414', 'step': 26038, 'epoch': 3}
{'type': 'loss', 'content': 0.03791943937540054, 'timestamp': '2025-10-02 00:57:34.882926', 'step': 26039, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:34.938658', 'step': 26039, 'epoch': 3}
{'type': 'loss', 'content': 0.03617214038968086, 'timestamp': '2025-10-02 00:57:34.945000', 'step': 26040, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:57:35.004803', 'step': 26040, 'epoch': 3}
{'type': 'loss', 'content': 0.030827391892671585, 'timestamp': '2025-10-02 00:57:35.016061', 'step': 26041, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:35.071705', 'step': 26041, 'epoch': 3}
{'type': 'loss', 'content': 0.02547585964202881, 'timestamp': '2025-10-02 00:57:35.074297', 'step': 26042, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:35.129290', 'step': 26042, 'epoch': 3}
{'type': 'loss', 'content': 0.018744975328445435, 'timestamp': '2025-10-02 00:57:35.134866', 'step': 26043, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:35.191339', 'step': 26043, 'epoch': 3}
{'type': 'loss', 'content': 0.027282562106847763, 'timestamp': '2025-10-02 00:57:35.201685', 'step': 26044, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:35.255831', 'step': 26044, 'epoch': 3}
{'type': 'loss', 'content': 0.0854930430650711, 'timestamp': '2025-10-02 00:57:35.259682', 'step': 26045, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:35.314577', 'step': 26045, 'epoch': 3}
{'type': 'loss', 'content': 0.015279600396752357, 'timestamp': '2025-10-02 00:57:35.317153', 'step': 26046, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:35.371665', 'step': 26046, 'epoch': 3}
{'type': 'loss', 'content': 0.07124444097280502, 'timestamp': '2025-10-02 00:57:35.374240', 'step': 26047, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:35.428801', 'step': 26047, 'epoch': 3}
{'type': 'loss', 'content': 0.05204097926616669, 'timestamp': '2025-10-02 00:57:35.436626', 'step': 26048, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:35.491725', 'step': 26048, 'epoch': 3}
{'type': 'loss', 'content': 0.017612367868423462, 'timestamp': '2025-10-02 00:57:35.499004', 'step': 26049, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:35.556169', 'step': 26049, 'epoch': 3}
{'type': 'loss', 'content': 0.07544416934251785, 'timestamp': '2025-10-02 00:57:35.558955', 'step': 26050, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:35.619243', 'step': 26050, 'epoch': 3}
{'type': 'loss', 'content': 0.08115172386169434, 'timestamp': '2025-10-02 00:57:35.621928', 'step': 26051, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:35.676087', 'step': 26051, 'epoch': 3}
{'type': 'loss', 'content': 0.06250900030136108, 'timestamp': '2025-10-02 00:57:35.682599', 'step': 26052, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:35.737240', 'step': 26052, 'epoch': 3}
{'type': 'loss', 'content': 0.0038055418990552425, 'timestamp': '2025-10-02 00:57:35.746506', 'step': 26053, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:35.801512', 'step': 26053, 'epoch': 3}
{'type': 'loss', 'content': 0.00981184933334589, 'timestamp': '2025-10-02 00:57:35.810818', 'step': 26054, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:35.870876', 'step': 26054, 'epoch': 3}
{'type': 'loss', 'content': 0.08083252608776093, 'timestamp': '2025-10-02 00:57:35.873471', 'step': 26055, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:35.929326', 'step': 26055, 'epoch': 3}
{'type': 'loss', 'content': 0.05187974497675896, 'timestamp': '2025-10-02 00:57:35.935841', 'step': 26056, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:35.993264', 'step': 26056, 'epoch': 3}
{'type': 'loss', 'content': 0.03178402781486511, 'timestamp': '2025-10-02 00:57:36.004198', 'step': 26057, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:36.059614', 'step': 26057, 'epoch': 3}
{'type': 'loss', 'content': 0.022232215851545334, 'timestamp': '2025-10-02 00:57:36.065147', 'step': 26058, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:57:36.126764', 'step': 26058, 'epoch': 3}
{'type': 'loss', 'content': 0.021902382373809814, 'timestamp': '2025-10-02 00:57:36.137341', 'step': 26059, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:36.191847', 'step': 26059, 'epoch': 3}
{'type': 'loss', 'content': 0.09364846348762512, 'timestamp': '2025-10-02 00:57:36.197884', 'step': 26060, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:36.251988', 'step': 26060, 'epoch': 3}
{'type': 'loss', 'content': 0.06392315775156021, 'timestamp': '2025-10-02 00:57:36.259361', 'step': 26061, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:36.314904', 'step': 26061, 'epoch': 3}
{'type': 'loss', 'content': 0.002593433018773794, 'timestamp': '2025-10-02 00:57:36.317272', 'step': 26062, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:36.373428', 'step': 26062, 'epoch': 3}
{'type': 'loss', 'content': 0.13116326928138733, 'timestamp': '2025-10-02 00:57:36.376355', 'step': 26063, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:36.432075', 'step': 26063, 'epoch': 3}
{'type': 'loss', 'content': 0.002185324439778924, 'timestamp': '2025-10-02 00:57:36.442415', 'step': 26064, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:36.497026', 'step': 26064, 'epoch': 3}
{'type': 'loss', 'content': 0.037191059440374374, 'timestamp': '2025-10-02 00:57:36.499379', 'step': 26065, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:57:36.553455', 'step': 26065, 'epoch': 3}
{'type': 'loss', 'content': 0.1089005246758461, 'timestamp': '2025-10-02 00:57:36.555653', 'step': 26066, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:36.609960', 'step': 26066, 'epoch': 3}
{'type': 'loss', 'content': 0.05869095399975777, 'timestamp': '2025-10-02 00:57:36.612226', 'step': 26067, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:36.666528', 'step': 26067, 'epoch': 3}
{'type': 'loss', 'content': 0.029468823224306107, 'timestamp': '2025-10-02 00:57:36.672955', 'step': 26068, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:36.726750', 'step': 26068, 'epoch': 3}
{'type': 'loss', 'content': 0.026261093094944954, 'timestamp': '2025-10-02 00:57:36.730146', 'step': 26069, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:36.784608', 'step': 26069, 'epoch': 3}
{'type': 'loss', 'content': 0.03620595484972, 'timestamp': '2025-10-02 00:57:36.787755', 'step': 26070, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:57:36.842138', 'step': 26070, 'epoch': 3}
{'type': 'loss', 'content': 0.03793272748589516, 'timestamp': '2025-10-02 00:57:36.844274', 'step': 26071, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:36.898740', 'step': 26071, 'epoch': 3}
{'type': 'loss', 'content': 0.04926803708076477, 'timestamp': '2025-10-02 00:57:36.905328', 'step': 26072, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:36.959438', 'step': 26072, 'epoch': 3}
{'type': 'loss', 'content': 0.053141314536333084, 'timestamp': '2025-10-02 00:57:36.965093', 'step': 26073, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:37.019496', 'step': 26073, 'epoch': 3}
{'type': 'loss', 'content': 0.09940119832754135, 'timestamp': '2025-10-02 00:57:37.026591', 'step': 26074, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:37.082515', 'step': 26074, 'epoch': 3}
{'type': 'loss', 'content': 0.059041544795036316, 'timestamp': '2025-10-02 00:57:37.085151', 'step': 26075, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:37.139930', 'step': 26075, 'epoch': 3}
{'type': 'loss', 'content': 0.037116724997758865, 'timestamp': '2025-10-02 00:57:37.145963', 'step': 26076, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:37.199541', 'step': 26076, 'epoch': 3}
{'type': 'loss', 'content': 0.036044806241989136, 'timestamp': '2025-10-02 00:57:37.202209', 'step': 26077, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:37.256730', 'step': 26077, 'epoch': 3}
{'type': 'loss', 'content': 0.06893643736839294, 'timestamp': '2025-10-02 00:57:37.258966', 'step': 26078, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:57:37.321196', 'step': 26078, 'epoch': 3}
{'type': 'loss', 'content': 0.037940409034490585, 'timestamp': '2025-10-02 00:57:37.332038', 'step': 26079, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:37.386918', 'step': 26079, 'epoch': 3}
{'type': 'loss', 'content': 0.05543120577931404, 'timestamp': '2025-10-02 00:57:37.392837', 'step': 26080, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:37.447811', 'step': 26080, 'epoch': 3}
{'type': 'loss', 'content': 0.015617702156305313, 'timestamp': '2025-10-02 00:57:37.457039', 'step': 26081, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:37.516189', 'step': 26081, 'epoch': 3}
{'type': 'loss', 'content': 0.03677835315465927, 'timestamp': '2025-10-02 00:57:37.526393', 'step': 26082, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:37.580792', 'step': 26082, 'epoch': 3}
{'type': 'loss', 'content': 0.08369474112987518, 'timestamp': '2025-10-02 00:57:37.583089', 'step': 26083, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:37.639114', 'step': 26083, 'epoch': 3}
{'type': 'loss', 'content': 0.051085300743579865, 'timestamp': '2025-10-02 00:57:37.649425', 'step': 26084, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:37.705079', 'step': 26084, 'epoch': 3}
{'type': 'loss', 'content': 0.02722945623099804, 'timestamp': '2025-10-02 00:57:37.707430', 'step': 26085, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:37.761711', 'step': 26085, 'epoch': 3}
{'type': 'loss', 'content': 0.07145726680755615, 'timestamp': '2025-10-02 00:57:37.764071', 'step': 26086, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:37.819333', 'step': 26086, 'epoch': 3}
{'type': 'loss', 'content': 0.043611373752355576, 'timestamp': '2025-10-02 00:57:37.821761', 'step': 26087, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:37.876946', 'step': 26087, 'epoch': 3}
{'type': 'loss', 'content': 0.04257294908165932, 'timestamp': '2025-10-02 00:57:37.883200', 'step': 26088, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:37.937773', 'step': 26088, 'epoch': 3}
{'type': 'loss', 'content': 0.014648422598838806, 'timestamp': '2025-10-02 00:57:37.943522', 'step': 26089, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:37.999206', 'step': 26089, 'epoch': 3}
{'type': 'loss', 'content': 0.05998876318335533, 'timestamp': '2025-10-02 00:57:38.001656', 'step': 26090, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:38.057189', 'step': 26090, 'epoch': 3}
{'type': 'loss', 'content': 0.010781506076455116, 'timestamp': '2025-10-02 00:57:38.062663', 'step': 26091, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:38.121870', 'step': 26091, 'epoch': 3}
{'type': 'loss', 'content': 0.028248410671949387, 'timestamp': '2025-10-02 00:57:38.132833', 'step': 26092, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:38.187328', 'step': 26092, 'epoch': 3}
{'type': 'loss', 'content': 0.002371251815930009, 'timestamp': '2025-10-02 00:57:38.194689', 'step': 26093, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:38.250492', 'step': 26093, 'epoch': 3}
{'type': 'loss', 'content': 0.022067779675126076, 'timestamp': '2025-10-02 00:57:38.260008', 'step': 26094, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:38.316558', 'step': 26094, 'epoch': 3}
{'type': 'loss', 'content': 0.04609593749046326, 'timestamp': '2025-10-02 00:57:38.326069', 'step': 26095, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:38.380808', 'step': 26095, 'epoch': 3}
{'type': 'loss', 'content': 0.007684232667088509, 'timestamp': '2025-10-02 00:57:38.386752', 'step': 26096, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:38.440326', 'step': 26096, 'epoch': 3}
{'type': 'loss', 'content': 0.03130760043859482, 'timestamp': '2025-10-02 00:57:38.442666', 'step': 26097, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:57:38.504852', 'step': 26097, 'epoch': 3}
{'type': 'loss', 'content': 0.04103637486696243, 'timestamp': '2025-10-02 00:57:38.515542', 'step': 26098, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:57:38.577127', 'step': 26098, 'epoch': 3}
{'type': 'loss', 'content': 0.010067659430205822, 'timestamp': '2025-10-02 00:57:38.587612', 'step': 26099, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:38.643718', 'step': 26099, 'epoch': 3}
{'type': 'loss', 'content': 0.011208772659301758, 'timestamp': '2025-10-02 00:57:38.649591', 'step': 26100, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:38.704027', 'step': 26100, 'epoch': 3}
{'type': 'loss', 'content': 0.013067969121038914, 'timestamp': '2025-10-02 00:57:38.713408', 'step': 26101, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:38.769300', 'step': 26101, 'epoch': 3}
{'type': 'loss', 'content': 0.016224119812250137, 'timestamp': '2025-10-02 00:57:38.771594', 'step': 26102, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:38.826539', 'step': 26102, 'epoch': 3}
{'type': 'loss', 'content': 0.04171766713261604, 'timestamp': '2025-10-02 00:57:38.833655', 'step': 26103, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:38.888852', 'step': 26103, 'epoch': 3}
{'type': 'loss', 'content': 0.05769920349121094, 'timestamp': '2025-10-02 00:57:38.894946', 'step': 26104, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:38.949321', 'step': 26104, 'epoch': 3}
{'type': 'loss', 'content': 0.061944760382175446, 'timestamp': '2025-10-02 00:57:38.952333', 'step': 26105, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:39.008641', 'step': 26105, 'epoch': 3}
{'type': 'loss', 'content': 0.11529204994440079, 'timestamp': '2025-10-02 00:57:39.010999', 'step': 26106, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:39.065174', 'step': 26106, 'epoch': 3}
{'type': 'loss', 'content': 0.04944280907511711, 'timestamp': '2025-10-02 00:57:39.067615', 'step': 26107, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:57:39.130441', 'step': 26107, 'epoch': 3}
{'type': 'loss', 'content': 0.014863426797091961, 'timestamp': '2025-10-02 00:57:39.141864', 'step': 26108, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:39.196466', 'step': 26108, 'epoch': 3}
{'type': 'loss', 'content': 0.01288899127393961, 'timestamp': '2025-10-02 00:57:39.198746', 'step': 26109, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:39.252800', 'step': 26109, 'epoch': 3}
{'type': 'loss', 'content': 0.1622648686170578, 'timestamp': '2025-10-02 00:57:39.255245', 'step': 26110, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:57:39.330578', 'step': 26110, 'epoch': 3}
{'type': 'loss', 'content': 0.015708347782492638, 'timestamp': '2025-10-02 00:57:39.343785', 'step': 26111, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:39.398522', 'step': 26111, 'epoch': 3}
{'type': 'loss', 'content': 0.021378139033913612, 'timestamp': '2025-10-02 00:57:39.405328', 'step': 26112, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:39.459710', 'step': 26112, 'epoch': 3}
{'type': 'loss', 'content': 0.07158634811639786, 'timestamp': '2025-10-02 00:57:39.462033', 'step': 26113, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:39.517455', 'step': 26113, 'epoch': 3}
{'type': 'loss', 'content': 0.12213185429573059, 'timestamp': '2025-10-02 00:57:39.519810', 'step': 26114, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:39.574943', 'step': 26114, 'epoch': 3}
{'type': 'loss', 'content': 0.08432015031576157, 'timestamp': '2025-10-02 00:57:39.577410', 'step': 26115, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:39.631830', 'step': 26115, 'epoch': 3}
{'type': 'loss', 'content': 0.07132019847631454, 'timestamp': '2025-10-02 00:57:39.638489', 'step': 26116, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:39.692656', 'step': 26116, 'epoch': 3}
{'type': 'loss', 'content': 0.07467743009328842, 'timestamp': '2025-10-02 00:57:39.695000', 'step': 26117, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:39.749366', 'step': 26117, 'epoch': 3}
{'type': 'loss', 'content': 0.01690780557692051, 'timestamp': '2025-10-02 00:57:39.751957', 'step': 26118, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:39.811496', 'step': 26118, 'epoch': 3}
{'type': 'loss', 'content': 0.015296087600290775, 'timestamp': '2025-10-02 00:57:39.821694', 'step': 26119, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:57:39.877326', 'step': 26119, 'epoch': 3}
{'type': 'loss', 'content': 0.12273254245519638, 'timestamp': '2025-10-02 00:57:39.883442', 'step': 26120, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:39.937806', 'step': 26120, 'epoch': 3}
{'type': 'loss', 'content': 0.03435260057449341, 'timestamp': '2025-10-02 00:57:39.940435', 'step': 26121, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:39.999952', 'step': 26121, 'epoch': 3}
{'type': 'loss', 'content': 0.0327557735145092, 'timestamp': '2025-10-02 00:57:40.010119', 'step': 26122, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:40.064399', 'step': 26122, 'epoch': 3}
{'type': 'loss', 'content': 0.11272457987070084, 'timestamp': '2025-10-02 00:57:40.066687', 'step': 26123, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:40.121772', 'step': 26123, 'epoch': 3}
{'type': 'loss', 'content': 0.011210691183805466, 'timestamp': '2025-10-02 00:57:40.131725', 'step': 26124, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:40.187045', 'step': 26124, 'epoch': 3}
{'type': 'loss', 'content': 0.06564076244831085, 'timestamp': '2025-10-02 00:57:40.189525', 'step': 26125, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:40.244288', 'step': 26125, 'epoch': 3}
{'type': 'loss', 'content': 0.06151598319411278, 'timestamp': '2025-10-02 00:57:40.246611', 'step': 26126, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:57:40.308623', 'step': 26126, 'epoch': 3}
{'type': 'loss', 'content': 0.04107658192515373, 'timestamp': '2025-10-02 00:57:40.319061', 'step': 26127, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:40.375501', 'step': 26127, 'epoch': 3}
{'type': 'loss', 'content': 0.05279965698719025, 'timestamp': '2025-10-02 00:57:40.381832', 'step': 26128, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:40.435927', 'step': 26128, 'epoch': 3}
{'type': 'loss', 'content': 0.07191026210784912, 'timestamp': '2025-10-02 00:57:40.438595', 'step': 26129, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:40.493063', 'step': 26129, 'epoch': 3}
{'type': 'loss', 'content': 0.11162453144788742, 'timestamp': '2025-10-02 00:57:40.495818', 'step': 26130, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:40.550071', 'step': 26130, 'epoch': 3}
{'type': 'loss', 'content': 0.16618399322032928, 'timestamp': '2025-10-02 00:57:40.552776', 'step': 26131, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:57:40.614380', 'step': 26131, 'epoch': 3}
{'type': 'loss', 'content': 0.03500764071941376, 'timestamp': '2025-10-02 00:57:40.625611', 'step': 26132, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:40.680066', 'step': 26132, 'epoch': 3}
{'type': 'loss', 'content': 0.030555076897144318, 'timestamp': '2025-10-02 00:57:40.682485', 'step': 26133, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:40.738179', 'step': 26133, 'epoch': 3}
{'type': 'loss', 'content': 0.07493368536233902, 'timestamp': '2025-10-02 00:57:40.747711', 'step': 26134, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:40.804132', 'step': 26134, 'epoch': 3}
{'type': 'loss', 'content': 0.055069610476493835, 'timestamp': '2025-10-02 00:57:40.813630', 'step': 26135, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:40.870941', 'step': 26135, 'epoch': 3}
{'type': 'loss', 'content': 0.019202042371034622, 'timestamp': '2025-10-02 00:57:40.876755', 'step': 26136, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:57:40.937226', 'step': 26136, 'epoch': 3}
{'type': 'loss', 'content': 0.0279522854834795, 'timestamp': '2025-10-02 00:57:40.948548', 'step': 26137, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:41.007535', 'step': 26137, 'epoch': 3}
{'type': 'loss', 'content': 0.04547692835330963, 'timestamp': '2025-10-02 00:57:41.017727', 'step': 26138, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:57:41.072830', 'step': 26138, 'epoch': 3}
{'type': 'loss', 'content': 0.10769245028495789, 'timestamp': '2025-10-02 00:57:41.075061', 'step': 26139, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:41.128651', 'step': 26139, 'epoch': 3}
{'type': 'loss', 'content': 0.08889034390449524, 'timestamp': '2025-10-02 00:57:41.135172', 'step': 26140, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:41.190481', 'step': 26140, 'epoch': 3}
{'type': 'loss', 'content': 0.07385153323411942, 'timestamp': '2025-10-02 00:57:41.200058', 'step': 26141, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:57:41.254804', 'step': 26141, 'epoch': 3}
{'type': 'loss', 'content': 0.07461057603359222, 'timestamp': '2025-10-02 00:57:41.257667', 'step': 26142, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:41.314508', 'step': 26142, 'epoch': 3}
{'type': 'loss', 'content': 0.03276779502630234, 'timestamp': '2025-10-02 00:57:41.321790', 'step': 26143, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:41.376812', 'step': 26143, 'epoch': 3}
{'type': 'loss', 'content': 0.1261773556470871, 'timestamp': '2025-10-02 00:57:41.393483', 'step': 26144, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:41.448192', 'step': 26144, 'epoch': 3}
{'type': 'loss', 'content': 0.0592665895819664, 'timestamp': '2025-10-02 00:57:41.450658', 'step': 26145, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:57:41.512920', 'step': 26145, 'epoch': 3}
{'type': 'loss', 'content': 0.04281337559223175, 'timestamp': '2025-10-02 00:57:41.523390', 'step': 26146, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:41.580500', 'step': 26146, 'epoch': 3}
{'type': 'loss', 'content': 0.06674829870462418, 'timestamp': '2025-10-02 00:57:41.583519', 'step': 26147, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:41.643278', 'step': 26147, 'epoch': 3}
{'type': 'loss', 'content': 0.09370734542608261, 'timestamp': '2025-10-02 00:57:41.649400', 'step': 26148, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:41.705143', 'step': 26148, 'epoch': 3}
{'type': 'loss', 'content': 0.014170852489769459, 'timestamp': '2025-10-02 00:57:41.710075', 'step': 26149, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:57:41.775656', 'step': 26149, 'epoch': 3}
{'type': 'loss', 'content': 0.052470218390226364, 'timestamp': '2025-10-02 00:57:41.786082', 'step': 26150, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:41.844636', 'step': 26150, 'epoch': 3}
{'type': 'loss', 'content': 0.025581827387213707, 'timestamp': '2025-10-02 00:57:41.847969', 'step': 26151, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:41.905920', 'step': 26151, 'epoch': 3}
{'type': 'loss', 'content': 0.012045632116496563, 'timestamp': '2025-10-02 00:57:41.911927', 'step': 26152, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:41.966955', 'step': 26152, 'epoch': 3}
{'type': 'loss', 'content': 0.1013149619102478, 'timestamp': '2025-10-02 00:57:41.970028', 'step': 26153, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:57:42.038691', 'step': 26153, 'epoch': 3}
{'type': 'loss', 'content': 0.017132893204689026, 'timestamp': '2025-10-02 00:57:42.050617', 'step': 26154, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:42.108993', 'step': 26154, 'epoch': 3}
{'type': 'loss', 'content': 0.05604260042309761, 'timestamp': '2025-10-02 00:57:42.112869', 'step': 26155, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:42.168720', 'step': 26155, 'epoch': 3}
{'type': 'loss', 'content': 0.15955844521522522, 'timestamp': '2025-10-02 00:57:42.175532', 'step': 26156, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:42.234110', 'step': 26156, 'epoch': 3}
{'type': 'loss', 'content': 0.088432677090168, 'timestamp': '2025-10-02 00:57:42.245049', 'step': 26157, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:42.303289', 'step': 26157, 'epoch': 3}
{'type': 'loss', 'content': 0.10512210428714752, 'timestamp': '2025-10-02 00:57:42.306280', 'step': 26158, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:57:42.369982', 'step': 26158, 'epoch': 3}
{'type': 'loss', 'content': 0.014535835944116116, 'timestamp': '2025-10-02 00:57:42.380846', 'step': 26159, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:42.437853', 'step': 26159, 'epoch': 3}
{'type': 'loss', 'content': 0.029570570215582848, 'timestamp': '2025-10-02 00:57:42.444041', 'step': 26160, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:42.499736', 'step': 26160, 'epoch': 3}
{'type': 'loss', 'content': 0.09761601686477661, 'timestamp': '2025-10-02 00:57:42.502379', 'step': 26161, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:42.558662', 'step': 26161, 'epoch': 3}
{'type': 'loss', 'content': 0.03719792515039444, 'timestamp': '2025-10-02 00:57:42.567683', 'step': 26162, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:42.625422', 'step': 26162, 'epoch': 3}
{'type': 'loss', 'content': 0.03830907121300697, 'timestamp': '2025-10-02 00:57:42.632581', 'step': 26163, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:42.689041', 'step': 26163, 'epoch': 3}
{'type': 'loss', 'content': 0.06641358137130737, 'timestamp': '2025-10-02 00:57:42.695202', 'step': 26164, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:42.750638', 'step': 26164, 'epoch': 3}
{'type': 'loss', 'content': 0.08969271183013916, 'timestamp': '2025-10-02 00:57:42.753320', 'step': 26165, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:42.810249', 'step': 26165, 'epoch': 3}
{'type': 'loss', 'content': 0.0325581319630146, 'timestamp': '2025-10-02 00:57:42.815963', 'step': 26166, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:42.872962', 'step': 26166, 'epoch': 3}
{'type': 'loss', 'content': 0.0221354141831398, 'timestamp': '2025-10-02 00:57:42.880160', 'step': 26167, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:42.936360', 'step': 26167, 'epoch': 3}
{'type': 'loss', 'content': 0.02681182324886322, 'timestamp': '2025-10-02 00:57:42.943245', 'step': 26168, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:43.000352', 'step': 26168, 'epoch': 3}
{'type': 'loss', 'content': 0.042211394757032394, 'timestamp': '2025-10-02 00:57:43.007532', 'step': 26169, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:43.066384', 'step': 26169, 'epoch': 3}
{'type': 'loss', 'content': 0.06604587286710739, 'timestamp': '2025-10-02 00:57:43.068872', 'step': 26170, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:57:43.127255', 'step': 26170, 'epoch': 3}
{'type': 'loss', 'content': 0.06508392095565796, 'timestamp': '2025-10-02 00:57:43.130542', 'step': 26171, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:43.186666', 'step': 26171, 'epoch': 3}
{'type': 'loss', 'content': 0.018799614161252975, 'timestamp': '2025-10-02 00:57:43.194720', 'step': 26172, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:43.248879', 'step': 26172, 'epoch': 3}
{'type': 'loss', 'content': 0.05049234628677368, 'timestamp': '2025-10-02 00:57:43.251303', 'step': 26173, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:43.306028', 'step': 26173, 'epoch': 3}
{'type': 'loss', 'content': 0.17085713148117065, 'timestamp': '2025-10-02 00:57:43.308212', 'step': 26174, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:43.364539', 'step': 26174, 'epoch': 3}
{'type': 'loss', 'content': 0.055409062653779984, 'timestamp': '2025-10-02 00:57:43.367312', 'step': 26175, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:43.422606', 'step': 26175, 'epoch': 3}
{'type': 'loss', 'content': 0.10932593792676926, 'timestamp': '2025-10-02 00:57:43.428712', 'step': 26176, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:43.482983', 'step': 26176, 'epoch': 3}
{'type': 'loss', 'content': 0.020128145813941956, 'timestamp': '2025-10-02 00:57:43.485699', 'step': 26177, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:43.541622', 'step': 26177, 'epoch': 3}
{'type': 'loss', 'content': 0.05163231119513512, 'timestamp': '2025-10-02 00:57:43.544052', 'step': 26178, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:43.599115', 'step': 26178, 'epoch': 3}
{'type': 'loss', 'content': 0.11400555819272995, 'timestamp': '2025-10-02 00:57:43.601943', 'step': 26179, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:43.656938', 'step': 26179, 'epoch': 3}
{'type': 'loss', 'content': 0.03249021992087364, 'timestamp': '2025-10-02 00:57:43.662984', 'step': 26180, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:43.720728', 'step': 26180, 'epoch': 3}
{'type': 'loss', 'content': 0.029903706163167953, 'timestamp': '2025-10-02 00:57:43.731695', 'step': 26181, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:57:43.803006', 'step': 26181, 'epoch': 3}
{'type': 'loss', 'content': 0.009970811195671558, 'timestamp': '2025-10-02 00:57:43.815653', 'step': 26182, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:43.875183', 'step': 26182, 'epoch': 3}
{'type': 'loss', 'content': 0.05553373694419861, 'timestamp': '2025-10-02 00:57:43.877376', 'step': 26183, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:43.934269', 'step': 26183, 'epoch': 3}
{'type': 'loss', 'content': 0.007126692682504654, 'timestamp': '2025-10-02 00:57:43.942233', 'step': 26184, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:43.995740', 'step': 26184, 'epoch': 3}
{'type': 'loss', 'content': 0.08122187852859497, 'timestamp': '2025-10-02 00:57:43.998220', 'step': 26185, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:44.051908', 'step': 26185, 'epoch': 3}
{'type': 'loss', 'content': 0.12920811772346497, 'timestamp': '2025-10-02 00:57:44.054555', 'step': 26186, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:44.109174', 'step': 26186, 'epoch': 3}
{'type': 'loss', 'content': 0.05038662627339363, 'timestamp': '2025-10-02 00:57:44.114811', 'step': 26187, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:44.169433', 'step': 26187, 'epoch': 3}
{'type': 'loss', 'content': 0.01664194092154503, 'timestamp': '2025-10-02 00:57:44.175306', 'step': 26188, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:44.228980', 'step': 26188, 'epoch': 3}
{'type': 'loss', 'content': 0.016437338665127754, 'timestamp': '2025-10-02 00:57:44.231322', 'step': 26189, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:44.285098', 'step': 26189, 'epoch': 3}
{'type': 'loss', 'content': 0.018250882625579834, 'timestamp': '2025-10-02 00:57:44.287749', 'step': 26190, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:44.342775', 'step': 26190, 'epoch': 3}
{'type': 'loss', 'content': 0.0342690572142601, 'timestamp': '2025-10-02 00:57:44.348342', 'step': 26191, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:44.406804', 'step': 26191, 'epoch': 3}
{'type': 'loss', 'content': 0.03573885187506676, 'timestamp': '2025-10-02 00:57:44.417074', 'step': 26192, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:44.472152', 'step': 26192, 'epoch': 3}
{'type': 'loss', 'content': 0.062427815049886703, 'timestamp': '2025-10-02 00:57:44.474717', 'step': 26193, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:44.531408', 'step': 26193, 'epoch': 3}
{'type': 'loss', 'content': 0.010621370747685432, 'timestamp': '2025-10-02 00:57:44.540935', 'step': 26194, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:44.596926', 'step': 26194, 'epoch': 3}
{'type': 'loss', 'content': 0.028092334046959877, 'timestamp': '2025-10-02 00:57:44.599367', 'step': 26195, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:44.654745', 'step': 26195, 'epoch': 3}
{'type': 'loss', 'content': 0.0054992143996059895, 'timestamp': '2025-10-02 00:57:44.660608', 'step': 26196, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:44.715044', 'step': 26196, 'epoch': 3}
{'type': 'loss', 'content': 0.021018534898757935, 'timestamp': '2025-10-02 00:57:44.722456', 'step': 26197, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:57:44.777904', 'step': 26197, 'epoch': 3}
{'type': 'loss', 'content': 0.07527723163366318, 'timestamp': '2025-10-02 00:57:44.780213', 'step': 26198, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:44.835852', 'step': 26198, 'epoch': 3}
{'type': 'loss', 'content': 0.04479921609163284, 'timestamp': '2025-10-02 00:57:44.843095', 'step': 26199, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:44.898091', 'step': 26199, 'epoch': 3}
{'type': 'loss', 'content': 0.10430886596441269, 'timestamp': '2025-10-02 00:57:44.904396', 'step': 26200, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:44.959412', 'step': 26200, 'epoch': 3}
{'type': 'loss', 'content': 0.0026163470465689898, 'timestamp': '2025-10-02 00:57:44.966637', 'step': 26201, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:57:45.020958', 'step': 26201, 'epoch': 3}
{'type': 'loss', 'content': 0.054396722465753555, 'timestamp': '2025-10-02 00:57:45.023141', 'step': 26202, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:45.077676', 'step': 26202, 'epoch': 3}
{'type': 'loss', 'content': 0.005165780894458294, 'timestamp': '2025-10-02 00:57:45.080209', 'step': 26203, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:45.135796', 'step': 26203, 'epoch': 3}
{'type': 'loss', 'content': 0.04500367119908333, 'timestamp': '2025-10-02 00:57:45.141953', 'step': 26204, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:45.196196', 'step': 26204, 'epoch': 3}
{'type': 'loss', 'content': 0.04552349075675011, 'timestamp': '2025-10-02 00:57:45.198595', 'step': 26205, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:45.256300', 'step': 26205, 'epoch': 3}
{'type': 'loss', 'content': 0.027280785143375397, 'timestamp': '2025-10-02 00:57:45.262035', 'step': 26206, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:45.317524', 'step': 26206, 'epoch': 3}
{'type': 'loss', 'content': 0.05649387836456299, 'timestamp': '2025-10-02 00:57:45.322941', 'step': 26207, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:45.378354', 'step': 26207, 'epoch': 3}
{'type': 'loss', 'content': 0.035765837877988815, 'timestamp': '2025-10-02 00:57:45.386316', 'step': 26208, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:57:45.448939', 'step': 26208, 'epoch': 3}
{'type': 'loss', 'content': 0.03859824687242508, 'timestamp': '2025-10-02 00:57:45.460275', 'step': 26209, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:45.516225', 'step': 26209, 'epoch': 3}
{'type': 'loss', 'content': 0.057909972965717316, 'timestamp': '2025-10-02 00:57:45.518432', 'step': 26210, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:45.573802', 'step': 26210, 'epoch': 3}
{'type': 'loss', 'content': 0.06407546997070312, 'timestamp': '2025-10-02 00:57:45.583288', 'step': 26211, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:57:45.652956', 'step': 26211, 'epoch': 3}
{'type': 'loss', 'content': 0.017244908958673477, 'timestamp': '2025-10-02 00:57:45.666103', 'step': 26212, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:57:45.729417', 'step': 26212, 'epoch': 3}
{'type': 'loss', 'content': 0.044070158153772354, 'timestamp': '2025-10-02 00:57:45.740733', 'step': 26213, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:45.797526', 'step': 26213, 'epoch': 3}
{'type': 'loss', 'content': 0.040008142590522766, 'timestamp': '2025-10-02 00:57:45.800118', 'step': 26214, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:45.855822', 'step': 26214, 'epoch': 3}
{'type': 'loss', 'content': 0.037083834409713745, 'timestamp': '2025-10-02 00:57:45.858442', 'step': 26215, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:45.914386', 'step': 26215, 'epoch': 3}
{'type': 'loss', 'content': 0.02494366653263569, 'timestamp': '2025-10-02 00:57:45.924694', 'step': 26216, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:57:45.986124', 'step': 26216, 'epoch': 3}
{'type': 'loss', 'content': 0.003139705862849951, 'timestamp': '2025-10-02 00:57:45.997630', 'step': 26217, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:57:46.051875', 'step': 26217, 'epoch': 3}
{'type': 'loss', 'content': 0.07525492459535599, 'timestamp': '2025-10-02 00:57:46.055656', 'step': 26218, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:46.110445', 'step': 26218, 'epoch': 3}
{'type': 'loss', 'content': 0.15097171068191528, 'timestamp': '2025-10-02 00:57:46.113246', 'step': 26219, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:46.168656', 'step': 26219, 'epoch': 3}
{'type': 'loss', 'content': 0.014146441593766212, 'timestamp': '2025-10-02 00:57:46.175050', 'step': 26220, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:46.230081', 'step': 26220, 'epoch': 3}
{'type': 'loss', 'content': 0.08398111164569855, 'timestamp': '2025-10-02 00:57:46.232629', 'step': 26221, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:46.287793', 'step': 26221, 'epoch': 3}
{'type': 'loss', 'content': 0.04619273915886879, 'timestamp': '2025-10-02 00:57:46.290425', 'step': 26222, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:46.350962', 'step': 26222, 'epoch': 3}
{'type': 'loss', 'content': 0.0020795667078346014, 'timestamp': '2025-10-02 00:57:46.360207', 'step': 26223, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:46.414807', 'step': 26223, 'epoch': 3}
{'type': 'loss', 'content': 0.04678119346499443, 'timestamp': '2025-10-02 00:57:46.423212', 'step': 26224, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:46.477782', 'step': 26224, 'epoch': 3}
{'type': 'loss', 'content': 0.09473126381635666, 'timestamp': '2025-10-02 00:57:46.485151', 'step': 26225, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:46.540082', 'step': 26225, 'epoch': 3}
{'type': 'loss', 'content': 0.048399873077869415, 'timestamp': '2025-10-02 00:57:46.542988', 'step': 26226, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:46.598239', 'step': 26226, 'epoch': 3}
{'type': 'loss', 'content': 0.032210350036621094, 'timestamp': '2025-10-02 00:57:46.600650', 'step': 26227, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:46.654766', 'step': 26227, 'epoch': 3}
{'type': 'loss', 'content': 0.11473306268453598, 'timestamp': '2025-10-02 00:57:46.662529', 'step': 26228, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:57:46.724515', 'step': 26228, 'epoch': 3}
{'type': 'loss', 'content': 0.051016949117183685, 'timestamp': '2025-10-02 00:57:46.736078', 'step': 26229, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:46.791227', 'step': 26229, 'epoch': 3}
{'type': 'loss', 'content': 0.04253659024834633, 'timestamp': '2025-10-02 00:57:46.793989', 'step': 26230, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:46.849117', 'step': 26230, 'epoch': 3}
{'type': 'loss', 'content': 0.016773691400885582, 'timestamp': '2025-10-02 00:57:46.851743', 'step': 26231, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:46.906600', 'step': 26231, 'epoch': 3}
{'type': 'loss', 'content': 0.05341833084821701, 'timestamp': '2025-10-02 00:57:46.912798', 'step': 26232, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:46.966817', 'step': 26232, 'epoch': 3}
{'type': 'loss', 'content': 0.10186612606048584, 'timestamp': '2025-10-02 00:57:46.970771', 'step': 26233, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:47.027312', 'step': 26233, 'epoch': 3}
{'type': 'loss', 'content': 0.03680083528161049, 'timestamp': '2025-10-02 00:57:47.034696', 'step': 26234, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:47.089241', 'step': 26234, 'epoch': 3}
{'type': 'loss', 'content': 0.07415977865457535, 'timestamp': '2025-10-02 00:57:47.092405', 'step': 26235, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:47.152784', 'step': 26235, 'epoch': 3}
{'type': 'loss', 'content': 0.0011787834810093045, 'timestamp': '2025-10-02 00:57:47.163744', 'step': 26236, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:47.218324', 'step': 26236, 'epoch': 3}
{'type': 'loss', 'content': 0.03751093149185181, 'timestamp': '2025-10-02 00:57:47.220853', 'step': 26237, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:47.274659', 'step': 26237, 'epoch': 3}
{'type': 'loss', 'content': 0.08015340566635132, 'timestamp': '2025-10-02 00:57:47.277456', 'step': 26238, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:57:47.332116', 'step': 26238, 'epoch': 3}
{'type': 'loss', 'content': 0.05511230230331421, 'timestamp': '2025-10-02 00:57:47.334705', 'step': 26239, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:47.389507', 'step': 26239, 'epoch': 3}
{'type': 'loss', 'content': 0.04118906706571579, 'timestamp': '2025-10-02 00:57:47.399385', 'step': 26240, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:47.454432', 'step': 26240, 'epoch': 3}
{'type': 'loss', 'content': 0.07780484110116959, 'timestamp': '2025-10-02 00:57:47.456751', 'step': 26241, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:47.510866', 'step': 26241, 'epoch': 3}
{'type': 'loss', 'content': 0.04160400480031967, 'timestamp': '2025-10-02 00:57:47.513501', 'step': 26242, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:47.569162', 'step': 26242, 'epoch': 3}
{'type': 'loss', 'content': 0.0535237155854702, 'timestamp': '2025-10-02 00:57:47.572464', 'step': 26243, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:57:47.628185', 'step': 26243, 'epoch': 3}
{'type': 'loss', 'content': 0.03858084976673126, 'timestamp': '2025-10-02 00:57:47.634742', 'step': 26244, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:47.688840', 'step': 26244, 'epoch': 3}
{'type': 'loss', 'content': 0.03419093042612076, 'timestamp': '2025-10-02 00:57:47.691581', 'step': 26245, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:47.752504', 'step': 26245, 'epoch': 3}
{'type': 'loss', 'content': 0.03223850950598717, 'timestamp': '2025-10-02 00:57:47.762662', 'step': 26246, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:47.816930', 'step': 26246, 'epoch': 3}
{'type': 'loss', 'content': 0.04505554214119911, 'timestamp': '2025-10-02 00:57:47.819719', 'step': 26247, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:47.875196', 'step': 26247, 'epoch': 3}
{'type': 'loss', 'content': 0.02426423691213131, 'timestamp': '2025-10-02 00:57:47.883233', 'step': 26248, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:47.938188', 'step': 26248, 'epoch': 3}
{'type': 'loss', 'content': 0.03235422447323799, 'timestamp': '2025-10-02 00:57:47.940694', 'step': 26249, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:47.999676', 'step': 26249, 'epoch': 3}
{'type': 'loss', 'content': 0.014273970387876034, 'timestamp': '2025-10-02 00:57:48.009860', 'step': 26250, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:48.064748', 'step': 26250, 'epoch': 3}
{'type': 'loss', 'content': 0.10177960246801376, 'timestamp': '2025-10-02 00:57:48.067613', 'step': 26251, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:48.125607', 'step': 26251, 'epoch': 3}
{'type': 'loss', 'content': 0.06276974081993103, 'timestamp': '2025-10-02 00:57:48.131945', 'step': 26252, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:48.187060', 'step': 26252, 'epoch': 3}
{'type': 'loss', 'content': 0.03169736638665199, 'timestamp': '2025-10-02 00:57:48.194220', 'step': 26253, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:48.251204', 'step': 26253, 'epoch': 3}
{'type': 'loss', 'content': 0.0015476791886612773, 'timestamp': '2025-10-02 00:57:48.256735', 'step': 26254, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:48.312370', 'step': 26254, 'epoch': 3}
{'type': 'loss', 'content': 0.10577064752578735, 'timestamp': '2025-10-02 00:57:48.314637', 'step': 26255, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:48.370097', 'step': 26255, 'epoch': 3}
{'type': 'loss', 'content': 0.0477803535759449, 'timestamp': '2025-10-02 00:57:48.380397', 'step': 26256, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:48.435838', 'step': 26256, 'epoch': 3}
{'type': 'loss', 'content': 0.04727751389145851, 'timestamp': '2025-10-02 00:57:48.438781', 'step': 26257, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:48.493915', 'step': 26257, 'epoch': 3}
{'type': 'loss', 'content': 0.13074098527431488, 'timestamp': '2025-10-02 00:57:48.496546', 'step': 26258, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:48.551753', 'step': 26258, 'epoch': 3}
{'type': 'loss', 'content': 0.09360455721616745, 'timestamp': '2025-10-02 00:57:48.554126', 'step': 26259, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:48.608663', 'step': 26259, 'epoch': 3}
{'type': 'loss', 'content': 0.08308513462543488, 'timestamp': '2025-10-02 00:57:48.614868', 'step': 26260, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:48.669363', 'step': 26260, 'epoch': 3}
{'type': 'loss', 'content': 0.049237024039030075, 'timestamp': '2025-10-02 00:57:48.671826', 'step': 26261, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:48.726501', 'step': 26261, 'epoch': 3}
{'type': 'loss', 'content': 0.006690874230116606, 'timestamp': '2025-10-02 00:57:48.733758', 'step': 26262, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:48.789404', 'step': 26262, 'epoch': 3}
{'type': 'loss', 'content': 0.013157702051103115, 'timestamp': '2025-10-02 00:57:48.796665', 'step': 26263, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:57:48.868547', 'step': 26263, 'epoch': 3}
{'type': 'loss', 'content': 0.0012861330760642886, 'timestamp': '2025-10-02 00:57:48.881806', 'step': 26264, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:48.936770', 'step': 26264, 'epoch': 3}
{'type': 'loss', 'content': 0.04595097526907921, 'timestamp': '2025-10-02 00:57:48.939351', 'step': 26265, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:48.993443', 'step': 26265, 'epoch': 3}
{'type': 'loss', 'content': 0.03611529618501663, 'timestamp': '2025-10-02 00:57:48.996207', 'step': 26266, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:57:49.074308', 'step': 26266, 'epoch': 3}
{'type': 'loss', 'content': 0.027780670672655106, 'timestamp': '2025-10-02 00:57:49.088117', 'step': 26267, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:49.144060', 'step': 26267, 'epoch': 3}
{'type': 'loss', 'content': 0.017965184524655342, 'timestamp': '2025-10-02 00:57:49.153806', 'step': 26268, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:49.208277', 'step': 26268, 'epoch': 3}
{'type': 'loss', 'content': 0.030925512313842773, 'timestamp': '2025-10-02 00:57:49.215526', 'step': 26269, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:49.275652', 'step': 26269, 'epoch': 3}
{'type': 'loss', 'content': 0.015316682867705822, 'timestamp': '2025-10-02 00:57:49.285774', 'step': 26270, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:49.341208', 'step': 26270, 'epoch': 3}
{'type': 'loss', 'content': 0.010994800366461277, 'timestamp': '2025-10-02 00:57:49.348291', 'step': 26271, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:49.403373', 'step': 26271, 'epoch': 3}
{'type': 'loss', 'content': 0.07453330606222153, 'timestamp': '2025-10-02 00:57:49.409388', 'step': 26272, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:49.463701', 'step': 26272, 'epoch': 3}
{'type': 'loss', 'content': 0.01690417155623436, 'timestamp': '2025-10-02 00:57:49.472868', 'step': 26273, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:57:49.528598', 'step': 26273, 'epoch': 3}
{'type': 'loss', 'content': 0.06803467869758606, 'timestamp': '2025-10-02 00:57:49.531091', 'step': 26274, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:57:49.605715', 'step': 26274, 'epoch': 3}
{'type': 'loss', 'content': 0.019445810467004776, 'timestamp': '2025-10-02 00:57:49.619143', 'step': 26275, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:49.674495', 'step': 26275, 'epoch': 3}
{'type': 'loss', 'content': 0.10039927065372467, 'timestamp': '2025-10-02 00:57:49.681088', 'step': 26276, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:57:49.748762', 'step': 26276, 'epoch': 3}
{'type': 'loss', 'content': 0.050214722752571106, 'timestamp': '2025-10-02 00:57:49.762118', 'step': 26277, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:49.817993', 'step': 26277, 'epoch': 3}
{'type': 'loss', 'content': 0.036947332322597504, 'timestamp': '2025-10-02 00:57:49.827522', 'step': 26278, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:49.882903', 'step': 26278, 'epoch': 3}
{'type': 'loss', 'content': 0.03477633744478226, 'timestamp': '2025-10-02 00:57:49.885659', 'step': 26279, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:49.941161', 'step': 26279, 'epoch': 3}
{'type': 'loss', 'content': 0.03258151561021805, 'timestamp': '2025-10-02 00:57:49.949234', 'step': 26280, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:50.002794', 'step': 26280, 'epoch': 3}
{'type': 'loss', 'content': 0.08037257939577103, 'timestamp': '2025-10-02 00:57:50.005247', 'step': 26281, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:50.060008', 'step': 26281, 'epoch': 3}
{'type': 'loss', 'content': 0.07107574492692947, 'timestamp': '2025-10-02 00:57:50.062356', 'step': 26282, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:50.117377', 'step': 26282, 'epoch': 3}
{'type': 'loss', 'content': 0.02655983529984951, 'timestamp': '2025-10-02 00:57:50.124579', 'step': 26283, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:50.178729', 'step': 26283, 'epoch': 3}
{'type': 'loss', 'content': 0.05246282368898392, 'timestamp': '2025-10-02 00:57:50.184920', 'step': 26284, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:57:50.240210', 'step': 26284, 'epoch': 3}
{'type': 'loss', 'content': 0.055119436234235764, 'timestamp': '2025-10-02 00:57:50.242884', 'step': 26285, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:57:50.297379', 'step': 26285, 'epoch': 3}
{'type': 'loss', 'content': 0.08194541931152344, 'timestamp': '2025-10-02 00:57:50.299790', 'step': 26286, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:50.356148', 'step': 26286, 'epoch': 3}
{'type': 'loss', 'content': 0.05034811049699783, 'timestamp': '2025-10-02 00:57:50.358830', 'step': 26287, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:50.413619', 'step': 26287, 'epoch': 3}
{'type': 'loss', 'content': 0.04813386872410774, 'timestamp': '2025-10-02 00:57:50.420162', 'step': 26288, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:50.478177', 'step': 26288, 'epoch': 3}
{'type': 'loss', 'content': 0.01785060577094555, 'timestamp': '2025-10-02 00:57:50.489119', 'step': 26289, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:50.544322', 'step': 26289, 'epoch': 3}
{'type': 'loss', 'content': 0.010789413005113602, 'timestamp': '2025-10-02 00:57:50.546670', 'step': 26290, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:50.602294', 'step': 26290, 'epoch': 3}
{'type': 'loss', 'content': 0.01686006225645542, 'timestamp': '2025-10-02 00:57:50.604880', 'step': 26291, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:50.660854', 'step': 26291, 'epoch': 3}
{'type': 'loss', 'content': 0.05746520310640335, 'timestamp': '2025-10-02 00:57:50.668845', 'step': 26292, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:50.727046', 'step': 26292, 'epoch': 3}
{'type': 'loss', 'content': 0.06760404258966446, 'timestamp': '2025-10-02 00:57:50.730264', 'step': 26293, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:50.786669', 'step': 26293, 'epoch': 3}
{'type': 'loss', 'content': 0.013786256313323975, 'timestamp': '2025-10-02 00:57:50.793934', 'step': 26294, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:57:50.850993', 'step': 26294, 'epoch': 3}
{'type': 'loss', 'content': 0.08169801533222198, 'timestamp': '2025-10-02 00:57:50.854543', 'step': 26295, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:50.912612', 'step': 26295, 'epoch': 3}
{'type': 'loss', 'content': 0.024968668818473816, 'timestamp': '2025-10-02 00:57:50.920599', 'step': 26296, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:50.978327', 'step': 26296, 'epoch': 3}
{'type': 'loss', 'content': 0.07689673453569412, 'timestamp': '2025-10-02 00:57:50.981427', 'step': 26297, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:51.038388', 'step': 26297, 'epoch': 3}
{'type': 'loss', 'content': 0.06422077864408493, 'timestamp': '2025-10-02 00:57:51.041463', 'step': 26298, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:51.099660', 'step': 26298, 'epoch': 3}
{'type': 'loss', 'content': 0.03443676233291626, 'timestamp': '2025-10-02 00:57:51.103132', 'step': 26299, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:51.160054', 'step': 26299, 'epoch': 3}
{'type': 'loss', 'content': 0.017176391556859016, 'timestamp': '2025-10-02 00:57:51.168027', 'step': 26300, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:51.224160', 'step': 26300, 'epoch': 3}
{'type': 'loss', 'content': 0.044489409774541855, 'timestamp': '2025-10-02 00:57:51.227562', 'step': 26301, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:51.284507', 'step': 26301, 'epoch': 3}
{'type': 'loss', 'content': 0.07269006222486496, 'timestamp': '2025-10-02 00:57:51.287758', 'step': 26302, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:51.346700', 'step': 26302, 'epoch': 3}
{'type': 'loss', 'content': 0.040982138365507126, 'timestamp': '2025-10-02 00:57:51.356256', 'step': 26303, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:51.413665', 'step': 26303, 'epoch': 3}
{'type': 'loss', 'content': 0.012899437919259071, 'timestamp': '2025-10-02 00:57:51.420162', 'step': 26304, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:51.475823', 'step': 26304, 'epoch': 3}
{'type': 'loss', 'content': 0.04108047112822533, 'timestamp': '2025-10-02 00:57:51.479087', 'step': 26305, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:51.536365', 'step': 26305, 'epoch': 3}
{'type': 'loss', 'content': 0.05871553346514702, 'timestamp': '2025-10-02 00:57:51.539630', 'step': 26306, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:51.596128', 'step': 26306, 'epoch': 3}
{'type': 'loss', 'content': 0.1292528659105301, 'timestamp': '2025-10-02 00:57:51.599875', 'step': 26307, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:51.659445', 'step': 26307, 'epoch': 3}
{'type': 'loss', 'content': 0.024969207122921944, 'timestamp': '2025-10-02 00:57:51.667459', 'step': 26308, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:51.726189', 'step': 26308, 'epoch': 3}
{'type': 'loss', 'content': 0.05722487345337868, 'timestamp': '2025-10-02 00:57:51.730333', 'step': 26309, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:51.788038', 'step': 26309, 'epoch': 3}
{'type': 'loss', 'content': 0.02064092643558979, 'timestamp': '2025-10-02 00:57:51.791236', 'step': 26310, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:51.848573', 'step': 26310, 'epoch': 3}
{'type': 'loss', 'content': 0.029388029128313065, 'timestamp': '2025-10-02 00:57:51.850978', 'step': 26311, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:51.907349', 'step': 26311, 'epoch': 3}
{'type': 'loss', 'content': 0.04830767214298248, 'timestamp': '2025-10-02 00:57:51.914261', 'step': 26312, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:51.970195', 'step': 26312, 'epoch': 3}
{'type': 'loss', 'content': 0.007940235547721386, 'timestamp': '2025-10-02 00:57:51.980474', 'step': 26313, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:52.039031', 'step': 26313, 'epoch': 3}
{'type': 'loss', 'content': 0.006341359578073025, 'timestamp': '2025-10-02 00:57:52.042028', 'step': 26314, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:52.099426', 'step': 26314, 'epoch': 3}
{'type': 'loss', 'content': 0.030433321371674538, 'timestamp': '2025-10-02 00:57:52.101739', 'step': 26315, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:52.158439', 'step': 26315, 'epoch': 3}
{'type': 'loss', 'content': 0.022631438449025154, 'timestamp': '2025-10-02 00:57:52.165201', 'step': 26316, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:52.219280', 'step': 26316, 'epoch': 3}
{'type': 'loss', 'content': 0.025822684168815613, 'timestamp': '2025-10-02 00:57:52.221917', 'step': 26317, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 576], 'flops': 11520070000896.0}, 'timestamp': '2025-10-02 00:57:52.308164', 'step': 26317, 'epoch': 3}
{'type': 'loss', 'content': 0.007098262198269367, 'timestamp': '2025-10-02 00:57:52.323473', 'step': 26318, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:52.380415', 'step': 26318, 'epoch': 3}
{'type': 'loss', 'content': 0.05991099402308464, 'timestamp': '2025-10-02 00:57:52.389348', 'step': 26319, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:57:52.452981', 'step': 26319, 'epoch': 3}
{'type': 'loss', 'content': 0.007477326784282923, 'timestamp': '2025-10-02 00:57:52.464412', 'step': 26320, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:52.519161', 'step': 26320, 'epoch': 3}
{'type': 'loss', 'content': 0.0016077477484941483, 'timestamp': '2025-10-02 00:57:52.521849', 'step': 26321, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:57:52.593240', 'step': 26321, 'epoch': 3}
{'type': 'loss', 'content': 0.011275704950094223, 'timestamp': '2025-10-02 00:57:52.605540', 'step': 26322, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:52.661152', 'step': 26322, 'epoch': 3}
{'type': 'loss', 'content': 0.026885153725743294, 'timestamp': '2025-10-02 00:57:52.663409', 'step': 26323, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:52.719418', 'step': 26323, 'epoch': 3}
{'type': 'loss', 'content': 0.010149864479899406, 'timestamp': '2025-10-02 00:57:52.727432', 'step': 26324, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:52.781988', 'step': 26324, 'epoch': 3}
{'type': 'loss', 'content': 0.05168820917606354, 'timestamp': '2025-10-02 00:57:52.784362', 'step': 26325, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:52.840081', 'step': 26325, 'epoch': 3}
{'type': 'loss', 'content': 0.048821792006492615, 'timestamp': '2025-10-02 00:57:52.849613', 'step': 26326, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:52.905016', 'step': 26326, 'epoch': 3}
{'type': 'loss', 'content': 0.0472174733877182, 'timestamp': '2025-10-02 00:57:52.914547', 'step': 26327, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:57:52.969803', 'step': 26327, 'epoch': 3}
{'type': 'loss', 'content': 0.13088801503181458, 'timestamp': '2025-10-02 00:57:52.975950', 'step': 26328, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:53.030851', 'step': 26328, 'epoch': 3}
{'type': 'loss', 'content': 0.1362789273262024, 'timestamp': '2025-10-02 00:57:53.033286', 'step': 26329, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:53.087359', 'step': 26329, 'epoch': 3}
{'type': 'loss', 'content': 0.07254499942064285, 'timestamp': '2025-10-02 00:57:53.089631', 'step': 26330, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:53.144641', 'step': 26330, 'epoch': 3}
{'type': 'loss', 'content': 0.029644768685102463, 'timestamp': '2025-10-02 00:57:53.147228', 'step': 26331, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:53.201806', 'step': 26331, 'epoch': 3}
{'type': 'loss', 'content': 0.03566456586122513, 'timestamp': '2025-10-02 00:57:53.207550', 'step': 26332, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:53.261610', 'step': 26332, 'epoch': 3}
{'type': 'loss', 'content': 0.055142421275377274, 'timestamp': '2025-10-02 00:57:53.264117', 'step': 26333, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:53.319058', 'step': 26333, 'epoch': 3}
{'type': 'loss', 'content': 0.049641989171504974, 'timestamp': '2025-10-02 00:57:53.321670', 'step': 26334, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:53.376345', 'step': 26334, 'epoch': 3}
{'type': 'loss', 'content': 0.0877680629491806, 'timestamp': '2025-10-02 00:57:53.378841', 'step': 26335, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:53.433307', 'step': 26335, 'epoch': 3}
{'type': 'loss', 'content': 0.0914965569972992, 'timestamp': '2025-10-02 00:57:53.439189', 'step': 26336, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:53.494103', 'step': 26336, 'epoch': 3}
{'type': 'loss', 'content': 0.010833649896085262, 'timestamp': '2025-10-02 00:57:53.496807', 'step': 26337, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:53.551773', 'step': 26337, 'epoch': 3}
{'type': 'loss', 'content': 0.040699198842048645, 'timestamp': '2025-10-02 00:57:53.554479', 'step': 26338, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:53.609884', 'step': 26338, 'epoch': 3}
{'type': 'loss', 'content': 0.037422072142362595, 'timestamp': '2025-10-02 00:57:53.615750', 'step': 26339, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:53.673508', 'step': 26339, 'epoch': 3}
{'type': 'loss', 'content': 0.00973605178296566, 'timestamp': '2025-10-02 00:57:53.679539', 'step': 26340, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:53.737639', 'step': 26340, 'epoch': 3}
{'type': 'loss', 'content': 0.03320617973804474, 'timestamp': '2025-10-02 00:57:53.748557', 'step': 26341, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:53.805405', 'step': 26341, 'epoch': 3}
{'type': 'loss', 'content': 0.021490680053830147, 'timestamp': '2025-10-02 00:57:53.810834', 'step': 26342, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:53.871964', 'step': 26342, 'epoch': 3}
{'type': 'loss', 'content': 0.03356489539146423, 'timestamp': '2025-10-02 00:57:53.877389', 'step': 26343, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:53.932129', 'step': 26343, 'epoch': 3}
{'type': 'loss', 'content': 0.025947120040655136, 'timestamp': '2025-10-02 00:57:53.942140', 'step': 26344, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:53.997322', 'step': 26344, 'epoch': 3}
{'type': 'loss', 'content': 0.05470604449510574, 'timestamp': '2025-10-02 00:57:54.000061', 'step': 26345, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:54.054927', 'step': 26345, 'epoch': 3}
{'type': 'loss', 'content': 0.018186505883932114, 'timestamp': '2025-10-02 00:57:54.057399', 'step': 26346, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:54.112353', 'step': 26346, 'epoch': 3}
{'type': 'loss', 'content': 0.10523105412721634, 'timestamp': '2025-10-02 00:57:54.114792', 'step': 26347, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:54.175896', 'step': 26347, 'epoch': 3}
{'type': 'loss', 'content': 0.05774092674255371, 'timestamp': '2025-10-02 00:57:54.186884', 'step': 26348, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:54.242277', 'step': 26348, 'epoch': 3}
{'type': 'loss', 'content': 0.05619388818740845, 'timestamp': '2025-10-02 00:57:54.246865', 'step': 26349, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:54.303210', 'step': 26349, 'epoch': 3}
{'type': 'loss', 'content': 0.06506109982728958, 'timestamp': '2025-10-02 00:57:54.305638', 'step': 26350, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:54.360454', 'step': 26350, 'epoch': 3}
{'type': 'loss', 'content': 0.017087671905755997, 'timestamp': '2025-10-02 00:57:54.363350', 'step': 26351, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:54.417433', 'step': 26351, 'epoch': 3}
{'type': 'loss', 'content': 0.03158853203058243, 'timestamp': '2025-10-02 00:57:54.423867', 'step': 26352, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:54.482162', 'step': 26352, 'epoch': 3}
{'type': 'loss', 'content': 0.02740563452243805, 'timestamp': '2025-10-02 00:57:54.484489', 'step': 26353, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:57:54.547857', 'step': 26353, 'epoch': 3}
{'type': 'loss', 'content': 0.02182312309741974, 'timestamp': '2025-10-02 00:57:54.558714', 'step': 26354, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:54.613938', 'step': 26354, 'epoch': 3}
{'type': 'loss', 'content': 0.06175052002072334, 'timestamp': '2025-10-02 00:57:54.617129', 'step': 26355, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:54.672410', 'step': 26355, 'epoch': 3}
{'type': 'loss', 'content': 0.04568977281451225, 'timestamp': '2025-10-02 00:57:54.682003', 'step': 26356, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:54.744188', 'step': 26356, 'epoch': 3}
{'type': 'loss', 'content': 0.0270713921636343, 'timestamp': '2025-10-02 00:57:54.746986', 'step': 26357, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:54.802217', 'step': 26357, 'epoch': 3}
{'type': 'loss', 'content': 0.046817127615213394, 'timestamp': '2025-10-02 00:57:54.804617', 'step': 26358, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:54.859695', 'step': 26358, 'epoch': 3}
{'type': 'loss', 'content': 0.06888046115636826, 'timestamp': '2025-10-02 00:57:54.862051', 'step': 26359, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:57:54.924280', 'step': 26359, 'epoch': 3}
{'type': 'loss', 'content': 0.012536967173218727, 'timestamp': '2025-10-02 00:57:54.935719', 'step': 26360, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:54.993052', 'step': 26360, 'epoch': 3}
{'type': 'loss', 'content': 0.05421372875571251, 'timestamp': '2025-10-02 00:57:54.995383', 'step': 26361, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:55.053839', 'step': 26361, 'epoch': 3}
{'type': 'loss', 'content': 0.008133498951792717, 'timestamp': '2025-10-02 00:57:55.061107', 'step': 26362, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:55.117714', 'step': 26362, 'epoch': 3}
{'type': 'loss', 'content': 0.035031117498874664, 'timestamp': '2025-10-02 00:57:55.120465', 'step': 26363, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:55.176833', 'step': 26363, 'epoch': 3}
{'type': 'loss', 'content': 0.023987755179405212, 'timestamp': '2025-10-02 00:57:55.187162', 'step': 26364, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:55.241728', 'step': 26364, 'epoch': 3}
{'type': 'loss', 'content': 0.03755534440279007, 'timestamp': '2025-10-02 00:57:55.247257', 'step': 26365, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:55.302430', 'step': 26365, 'epoch': 3}
{'type': 'loss', 'content': 0.06533345580101013, 'timestamp': '2025-10-02 00:57:55.304830', 'step': 26366, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:55.361099', 'step': 26366, 'epoch': 3}
{'type': 'loss', 'content': 0.015276125632226467, 'timestamp': '2025-10-02 00:57:55.368191', 'step': 26367, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:57:55.426068', 'step': 26367, 'epoch': 3}
{'type': 'loss', 'content': 0.22776471078395844, 'timestamp': '2025-10-02 00:57:55.432069', 'step': 26368, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:55.486585', 'step': 26368, 'epoch': 3}
{'type': 'loss', 'content': 0.04439472407102585, 'timestamp': '2025-10-02 00:57:55.495863', 'step': 26369, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:55.552498', 'step': 26369, 'epoch': 3}
{'type': 'loss', 'content': 0.0829625129699707, 'timestamp': '2025-10-02 00:57:55.559868', 'step': 26370, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:55.615121', 'step': 26370, 'epoch': 3}
{'type': 'loss', 'content': 0.02924646995961666, 'timestamp': '2025-10-02 00:57:55.617521', 'step': 26371, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:55.672224', 'step': 26371, 'epoch': 3}
{'type': 'loss', 'content': 0.06047037988901138, 'timestamp': '2025-10-02 00:57:55.679257', 'step': 26372, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:55.733130', 'step': 26372, 'epoch': 3}
{'type': 'loss', 'content': 0.005604539066553116, 'timestamp': '2025-10-02 00:57:55.743347', 'step': 26373, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:57:55.805367', 'step': 26373, 'epoch': 3}
{'type': 'loss', 'content': 0.04545510932803154, 'timestamp': '2025-10-02 00:57:55.815824', 'step': 26374, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:55.871774', 'step': 26374, 'epoch': 3}
{'type': 'loss', 'content': 0.023926254361867905, 'timestamp': '2025-10-02 00:57:55.880954', 'step': 26375, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:55.936035', 'step': 26375, 'epoch': 3}
{'type': 'loss', 'content': 0.05056512728333473, 'timestamp': '2025-10-02 00:57:55.942132', 'step': 26376, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:55.996525', 'step': 26376, 'epoch': 3}
{'type': 'loss', 'content': 0.057851266115903854, 'timestamp': '2025-10-02 00:57:56.005642', 'step': 26377, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:56.067834', 'step': 26377, 'epoch': 3}
{'type': 'loss', 'content': 0.05729934200644493, 'timestamp': '2025-10-02 00:57:56.070371', 'step': 26378, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:56.125078', 'step': 26378, 'epoch': 3}
{'type': 'loss', 'content': 0.04943976178765297, 'timestamp': '2025-10-02 00:57:56.127966', 'step': 26379, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:56.183945', 'step': 26379, 'epoch': 3}
{'type': 'loss', 'content': 0.10826403647661209, 'timestamp': '2025-10-02 00:57:56.190120', 'step': 26380, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:56.244492', 'step': 26380, 'epoch': 3}
{'type': 'loss', 'content': 0.015759354457259178, 'timestamp': '2025-10-02 00:57:56.246668', 'step': 26381, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:56.301611', 'step': 26381, 'epoch': 3}
{'type': 'loss', 'content': 0.045657023787498474, 'timestamp': '2025-10-02 00:57:56.308790', 'step': 26382, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:56.364819', 'step': 26382, 'epoch': 3}
{'type': 'loss', 'content': 0.05668529123067856, 'timestamp': '2025-10-02 00:57:56.367339', 'step': 26383, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:56.422919', 'step': 26383, 'epoch': 3}
{'type': 'loss', 'content': 0.006433060392737389, 'timestamp': '2025-10-02 00:57:56.430839', 'step': 26384, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:56.485489', 'step': 26384, 'epoch': 3}
{'type': 'loss', 'content': 0.02869507484138012, 'timestamp': '2025-10-02 00:57:56.488218', 'step': 26385, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:56.542888', 'step': 26385, 'epoch': 3}
{'type': 'loss', 'content': 0.08413670212030411, 'timestamp': '2025-10-02 00:57:56.545263', 'step': 26386, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:56.599848', 'step': 26386, 'epoch': 3}
{'type': 'loss', 'content': 0.05267637223005295, 'timestamp': '2025-10-02 00:57:56.602263', 'step': 26387, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:56.657976', 'step': 26387, 'epoch': 3}
{'type': 'loss', 'content': 0.02273324877023697, 'timestamp': '2025-10-02 00:57:56.664246', 'step': 26388, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:57:56.718396', 'step': 26388, 'epoch': 3}
{'type': 'loss', 'content': 0.07436135411262512, 'timestamp': '2025-10-02 00:57:56.721355', 'step': 26389, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:56.776169', 'step': 26389, 'epoch': 3}
{'type': 'loss', 'content': 0.051798705011606216, 'timestamp': '2025-10-02 00:57:56.779071', 'step': 26390, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:56.834982', 'step': 26390, 'epoch': 3}
{'type': 'loss', 'content': 0.04529096558690071, 'timestamp': '2025-10-02 00:57:56.837494', 'step': 26391, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:56.892825', 'step': 26391, 'epoch': 3}
{'type': 'loss', 'content': 0.05866960808634758, 'timestamp': '2025-10-02 00:57:56.898794', 'step': 26392, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:56.953543', 'step': 26392, 'epoch': 3}
{'type': 'loss', 'content': 0.00021125988860148937, 'timestamp': '2025-10-02 00:57:56.959310', 'step': 26393, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:57.014436', 'step': 26393, 'epoch': 3}
{'type': 'loss', 'content': 0.04757993668317795, 'timestamp': '2025-10-02 00:57:57.017234', 'step': 26394, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:57.077037', 'step': 26394, 'epoch': 3}
{'type': 'loss', 'content': 0.030355731025338173, 'timestamp': '2025-10-02 00:57:57.087199', 'step': 26395, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:57.146553', 'step': 26395, 'epoch': 3}
{'type': 'loss', 'content': 0.010113786906003952, 'timestamp': '2025-10-02 00:57:57.156449', 'step': 26396, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:57.211599', 'step': 26396, 'epoch': 3}
{'type': 'loss', 'content': 0.018953053280711174, 'timestamp': '2025-10-02 00:57:57.213961', 'step': 26397, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:57:57.269195', 'step': 26397, 'epoch': 3}
{'type': 'loss', 'content': 0.1660565286874771, 'timestamp': '2025-10-02 00:57:57.271536', 'step': 26398, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:57.330768', 'step': 26398, 'epoch': 3}
{'type': 'loss', 'content': 0.0763140544295311, 'timestamp': '2025-10-02 00:57:57.340954', 'step': 26399, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:57.396311', 'step': 26399, 'epoch': 3}
{'type': 'loss', 'content': 0.0166156142950058, 'timestamp': '2025-10-02 00:57:57.404236', 'step': 26400, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:57.459150', 'step': 26400, 'epoch': 3}
{'type': 'loss', 'content': 0.03679615631699562, 'timestamp': '2025-10-02 00:57:57.466171', 'step': 26401, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:57.521588', 'step': 26401, 'epoch': 3}
{'type': 'loss', 'content': 0.05325577035546303, 'timestamp': '2025-10-02 00:57:57.523847', 'step': 26402, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:57.579179', 'step': 26402, 'epoch': 3}
{'type': 'loss', 'content': 0.0343550480902195, 'timestamp': '2025-10-02 00:57:57.581765', 'step': 26403, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:57.637706', 'step': 26403, 'epoch': 3}
{'type': 'loss', 'content': 0.05021252855658531, 'timestamp': '2025-10-02 00:57:57.648232', 'step': 26404, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:57.709739', 'step': 26404, 'epoch': 3}
{'type': 'loss', 'content': 0.038279931992292404, 'timestamp': '2025-10-02 00:57:57.717743', 'step': 26405, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:57.774078', 'step': 26405, 'epoch': 3}
{'type': 'loss', 'content': 0.029422203078866005, 'timestamp': '2025-10-02 00:57:57.777412', 'step': 26406, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:57.834090', 'step': 26406, 'epoch': 3}
{'type': 'loss', 'content': 0.15988849103450775, 'timestamp': '2025-10-02 00:57:57.836769', 'step': 26407, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:57.891609', 'step': 26407, 'epoch': 3}
{'type': 'loss', 'content': 0.117716483771801, 'timestamp': '2025-10-02 00:57:57.898317', 'step': 26408, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:57.954262', 'step': 26408, 'epoch': 3}
{'type': 'loss', 'content': 0.04683169722557068, 'timestamp': '2025-10-02 00:57:57.956489', 'step': 26409, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:58.011679', 'step': 26409, 'epoch': 3}
{'type': 'loss', 'content': 0.044599514454603195, 'timestamp': '2025-10-02 00:57:58.014116', 'step': 26410, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:58.068879', 'step': 26410, 'epoch': 3}
{'type': 'loss', 'content': 0.04897787421941757, 'timestamp': '2025-10-02 00:57:58.071296', 'step': 26411, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:57:58.126671', 'step': 26411, 'epoch': 3}
{'type': 'loss', 'content': 0.039131589233875275, 'timestamp': '2025-10-02 00:57:58.136544', 'step': 26412, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:57:58.191311', 'step': 26412, 'epoch': 3}
{'type': 'loss', 'content': 0.07732770591974258, 'timestamp': '2025-10-02 00:57:58.201380', 'step': 26413, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:58.257752', 'step': 26413, 'epoch': 3}
{'type': 'loss', 'content': 0.006532489322125912, 'timestamp': '2025-10-02 00:57:58.259953', 'step': 26414, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:58.314412', 'step': 26414, 'epoch': 3}
{'type': 'loss', 'content': 0.06514164805412292, 'timestamp': '2025-10-02 00:57:58.317008', 'step': 26415, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:58.372766', 'step': 26415, 'epoch': 3}
{'type': 'loss', 'content': 0.05062092840671539, 'timestamp': '2025-10-02 00:57:58.378825', 'step': 26416, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:58.433748', 'step': 26416, 'epoch': 3}
{'type': 'loss', 'content': 0.030130671337246895, 'timestamp': '2025-10-02 00:57:58.441515', 'step': 26417, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:57:58.496754', 'step': 26417, 'epoch': 3}
{'type': 'loss', 'content': 0.11481982469558716, 'timestamp': '2025-10-02 00:57:58.499044', 'step': 26418, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:58.554334', 'step': 26418, 'epoch': 3}
{'type': 'loss', 'content': 0.05465509369969368, 'timestamp': '2025-10-02 00:57:58.556829', 'step': 26419, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:57:58.612648', 'step': 26419, 'epoch': 3}
{'type': 'loss', 'content': 0.01813165843486786, 'timestamp': '2025-10-02 00:57:58.620304', 'step': 26420, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:58.675853', 'step': 26420, 'epoch': 3}
{'type': 'loss', 'content': 0.07119100540876389, 'timestamp': '2025-10-02 00:57:58.678408', 'step': 26421, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:58.739091', 'step': 26421, 'epoch': 3}
{'type': 'loss', 'content': 0.027397973462939262, 'timestamp': '2025-10-02 00:57:58.749222', 'step': 26422, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:58.819171', 'step': 26422, 'epoch': 3}
{'type': 'loss', 'content': 0.03949417546391487, 'timestamp': '2025-10-02 00:57:58.821752', 'step': 26423, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:58.877199', 'step': 26423, 'epoch': 3}
{'type': 'loss', 'content': 0.1286790668964386, 'timestamp': '2025-10-02 00:57:58.883236', 'step': 26424, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:58.938606', 'step': 26424, 'epoch': 3}
{'type': 'loss', 'content': 0.03795747086405754, 'timestamp': '2025-10-02 00:57:58.941230', 'step': 26425, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:58.996699', 'step': 26425, 'epoch': 3}
{'type': 'loss', 'content': 0.07232115417718887, 'timestamp': '2025-10-02 00:57:58.999330', 'step': 26426, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:59.059752', 'step': 26426, 'epoch': 3}
{'type': 'loss', 'content': 0.0039675273001194, 'timestamp': '2025-10-02 00:57:59.069889', 'step': 26427, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:59.124988', 'step': 26427, 'epoch': 3}
{'type': 'loss', 'content': 0.09963076561689377, 'timestamp': '2025-10-02 00:57:59.131413', 'step': 26428, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:57:59.186729', 'step': 26428, 'epoch': 3}
{'type': 'loss', 'content': 0.030957600101828575, 'timestamp': '2025-10-02 00:57:59.188930', 'step': 26429, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:57:59.244275', 'step': 26429, 'epoch': 3}
{'type': 'loss', 'content': 0.054309941828250885, 'timestamp': '2025-10-02 00:57:59.246638', 'step': 26430, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:57:59.302130', 'step': 26430, 'epoch': 3}
{'type': 'loss', 'content': 0.040332943201065063, 'timestamp': '2025-10-02 00:57:59.304356', 'step': 26431, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:57:59.359766', 'step': 26431, 'epoch': 3}
{'type': 'loss', 'content': 0.1401943564414978, 'timestamp': '2025-10-02 00:57:59.366028', 'step': 26432, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:57:59.421026', 'step': 26432, 'epoch': 3}
{'type': 'loss', 'content': 0.031512726098299026, 'timestamp': '2025-10-02 00:57:59.424371', 'step': 26433, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:57:59.481105', 'step': 26433, 'epoch': 3}
{'type': 'loss', 'content': 0.009580319747328758, 'timestamp': '2025-10-02 00:57:59.486504', 'step': 26434, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:57:59.542349', 'step': 26434, 'epoch': 3}
{'type': 'loss', 'content': 0.06483439356088638, 'timestamp': '2025-10-02 00:57:59.545257', 'step': 26435, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:57:59.615705', 'step': 26435, 'epoch': 3}
{'type': 'loss', 'content': 0.04149491712450981, 'timestamp': '2025-10-02 00:57:59.628871', 'step': 26436, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:57:59.684238', 'step': 26436, 'epoch': 3}
{'type': 'loss', 'content': 0.05776125192642212, 'timestamp': '2025-10-02 00:57:59.687318', 'step': 26437, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:57:59.747048', 'step': 26437, 'epoch': 3}
{'type': 'loss', 'content': 0.018962394446134567, 'timestamp': '2025-10-02 00:57:59.757208', 'step': 26438, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:57:59.816303', 'step': 26438, 'epoch': 3}
{'type': 'loss', 'content': 0.12120825052261353, 'timestamp': '2025-10-02 00:57:59.818861', 'step': 26439, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:57:59.876313', 'step': 26439, 'epoch': 3}
{'type': 'loss', 'content': 0.03766117990016937, 'timestamp': '2025-10-02 00:57:59.883144', 'step': 26440, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:57:59.941997', 'step': 26440, 'epoch': 3}
{'type': 'loss', 'content': 0.04159344360232353, 'timestamp': '2025-10-02 00:57:59.945115', 'step': 26441, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:00.002700', 'step': 26441, 'epoch': 3}
{'type': 'loss', 'content': 0.017974186688661575, 'timestamp': '2025-10-02 00:58:00.005302', 'step': 26442, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:00.063645', 'step': 26442, 'epoch': 3}
{'type': 'loss', 'content': 0.06746387481689453, 'timestamp': '2025-10-02 00:58:00.066716', 'step': 26443, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:00.124362', 'step': 26443, 'epoch': 3}
{'type': 'loss', 'content': 0.07367882132530212, 'timestamp': '2025-10-02 00:58:00.131858', 'step': 26444, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:00.190474', 'step': 26444, 'epoch': 3}
{'type': 'loss', 'content': 0.06748755276203156, 'timestamp': '2025-10-02 00:58:00.192843', 'step': 26445, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:58:00.250418', 'step': 26445, 'epoch': 3}
{'type': 'loss', 'content': 0.06402149796485901, 'timestamp': '2025-10-02 00:58:00.253405', 'step': 26446, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:00.310260', 'step': 26446, 'epoch': 3}
{'type': 'loss', 'content': 0.05702650174498558, 'timestamp': '2025-10-02 00:58:00.315404', 'step': 26447, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:58:00.377862', 'step': 26447, 'epoch': 3}
{'type': 'loss', 'content': 0.036156874150037766, 'timestamp': '2025-10-02 00:58:00.388804', 'step': 26448, 'epoch': 3}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:58:28.534035', 'step': 26448, 'epoch': 3}
{'type': 'pplx', 'content': 91.82963976426315, 'timestamp': '2025-10-02 00:58:28.538471', 'step': 26448, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:28.597473', 'step': 26448, 'epoch': 3}
{'type': 'loss', 'content': 0.05417127534747124, 'timestamp': '2025-10-02 00:58:28.600556', 'step': 26449, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:28.672740', 'step': 26449, 'epoch': 3}
{'type': 'loss', 'content': 0.02108391374349594, 'timestamp': '2025-10-02 00:58:28.680384', 'step': 26450, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:58:28.744717', 'step': 26450, 'epoch': 3}
{'type': 'loss', 'content': 0.020887626335024834, 'timestamp': '2025-10-02 00:58:28.754222', 'step': 26451, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:28.817094', 'step': 26451, 'epoch': 3}
{'type': 'loss', 'content': 0.038410089910030365, 'timestamp': '2025-10-02 00:58:28.828630', 'step': 26452, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:58:28.885965', 'step': 26452, 'epoch': 3}
{'type': 'loss', 'content': 0.029363533481955528, 'timestamp': '2025-10-02 00:58:28.895775', 'step': 26453, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:58:28.960719', 'step': 26453, 'epoch': 3}
{'type': 'loss', 'content': 0.06374745815992355, 'timestamp': '2025-10-02 00:58:28.964472', 'step': 26454, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:29.021916', 'step': 26454, 'epoch': 3}
{'type': 'loss', 'content': 0.09018916636705399, 'timestamp': '2025-10-02 00:58:29.025052', 'step': 26455, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:58:29.090183', 'step': 26455, 'epoch': 3}
{'type': 'loss', 'content': 0.05595831573009491, 'timestamp': '2025-10-02 00:58:29.101659', 'step': 26456, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:29.158063', 'step': 26456, 'epoch': 3}
{'type': 'loss', 'content': 0.10541846603155136, 'timestamp': '2025-10-02 00:58:29.160788', 'step': 26457, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:58:29.222184', 'step': 26457, 'epoch': 3}
{'type': 'loss', 'content': 0.03716307133436203, 'timestamp': '2025-10-02 00:58:29.225649', 'step': 26458, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:29.286023', 'step': 26458, 'epoch': 3}
{'type': 'loss', 'content': 0.10284274071455002, 'timestamp': '2025-10-02 00:58:29.289698', 'step': 26459, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:29.352056', 'step': 26459, 'epoch': 3}
{'type': 'loss', 'content': 0.08309471607208252, 'timestamp': '2025-10-02 00:58:29.362111', 'step': 26460, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:29.423767', 'step': 26460, 'epoch': 3}
{'type': 'loss', 'content': 0.031814634799957275, 'timestamp': '2025-10-02 00:58:29.426940', 'step': 26461, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:29.493226', 'step': 26461, 'epoch': 3}
{'type': 'loss', 'content': 0.09029604494571686, 'timestamp': '2025-10-02 00:58:29.497137', 'step': 26462, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:29.557018', 'step': 26462, 'epoch': 3}
{'type': 'loss', 'content': 0.045683421194553375, 'timestamp': '2025-10-02 00:58:29.562348', 'step': 26463, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:29.622188', 'step': 26463, 'epoch': 3}
{'type': 'loss', 'content': 0.0036642691120505333, 'timestamp': '2025-10-02 00:58:29.633543', 'step': 26464, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:58:29.689902', 'step': 26464, 'epoch': 3}
{'type': 'loss', 'content': 0.06297324597835541, 'timestamp': '2025-10-02 00:58:29.693013', 'step': 26465, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:58:29.750146', 'step': 26465, 'epoch': 3}
{'type': 'loss', 'content': 0.04029900208115578, 'timestamp': '2025-10-02 00:58:29.759633', 'step': 26466, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:58:29.817198', 'step': 26466, 'epoch': 3}
{'type': 'loss', 'content': 0.06484188884496689, 'timestamp': '2025-10-02 00:58:29.819764', 'step': 26467, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:58:29.881452', 'step': 26467, 'epoch': 3}
{'type': 'loss', 'content': 0.034267667680978775, 'timestamp': '2025-10-02 00:58:29.891374', 'step': 26468, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:29.948104', 'step': 26468, 'epoch': 3}
{'type': 'loss', 'content': 0.07218047976493835, 'timestamp': '2025-10-02 00:58:29.953466', 'step': 26469, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:30.012633', 'step': 26469, 'epoch': 3}
{'type': 'loss', 'content': 0.14231082797050476, 'timestamp': '2025-10-02 00:58:30.015753', 'step': 26470, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:58:30.086086', 'step': 26470, 'epoch': 3}
{'type': 'loss', 'content': 0.03966484218835831, 'timestamp': '2025-10-02 00:58:30.096285', 'step': 26471, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:30.167360', 'step': 26471, 'epoch': 3}
{'type': 'loss', 'content': 0.03536275774240494, 'timestamp': '2025-10-02 00:58:30.179482', 'step': 26472, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:30.244899', 'step': 26472, 'epoch': 3}
{'type': 'loss', 'content': 0.049049150198698044, 'timestamp': '2025-10-02 00:58:30.251308', 'step': 26473, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:58:30.328951', 'step': 26473, 'epoch': 3}
{'type': 'loss', 'content': 0.04462675005197525, 'timestamp': '2025-10-02 00:58:30.342149', 'step': 26474, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:30.402786', 'step': 26474, 'epoch': 3}
{'type': 'loss', 'content': 0.08797550201416016, 'timestamp': '2025-10-02 00:58:30.406452', 'step': 26475, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:30.479881', 'step': 26475, 'epoch': 3}
{'type': 'loss', 'content': 0.07164808362722397, 'timestamp': '2025-10-02 00:58:30.488291', 'step': 26476, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:58:30.554771', 'step': 26476, 'epoch': 3}
{'type': 'loss', 'content': 0.040704622864723206, 'timestamp': '2025-10-02 00:58:30.558116', 'step': 26477, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:30.625671', 'step': 26477, 'epoch': 3}
{'type': 'loss', 'content': 0.011265823617577553, 'timestamp': '2025-10-02 00:58:30.630947', 'step': 26478, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:58:30.698663', 'step': 26478, 'epoch': 3}
{'type': 'loss', 'content': 0.08265011012554169, 'timestamp': '2025-10-02 00:58:30.701391', 'step': 26479, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:30.765707', 'step': 26479, 'epoch': 3}
{'type': 'loss', 'content': 0.0158856064081192, 'timestamp': '2025-10-02 00:58:30.772266', 'step': 26480, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:58:30.847036', 'step': 26480, 'epoch': 3}
{'type': 'loss', 'content': 0.008935067802667618, 'timestamp': '2025-10-02 00:58:30.858351', 'step': 26481, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:30.922549', 'step': 26481, 'epoch': 3}
{'type': 'loss', 'content': 0.06991203874349594, 'timestamp': '2025-10-02 00:58:30.925505', 'step': 26482, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:30.993746', 'step': 26482, 'epoch': 3}
{'type': 'loss', 'content': 0.053304288536310196, 'timestamp': '2025-10-02 00:58:31.001949', 'step': 26483, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:31.067274', 'step': 26483, 'epoch': 3}
{'type': 'loss', 'content': 0.021261490881443024, 'timestamp': '2025-10-02 00:58:31.074121', 'step': 26484, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:31.150373', 'step': 26484, 'epoch': 3}
{'type': 'loss', 'content': 0.10240770131349564, 'timestamp': '2025-10-02 00:58:31.153673', 'step': 26485, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:31.212428', 'step': 26485, 'epoch': 3}
{'type': 'loss', 'content': 0.010956112295389175, 'timestamp': '2025-10-02 00:58:31.220394', 'step': 26486, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:31.279348', 'step': 26486, 'epoch': 3}
{'type': 'loss', 'content': 0.012832671403884888, 'timestamp': '2025-10-02 00:58:31.282228', 'step': 26487, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:31.347745', 'step': 26487, 'epoch': 3}
{'type': 'loss', 'content': 0.06866771727800369, 'timestamp': '2025-10-02 00:58:31.355136', 'step': 26488, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:31.412405', 'step': 26488, 'epoch': 3}
{'type': 'loss', 'content': 0.026445899158716202, 'timestamp': '2025-10-02 00:58:31.415416', 'step': 26489, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:58:31.485107', 'step': 26489, 'epoch': 3}
{'type': 'loss', 'content': 0.02556704729795456, 'timestamp': '2025-10-02 00:58:31.495760', 'step': 26490, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:58:31.553135', 'step': 26490, 'epoch': 3}
{'type': 'loss', 'content': 0.10404994338750839, 'timestamp': '2025-10-02 00:58:31.562873', 'step': 26491, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:31.646331', 'step': 26491, 'epoch': 3}
{'type': 'loss', 'content': 0.10456026345491409, 'timestamp': '2025-10-02 00:58:31.654579', 'step': 26492, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:58:31.723037', 'step': 26492, 'epoch': 3}
{'type': 'loss', 'content': 0.0011941920965909958, 'timestamp': '2025-10-02 00:58:31.734003', 'step': 26493, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:58:31.812026', 'step': 26493, 'epoch': 3}
{'type': 'loss', 'content': 0.04445503652095795, 'timestamp': '2025-10-02 00:58:31.821087', 'step': 26494, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:31.894427', 'step': 26494, 'epoch': 3}
{'type': 'loss', 'content': 0.016552388668060303, 'timestamp': '2025-10-02 00:58:31.902082', 'step': 26495, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:58:31.966880', 'step': 26495, 'epoch': 3}
{'type': 'loss', 'content': 0.036700811237096786, 'timestamp': '2025-10-02 00:58:31.974519', 'step': 26496, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:58:32.036612', 'step': 26496, 'epoch': 3}
{'type': 'loss', 'content': 0.05655108392238617, 'timestamp': '2025-10-02 00:58:32.044665', 'step': 26497, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:58:32.134547', 'step': 26497, 'epoch': 3}
{'type': 'loss', 'content': 0.05214650183916092, 'timestamp': '2025-10-02 00:58:32.147985', 'step': 26498, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:32.205149', 'step': 26498, 'epoch': 3}
{'type': 'loss', 'content': 0.07005930691957474, 'timestamp': '2025-10-02 00:58:32.211664', 'step': 26499, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:32.270758', 'step': 26499, 'epoch': 3}
{'type': 'loss', 'content': 0.021713862195611, 'timestamp': '2025-10-02 00:58:32.278035', 'step': 26500, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 26500', 'timestamp': '2025-10-02 00:58:32.745317', 'step': 26500, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:58:32.814001', 'step': 26500, 'epoch': 3}
{'type': 'loss', 'content': 0.07951782643795013, 'timestamp': '2025-10-02 00:58:32.821817', 'step': 26501, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:58:32.902942', 'step': 26501, 'epoch': 3}
{'type': 'loss', 'content': 0.08565790951251984, 'timestamp': '2025-10-02 00:58:32.906527', 'step': 26502, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:32.975301', 'step': 26502, 'epoch': 3}
{'type': 'loss', 'content': 0.06392879039049149, 'timestamp': '2025-10-02 00:58:32.984503', 'step': 26503, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:58:33.051768', 'step': 26503, 'epoch': 3}
{'type': 'loss', 'content': 0.06221446767449379, 'timestamp': '2025-10-02 00:58:33.059090', 'step': 26504, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:58:33.132035', 'step': 26504, 'epoch': 3}
{'type': 'loss', 'content': 0.08876089751720428, 'timestamp': '2025-10-02 00:58:33.135419', 'step': 26505, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:33.220353', 'step': 26505, 'epoch': 3}
{'type': 'loss', 'content': 0.0006070327362976968, 'timestamp': '2025-10-02 00:58:33.229490', 'step': 26506, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:33.309860', 'step': 26506, 'epoch': 3}
{'type': 'loss', 'content': 0.028800906613469124, 'timestamp': '2025-10-02 00:58:33.312851', 'step': 26507, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:58:33.370751', 'step': 26507, 'epoch': 3}
{'type': 'loss', 'content': 0.057202182710170746, 'timestamp': '2025-10-02 00:58:33.381018', 'step': 26508, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:58:33.444731', 'step': 26508, 'epoch': 3}
{'type': 'loss', 'content': 0.040511354804039, 'timestamp': '2025-10-02 00:58:33.453669', 'step': 26509, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:58:33.531448', 'step': 26509, 'epoch': 3}
{'type': 'loss', 'content': 0.03256414085626602, 'timestamp': '2025-10-02 00:58:33.541011', 'step': 26510, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:33.601177', 'step': 26510, 'epoch': 3}
{'type': 'loss', 'content': 0.014762092381715775, 'timestamp': '2025-10-02 00:58:33.604416', 'step': 26511, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:33.673463', 'step': 26511, 'epoch': 3}
{'type': 'loss', 'content': 0.05650048702955246, 'timestamp': '2025-10-02 00:58:33.686223', 'step': 26512, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:33.758763', 'step': 26512, 'epoch': 3}
{'type': 'loss', 'content': 0.02774221822619438, 'timestamp': '2025-10-02 00:58:33.768485', 'step': 26513, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:58:33.845680', 'step': 26513, 'epoch': 3}
{'type': 'loss', 'content': 0.03460368514060974, 'timestamp': '2025-10-02 00:58:33.849917', 'step': 26514, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:58:33.910129', 'step': 26514, 'epoch': 3}
{'type': 'loss', 'content': 0.03416519612073898, 'timestamp': '2025-10-02 00:58:33.919016', 'step': 26515, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:33.982790', 'step': 26515, 'epoch': 3}
{'type': 'loss', 'content': 0.046592798084020615, 'timestamp': '2025-10-02 00:58:33.989735', 'step': 26516, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:58:34.065415', 'step': 26516, 'epoch': 3}
{'type': 'loss', 'content': 0.12544536590576172, 'timestamp': '2025-10-02 00:58:34.067913', 'step': 26517, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:34.127532', 'step': 26517, 'epoch': 3}
{'type': 'loss', 'content': 0.0017791487043723464, 'timestamp': '2025-10-02 00:58:34.137357', 'step': 26518, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:58:34.222287', 'step': 26518, 'epoch': 3}
{'type': 'loss', 'content': 0.010236426256597042, 'timestamp': '2025-10-02 00:58:34.232706', 'step': 26519, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:34.290836', 'step': 26519, 'epoch': 3}
{'type': 'loss', 'content': 0.03211307153105736, 'timestamp': '2025-10-02 00:58:34.297853', 'step': 26520, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:34.362688', 'step': 26520, 'epoch': 3}
{'type': 'loss', 'content': 0.040764085948467255, 'timestamp': '2025-10-02 00:58:34.370604', 'step': 26521, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:34.434845', 'step': 26521, 'epoch': 3}
{'type': 'loss', 'content': 0.019415806978940964, 'timestamp': '2025-10-02 00:58:34.437730', 'step': 26522, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:34.499043', 'step': 26522, 'epoch': 3}
{'type': 'loss', 'content': 0.03445611521601677, 'timestamp': '2025-10-02 00:58:34.502155', 'step': 26523, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:34.566889', 'step': 26523, 'epoch': 3}
{'type': 'loss', 'content': 0.020600007846951485, 'timestamp': '2025-10-02 00:58:34.574265', 'step': 26524, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:58:34.630932', 'step': 26524, 'epoch': 3}
{'type': 'loss', 'content': 0.023932404816150665, 'timestamp': '2025-10-02 00:58:34.640864', 'step': 26525, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:34.700044', 'step': 26525, 'epoch': 3}
{'type': 'loss', 'content': 0.030347177758812904, 'timestamp': '2025-10-02 00:58:34.703752', 'step': 26526, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:34.769550', 'step': 26526, 'epoch': 3}
{'type': 'loss', 'content': 0.0009492638637311757, 'timestamp': '2025-10-02 00:58:34.772528', 'step': 26527, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:34.847749', 'step': 26527, 'epoch': 3}
{'type': 'loss', 'content': 0.028767574578523636, 'timestamp': '2025-10-02 00:58:34.860065', 'step': 26528, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:34.921772', 'step': 26528, 'epoch': 3}
{'type': 'loss', 'content': 0.03805205598473549, 'timestamp': '2025-10-02 00:58:34.932660', 'step': 26529, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:35.004360', 'step': 26529, 'epoch': 3}
{'type': 'loss', 'content': 0.05034926161170006, 'timestamp': '2025-10-02 00:58:35.014052', 'step': 26530, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:35.092423', 'step': 26530, 'epoch': 3}
{'type': 'loss', 'content': 0.08008211851119995, 'timestamp': '2025-10-02 00:58:35.099763', 'step': 26531, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:35.168243', 'step': 26531, 'epoch': 3}
{'type': 'loss', 'content': 0.014268658123910427, 'timestamp': '2025-10-02 00:58:35.181044', 'step': 26532, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:35.246102', 'step': 26532, 'epoch': 3}
{'type': 'loss', 'content': 0.01699046604335308, 'timestamp': '2025-10-02 00:58:35.252927', 'step': 26533, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:58:35.327278', 'step': 26533, 'epoch': 3}
{'type': 'loss', 'content': 0.017037805169820786, 'timestamp': '2025-10-02 00:58:35.330348', 'step': 26534, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:58:35.406061', 'step': 26534, 'epoch': 3}
{'type': 'loss', 'content': 0.027291253209114075, 'timestamp': '2025-10-02 00:58:35.417054', 'step': 26535, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:58:35.474748', 'step': 26535, 'epoch': 3}
{'type': 'loss', 'content': 0.06604792177677155, 'timestamp': '2025-10-02 00:58:35.493447', 'step': 26536, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:35.554695', 'step': 26536, 'epoch': 3}
{'type': 'loss', 'content': 0.011167777702212334, 'timestamp': '2025-10-02 00:58:35.561619', 'step': 26537, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:58:35.641177', 'step': 26537, 'epoch': 3}
{'type': 'loss', 'content': 0.009334508329629898, 'timestamp': '2025-10-02 00:58:35.651321', 'step': 26538, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:58:35.719143', 'step': 26538, 'epoch': 3}
{'type': 'loss', 'content': 0.05702165141701698, 'timestamp': '2025-10-02 00:58:35.722371', 'step': 26539, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:58:35.795332', 'step': 26539, 'epoch': 3}
{'type': 'loss', 'content': 0.08528479933738708, 'timestamp': '2025-10-02 00:58:35.803406', 'step': 26540, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:35.887525', 'step': 26540, 'epoch': 3}
{'type': 'loss', 'content': 0.08658989518880844, 'timestamp': '2025-10-02 00:58:35.891337', 'step': 26541, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:58:35.966041', 'step': 26541, 'epoch': 3}
{'type': 'loss', 'content': 0.07777149230241776, 'timestamp': '2025-10-02 00:58:35.976222', 'step': 26542, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:58:36.065837', 'step': 26542, 'epoch': 3}
{'type': 'loss', 'content': 0.03167647495865822, 'timestamp': '2025-10-02 00:58:36.077908', 'step': 26543, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:36.157703', 'step': 26543, 'epoch': 3}
{'type': 'loss', 'content': 0.0440668985247612, 'timestamp': '2025-10-02 00:58:36.171748', 'step': 26544, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:58:36.245530', 'step': 26544, 'epoch': 3}
{'type': 'loss', 'content': 0.026489432901144028, 'timestamp': '2025-10-02 00:58:36.254421', 'step': 26545, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:58:36.313096', 'step': 26545, 'epoch': 3}
{'type': 'loss', 'content': 0.02411513216793537, 'timestamp': '2025-10-02 00:58:36.325927', 'step': 26546, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:36.415381', 'step': 26546, 'epoch': 3}
{'type': 'loss', 'content': 0.03632281720638275, 'timestamp': '2025-10-02 00:58:36.424702', 'step': 26547, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:58:36.504491', 'step': 26547, 'epoch': 3}
{'type': 'loss', 'content': 0.08794603496789932, 'timestamp': '2025-10-02 00:58:36.511636', 'step': 26548, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:58:36.582364', 'step': 26548, 'epoch': 3}
{'type': 'loss', 'content': 0.051347311586141586, 'timestamp': '2025-10-02 00:58:36.585837', 'step': 26549, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:58:36.654915', 'step': 26549, 'epoch': 3}
{'type': 'loss', 'content': 0.058703791350126266, 'timestamp': '2025-10-02 00:58:36.658474', 'step': 26550, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:58:36.745253', 'step': 26550, 'epoch': 3}
{'type': 'loss', 'content': 0.049250684678554535, 'timestamp': '2025-10-02 00:58:36.755411', 'step': 26551, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:36.834525', 'step': 26551, 'epoch': 3}
{'type': 'loss', 'content': 0.07528794556856155, 'timestamp': '2025-10-02 00:58:36.840620', 'step': 26552, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:36.914606', 'step': 26552, 'epoch': 3}
{'type': 'loss', 'content': 0.054871901869773865, 'timestamp': '2025-10-02 00:58:36.922446', 'step': 26553, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:36.989399', 'step': 26553, 'epoch': 3}
{'type': 'loss', 'content': 0.02010795846581459, 'timestamp': '2025-10-02 00:58:36.996846', 'step': 26554, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:58:37.077354', 'step': 26554, 'epoch': 3}
{'type': 'loss', 'content': 0.022771896794438362, 'timestamp': '2025-10-02 00:58:37.089318', 'step': 26555, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:58:37.156426', 'step': 26555, 'epoch': 3}
{'type': 'loss', 'content': 0.03285522013902664, 'timestamp': '2025-10-02 00:58:37.162947', 'step': 26556, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:58:37.246829', 'step': 26556, 'epoch': 3}
{'type': 'loss', 'content': 0.022305814549326897, 'timestamp': '2025-10-02 00:58:37.261197', 'step': 26557, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:37.331714', 'step': 26557, 'epoch': 3}
{'type': 'loss', 'content': 0.009480084292590618, 'timestamp': '2025-10-02 00:58:37.337021', 'step': 26558, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:37.400736', 'step': 26558, 'epoch': 3}
{'type': 'loss', 'content': 0.04631038010120392, 'timestamp': '2025-10-02 00:58:37.407936', 'step': 26559, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:37.474322', 'step': 26559, 'epoch': 3}
{'type': 'loss', 'content': 0.002419772557914257, 'timestamp': '2025-10-02 00:58:37.485256', 'step': 26560, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:58:37.555290', 'step': 26560, 'epoch': 3}
{'type': 'loss', 'content': 0.03364599123597145, 'timestamp': '2025-10-02 00:58:37.567023', 'step': 26561, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:58:37.652882', 'step': 26561, 'epoch': 3}
{'type': 'loss', 'content': 0.013638630509376526, 'timestamp': '2025-10-02 00:58:37.665209', 'step': 26562, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:58:37.738381', 'step': 26562, 'epoch': 3}
{'type': 'loss', 'content': 0.03650808706879616, 'timestamp': '2025-10-02 00:58:37.743259', 'step': 26563, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:58:37.806165', 'step': 26563, 'epoch': 3}
{'type': 'loss', 'content': 0.024394577369093895, 'timestamp': '2025-10-02 00:58:37.817275', 'step': 26564, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:58:37.873217', 'step': 26564, 'epoch': 3}
{'type': 'loss', 'content': 0.08209609985351562, 'timestamp': '2025-10-02 00:58:37.875985', 'step': 26565, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:58:37.949019', 'step': 26565, 'epoch': 3}
{'type': 'loss', 'content': 0.03199077025055885, 'timestamp': '2025-10-02 00:58:37.959888', 'step': 26566, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:38.027087', 'step': 26566, 'epoch': 3}
{'type': 'loss', 'content': 0.06029612943530083, 'timestamp': '2025-10-02 00:58:38.037434', 'step': 26567, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:38.137513', 'step': 26567, 'epoch': 3}
{'type': 'loss', 'content': 0.019850697368383408, 'timestamp': '2025-10-02 00:58:38.144361', 'step': 26568, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:38.218169', 'step': 26568, 'epoch': 3}
{'type': 'loss', 'content': 0.2103244811296463, 'timestamp': '2025-10-02 00:58:38.226309', 'step': 26569, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:58:38.299125', 'step': 26569, 'epoch': 3}
{'type': 'loss', 'content': 0.015720058232545853, 'timestamp': '2025-10-02 00:58:38.309690', 'step': 26570, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:38.374193', 'step': 26570, 'epoch': 3}
{'type': 'loss', 'content': 0.055614568293094635, 'timestamp': '2025-10-02 00:58:38.377096', 'step': 26571, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:38.438450', 'step': 26571, 'epoch': 3}
{'type': 'loss', 'content': 0.03033808246254921, 'timestamp': '2025-10-02 00:58:38.446225', 'step': 26572, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:58:38.526453', 'step': 26572, 'epoch': 3}
{'type': 'loss', 'content': 0.01667754165828228, 'timestamp': '2025-10-02 00:58:38.534509', 'step': 26573, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:38.602528', 'step': 26573, 'epoch': 3}
{'type': 'loss', 'content': 0.12943404912948608, 'timestamp': '2025-10-02 00:58:38.605360', 'step': 26574, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:38.680896', 'step': 26574, 'epoch': 3}
{'type': 'loss', 'content': 0.023592760786414146, 'timestamp': '2025-10-02 00:58:38.693469', 'step': 26575, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:38.753694', 'step': 26575, 'epoch': 3}
{'type': 'loss', 'content': 0.09390588849782944, 'timestamp': '2025-10-02 00:58:38.761585', 'step': 26576, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:58:38.826803', 'step': 26576, 'epoch': 3}
{'type': 'loss', 'content': 0.05024246498942375, 'timestamp': '2025-10-02 00:58:38.839389', 'step': 26577, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:58:38.941288', 'step': 26577, 'epoch': 3}
{'type': 'loss', 'content': 0.00140493200160563, 'timestamp': '2025-10-02 00:58:38.957150', 'step': 26578, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:58:39.017799', 'step': 26578, 'epoch': 3}
{'type': 'loss', 'content': 0.06305121630430222, 'timestamp': '2025-10-02 00:58:39.020818', 'step': 26579, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:58:39.091252', 'step': 26579, 'epoch': 3}
{'type': 'loss', 'content': 0.03477727994322777, 'timestamp': '2025-10-02 00:58:39.105985', 'step': 26580, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:39.200170', 'step': 26580, 'epoch': 3}
{'type': 'loss', 'content': 0.059851840138435364, 'timestamp': '2025-10-02 00:58:39.203915', 'step': 26581, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:58:39.281241', 'step': 26581, 'epoch': 3}
{'type': 'loss', 'content': 0.02094428986310959, 'timestamp': '2025-10-02 00:58:39.293288', 'step': 26582, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:58:39.363386', 'step': 26582, 'epoch': 3}
{'type': 'loss', 'content': 0.0791080892086029, 'timestamp': '2025-10-02 00:58:39.378793', 'step': 26583, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:39.440094', 'step': 26583, 'epoch': 3}
{'type': 'loss', 'content': 0.04726242646574974, 'timestamp': '2025-10-02 00:58:39.448717', 'step': 26584, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:58:39.505318', 'step': 26584, 'epoch': 3}
{'type': 'loss', 'content': 0.04619168862700462, 'timestamp': '2025-10-02 00:58:39.521252', 'step': 26585, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:39.592889', 'step': 26585, 'epoch': 3}
{'type': 'loss', 'content': 0.04494590312242508, 'timestamp': '2025-10-02 00:58:39.597181', 'step': 26586, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:39.678295', 'step': 26586, 'epoch': 3}
{'type': 'loss', 'content': 0.021621564403176308, 'timestamp': '2025-10-02 00:58:39.681523', 'step': 26587, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:58:39.741084', 'step': 26587, 'epoch': 3}
{'type': 'loss', 'content': 0.017680255696177483, 'timestamp': '2025-10-02 00:58:39.750607', 'step': 26588, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:58:39.817196', 'step': 26588, 'epoch': 3}
{'type': 'loss', 'content': 0.031840067356824875, 'timestamp': '2025-10-02 00:58:39.828657', 'step': 26589, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:39.889643', 'step': 26589, 'epoch': 3}
{'type': 'loss', 'content': 0.06759554892778397, 'timestamp': '2025-10-02 00:58:39.894444', 'step': 26590, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:39.973708', 'step': 26590, 'epoch': 3}
{'type': 'loss', 'content': 0.03099283203482628, 'timestamp': '2025-10-02 00:58:39.986907', 'step': 26591, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:40.073626', 'step': 26591, 'epoch': 3}
{'type': 'loss', 'content': 0.005364291835576296, 'timestamp': '2025-10-02 00:58:40.089343', 'step': 26592, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:40.151126', 'step': 26592, 'epoch': 3}
{'type': 'loss', 'content': 0.037841539829969406, 'timestamp': '2025-10-02 00:58:40.154500', 'step': 26593, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:58:40.241485', 'step': 26593, 'epoch': 3}
{'type': 'loss', 'content': 0.012999966740608215, 'timestamp': '2025-10-02 00:58:40.253473', 'step': 26594, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:40.323757', 'step': 26594, 'epoch': 3}
{'type': 'loss', 'content': 0.06906132400035858, 'timestamp': '2025-10-02 00:58:40.330959', 'step': 26595, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:58:40.400262', 'step': 26595, 'epoch': 3}
{'type': 'loss', 'content': 0.04238797724246979, 'timestamp': '2025-10-02 00:58:40.413868', 'step': 26596, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:58:40.497196', 'step': 26596, 'epoch': 3}
{'type': 'loss', 'content': 0.04853607341647148, 'timestamp': '2025-10-02 00:58:40.508164', 'step': 26597, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:40.583458', 'step': 26597, 'epoch': 3}
{'type': 'loss', 'content': 0.02891388162970543, 'timestamp': '2025-10-02 00:58:40.590220', 'step': 26598, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:58:40.656141', 'step': 26598, 'epoch': 3}
{'type': 'loss', 'content': 0.021731331944465637, 'timestamp': '2025-10-02 00:58:40.666656', 'step': 26599, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:58:40.747887', 'step': 26599, 'epoch': 3}
{'type': 'loss', 'content': 0.020783359184861183, 'timestamp': '2025-10-02 00:58:40.759164', 'step': 26600, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:58:40.848025', 'step': 26600, 'epoch': 3}
{'type': 'loss', 'content': 0.0392998568713665, 'timestamp': '2025-10-02 00:58:40.851022', 'step': 26601, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:40.921629', 'step': 26601, 'epoch': 3}
{'type': 'loss', 'content': 0.1107388511300087, 'timestamp': '2025-10-02 00:58:40.927633', 'step': 26602, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:40.998948', 'step': 26602, 'epoch': 3}
{'type': 'loss', 'content': 0.026009606197476387, 'timestamp': '2025-10-02 00:58:41.005963', 'step': 26603, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:58:41.072414', 'step': 26603, 'epoch': 3}
{'type': 'loss', 'content': 0.03928200155496597, 'timestamp': '2025-10-02 00:58:41.079878', 'step': 26604, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:58:41.151448', 'step': 26604, 'epoch': 3}
{'type': 'loss', 'content': 0.05364220589399338, 'timestamp': '2025-10-02 00:58:41.162807', 'step': 26605, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:41.224589', 'step': 26605, 'epoch': 3}
{'type': 'loss', 'content': 0.04565900191664696, 'timestamp': '2025-10-02 00:58:41.229278', 'step': 26606, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:41.297964', 'step': 26606, 'epoch': 3}
{'type': 'loss', 'content': 0.031788311898708344, 'timestamp': '2025-10-02 00:58:41.303322', 'step': 26607, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:41.362728', 'step': 26607, 'epoch': 3}
{'type': 'loss', 'content': 0.028395306318998337, 'timestamp': '2025-10-02 00:58:41.370037', 'step': 26608, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:58:41.434099', 'step': 26608, 'epoch': 3}
{'type': 'loss', 'content': 0.014627722091972828, 'timestamp': '2025-10-02 00:58:41.445393', 'step': 26609, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:41.507292', 'step': 26609, 'epoch': 3}
{'type': 'loss', 'content': 0.0257240142673254, 'timestamp': '2025-10-02 00:58:41.520714', 'step': 26610, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:58:41.580393', 'step': 26610, 'epoch': 3}
{'type': 'loss', 'content': 0.035139478743076324, 'timestamp': '2025-10-02 00:58:41.584127', 'step': 26611, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:58:41.667027', 'step': 26611, 'epoch': 3}
{'type': 'loss', 'content': 0.009890689514577389, 'timestamp': '2025-10-02 00:58:41.679743', 'step': 26612, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:41.749407', 'step': 26612, 'epoch': 3}
{'type': 'loss', 'content': 0.041027192026376724, 'timestamp': '2025-10-02 00:58:41.754786', 'step': 26613, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:41.839696', 'step': 26613, 'epoch': 3}
{'type': 'loss', 'content': 0.006676810793578625, 'timestamp': '2025-10-02 00:58:41.846685', 'step': 26614, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:58:41.924711', 'step': 26614, 'epoch': 3}
{'type': 'loss', 'content': 0.017137639224529266, 'timestamp': '2025-10-02 00:58:41.935185', 'step': 26615, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:42.013402', 'step': 26615, 'epoch': 3}
{'type': 'loss', 'content': 0.05247436463832855, 'timestamp': '2025-10-02 00:58:42.021262', 'step': 26616, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:58:42.082062', 'step': 26616, 'epoch': 3}
{'type': 'loss', 'content': 0.07490970939397812, 'timestamp': '2025-10-02 00:58:42.085029', 'step': 26617, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:42.145848', 'step': 26617, 'epoch': 3}
{'type': 'loss', 'content': 0.02284080721437931, 'timestamp': '2025-10-02 00:58:42.151451', 'step': 26618, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:58:42.235867', 'step': 26618, 'epoch': 3}
{'type': 'loss', 'content': 0.028322789818048477, 'timestamp': '2025-10-02 00:58:42.246024', 'step': 26619, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:58:42.315403', 'step': 26619, 'epoch': 3}
{'type': 'loss', 'content': 0.0206227358430624, 'timestamp': '2025-10-02 00:58:42.324973', 'step': 26620, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:42.399681', 'step': 26620, 'epoch': 3}
{'type': 'loss', 'content': 0.11563844233751297, 'timestamp': '2025-10-02 00:58:42.405236', 'step': 26621, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:42.483681', 'step': 26621, 'epoch': 3}
{'type': 'loss', 'content': 0.07994471490383148, 'timestamp': '2025-10-02 00:58:42.488941', 'step': 26622, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:58:42.565956', 'step': 26622, 'epoch': 3}
{'type': 'loss', 'content': 0.08753282576799393, 'timestamp': '2025-10-02 00:58:42.569474', 'step': 26623, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:42.665546', 'step': 26623, 'epoch': 3}
{'type': 'loss', 'content': 0.07068067789077759, 'timestamp': '2025-10-02 00:58:42.673096', 'step': 26624, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:58:42.733900', 'step': 26624, 'epoch': 3}
{'type': 'loss', 'content': 0.03574473783373833, 'timestamp': '2025-10-02 00:58:42.739838', 'step': 26625, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:58:42.807363', 'step': 26625, 'epoch': 3}
{'type': 'loss', 'content': 0.02138114534318447, 'timestamp': '2025-10-02 00:58:42.817597', 'step': 26626, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:58:42.884066', 'step': 26626, 'epoch': 3}
{'type': 'loss', 'content': 0.02231038548052311, 'timestamp': '2025-10-02 00:58:42.892881', 'step': 26627, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:42.964357', 'step': 26627, 'epoch': 3}
{'type': 'loss', 'content': 0.0057181669399142265, 'timestamp': '2025-10-02 00:58:42.971519', 'step': 26628, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:58:43.045302', 'step': 26628, 'epoch': 3}
{'type': 'loss', 'content': 0.07489825785160065, 'timestamp': '2025-10-02 00:58:43.049011', 'step': 26629, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:58:43.124161', 'step': 26629, 'epoch': 3}
{'type': 'loss', 'content': 0.07274188101291656, 'timestamp': '2025-10-02 00:58:43.138039', 'step': 26630, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:58:43.210754', 'step': 26630, 'epoch': 3}
{'type': 'loss', 'content': 0.02026803232729435, 'timestamp': '2025-10-02 00:58:43.220272', 'step': 26631, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:58:43.319369', 'step': 26631, 'epoch': 3}
{'type': 'loss', 'content': 0.028722861781716347, 'timestamp': '2025-10-02 00:58:43.329688', 'step': 26632, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:58:43.402903', 'step': 26632, 'epoch': 3}
{'type': 'loss', 'content': 0.050176650285720825, 'timestamp': '2025-10-02 00:58:43.411888', 'step': 26633, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:43.472227', 'step': 26633, 'epoch': 3}
{'type': 'loss', 'content': 0.109197236597538, 'timestamp': '2025-10-02 00:58:43.476355', 'step': 26634, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:43.559139', 'step': 26634, 'epoch': 3}
{'type': 'loss', 'content': 0.02789866365492344, 'timestamp': '2025-10-02 00:58:43.565915', 'step': 26635, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:43.639786', 'step': 26635, 'epoch': 3}
{'type': 'loss', 'content': 0.07350194454193115, 'timestamp': '2025-10-02 00:58:43.659567', 'step': 26636, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:43.733788', 'step': 26636, 'epoch': 3}
{'type': 'loss', 'content': 0.032131291925907135, 'timestamp': '2025-10-02 00:58:43.737899', 'step': 26637, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:43.798029', 'step': 26637, 'epoch': 3}
{'type': 'loss', 'content': 0.03893481194972992, 'timestamp': '2025-10-02 00:58:43.812979', 'step': 26638, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:58:43.908076', 'step': 26638, 'epoch': 3}
{'type': 'loss', 'content': 0.0401722677052021, 'timestamp': '2025-10-02 00:58:43.922260', 'step': 26639, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:43.993418', 'step': 26639, 'epoch': 3}
{'type': 'loss', 'content': 0.03969058766961098, 'timestamp': '2025-10-02 00:58:44.000644', 'step': 26640, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:44.060873', 'step': 26640, 'epoch': 3}
{'type': 'loss', 'content': 0.022029651328921318, 'timestamp': '2025-10-02 00:58:44.064236', 'step': 26641, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:44.126665', 'step': 26641, 'epoch': 3}
{'type': 'loss', 'content': 0.05786004289984703, 'timestamp': '2025-10-02 00:58:44.141386', 'step': 26642, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:58:44.238777', 'step': 26642, 'epoch': 3}
{'type': 'loss', 'content': 0.04685346782207489, 'timestamp': '2025-10-02 00:58:44.253452', 'step': 26643, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:44.345671', 'step': 26643, 'epoch': 3}
{'type': 'loss', 'content': 0.008041943423449993, 'timestamp': '2025-10-02 00:58:44.353641', 'step': 26644, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:58:44.412083', 'step': 26644, 'epoch': 3}
{'type': 'loss', 'content': 0.024484803900122643, 'timestamp': '2025-10-02 00:58:44.426188', 'step': 26645, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:44.511376', 'step': 26645, 'epoch': 3}
{'type': 'loss', 'content': 0.014951665885746479, 'timestamp': '2025-10-02 00:58:44.516475', 'step': 26646, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:44.613895', 'step': 26646, 'epoch': 3}
{'type': 'loss', 'content': 0.02511155977845192, 'timestamp': '2025-10-02 00:58:44.618866', 'step': 26647, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:58:44.693305', 'step': 26647, 'epoch': 3}
{'type': 'loss', 'content': 0.014557632617652416, 'timestamp': '2025-10-02 00:58:44.710822', 'step': 26648, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:58:44.796065', 'step': 26648, 'epoch': 3}
{'type': 'loss', 'content': 0.04758119583129883, 'timestamp': '2025-10-02 00:58:44.809412', 'step': 26649, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:44.871992', 'step': 26649, 'epoch': 3}
{'type': 'loss', 'content': 0.03988201916217804, 'timestamp': '2025-10-02 00:58:44.877875', 'step': 26650, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:58:44.978600', 'step': 26650, 'epoch': 3}
{'type': 'loss', 'content': 0.036117665469646454, 'timestamp': '2025-10-02 00:58:44.992344', 'step': 26651, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:58:45.054383', 'step': 26651, 'epoch': 3}
{'type': 'loss', 'content': 0.02806997485458851, 'timestamp': '2025-10-02 00:58:45.063655', 'step': 26652, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:45.142068', 'step': 26652, 'epoch': 3}
{'type': 'loss', 'content': 0.05293995514512062, 'timestamp': '2025-10-02 00:58:45.147811', 'step': 26653, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:45.239094', 'step': 26653, 'epoch': 3}
{'type': 'loss', 'content': 0.005721348337829113, 'timestamp': '2025-10-02 00:58:45.244586', 'step': 26654, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 00:58:45.321183', 'step': 26654, 'epoch': 3}
{'type': 'loss', 'content': 0.02876286208629608, 'timestamp': '2025-10-02 00:58:45.333503', 'step': 26655, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-10-02 00:58:45.418808', 'step': 26655, 'epoch': 3}
{'type': 'loss', 'content': 0.028078116476535797, 'timestamp': '2025-10-02 00:58:45.433246', 'step': 26656, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:45.524801', 'step': 26656, 'epoch': 3}
{'type': 'loss', 'content': 0.029167070984840393, 'timestamp': '2025-10-02 00:58:45.528900', 'step': 26657, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:45.611084', 'step': 26657, 'epoch': 3}
{'type': 'loss', 'content': 0.032329995185136795, 'timestamp': '2025-10-02 00:58:45.629542', 'step': 26658, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:45.719866', 'step': 26658, 'epoch': 3}
{'type': 'loss', 'content': 0.07741899788379669, 'timestamp': '2025-10-02 00:58:45.724989', 'step': 26659, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:45.797285', 'step': 26659, 'epoch': 3}
{'type': 'loss', 'content': 0.00937927421182394, 'timestamp': '2025-10-02 00:58:45.819130', 'step': 26660, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:58:45.899848', 'step': 26660, 'epoch': 3}
{'type': 'loss', 'content': 0.09699468314647675, 'timestamp': '2025-10-02 00:58:45.914937', 'step': 26661, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:58:45.985748', 'step': 26661, 'epoch': 3}
{'type': 'loss', 'content': 0.06968338787555695, 'timestamp': '2025-10-02 00:58:45.999227', 'step': 26662, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:46.070856', 'step': 26662, 'epoch': 3}
{'type': 'loss', 'content': 0.1100057065486908, 'timestamp': '2025-10-02 00:58:46.077765', 'step': 26663, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:58:46.160917', 'step': 26663, 'epoch': 3}
{'type': 'loss', 'content': 0.03039747104048729, 'timestamp': '2025-10-02 00:58:46.178728', 'step': 26664, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:46.258737', 'step': 26664, 'epoch': 3}
{'type': 'loss', 'content': 0.0731351226568222, 'timestamp': '2025-10-02 00:58:46.272125', 'step': 26665, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:58:46.358160', 'step': 26665, 'epoch': 3}
{'type': 'loss', 'content': 0.026224056258797646, 'timestamp': '2025-10-02 00:58:46.368563', 'step': 26666, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:58:46.451100', 'step': 26666, 'epoch': 3}
{'type': 'loss', 'content': 0.03347058966755867, 'timestamp': '2025-10-02 00:58:46.463367', 'step': 26667, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:58:46.546627', 'step': 26667, 'epoch': 3}
{'type': 'loss', 'content': 0.023746689781546593, 'timestamp': '2025-10-02 00:58:46.553671', 'step': 26668, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:58:46.630823', 'step': 26668, 'epoch': 3}
{'type': 'loss', 'content': 0.031493134796619415, 'timestamp': '2025-10-02 00:58:46.641780', 'step': 26669, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:46.712643', 'step': 26669, 'epoch': 3}
{'type': 'loss', 'content': 0.04457032307982445, 'timestamp': '2025-10-02 00:58:46.724535', 'step': 26670, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:46.784686', 'step': 26670, 'epoch': 3}
{'type': 'loss', 'content': 0.0821104571223259, 'timestamp': '2025-10-02 00:58:46.799487', 'step': 26671, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:46.867288', 'step': 26671, 'epoch': 3}
{'type': 'loss', 'content': 0.014649368822574615, 'timestamp': '2025-10-02 00:58:46.874769', 'step': 26672, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:58:46.946014', 'step': 26672, 'epoch': 3}
{'type': 'loss', 'content': 0.058930788189172745, 'timestamp': '2025-10-02 00:58:46.951989', 'step': 26673, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:47.045558', 'step': 26673, 'epoch': 3}
{'type': 'loss', 'content': 0.011224168352782726, 'timestamp': '2025-10-02 00:58:47.057660', 'step': 26674, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:47.131444', 'step': 26674, 'epoch': 3}
{'type': 'loss', 'content': 0.047210630029439926, 'timestamp': '2025-10-02 00:58:47.145808', 'step': 26675, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:58:47.229148', 'step': 26675, 'epoch': 3}
{'type': 'loss', 'content': 0.024132804945111275, 'timestamp': '2025-10-02 00:58:47.250530', 'step': 26676, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:47.320666', 'step': 26676, 'epoch': 3}
{'type': 'loss', 'content': 0.05214593932032585, 'timestamp': '2025-10-02 00:58:47.324932', 'step': 26677, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:58:47.394962', 'step': 26677, 'epoch': 3}
{'type': 'loss', 'content': 0.059949565678834915, 'timestamp': '2025-10-02 00:58:47.399644', 'step': 26678, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:47.486396', 'step': 26678, 'epoch': 3}
{'type': 'loss', 'content': 0.044468771666288376, 'timestamp': '2025-10-02 00:58:47.489267', 'step': 26679, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:47.574191', 'step': 26679, 'epoch': 3}
{'type': 'loss', 'content': 0.1649467796087265, 'timestamp': '2025-10-02 00:58:47.590377', 'step': 26680, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:58:47.648314', 'step': 26680, 'epoch': 3}
{'type': 'loss', 'content': 0.03388313204050064, 'timestamp': '2025-10-02 00:58:47.662674', 'step': 26681, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:47.722136', 'step': 26681, 'epoch': 3}
{'type': 'loss', 'content': 0.021731959655880928, 'timestamp': '2025-10-02 00:58:47.727004', 'step': 26682, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:47.797528', 'step': 26682, 'epoch': 3}
{'type': 'loss', 'content': 0.06624312698841095, 'timestamp': '2025-10-02 00:58:47.814082', 'step': 26683, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:58:47.913831', 'step': 26683, 'epoch': 3}
{'type': 'loss', 'content': 0.04411917179822922, 'timestamp': '2025-10-02 00:58:47.921521', 'step': 26684, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:58:47.983333', 'step': 26684, 'epoch': 3}
{'type': 'loss', 'content': 0.023826176300644875, 'timestamp': '2025-10-02 00:58:47.992920', 'step': 26685, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:48.064814', 'step': 26685, 'epoch': 3}
{'type': 'loss', 'content': 0.059756044298410416, 'timestamp': '2025-10-02 00:58:48.069848', 'step': 26686, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:58:48.141727', 'step': 26686, 'epoch': 3}
{'type': 'loss', 'content': 0.08554314076900482, 'timestamp': '2025-10-02 00:58:48.151253', 'step': 26687, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:48.231141', 'step': 26687, 'epoch': 3}
{'type': 'loss', 'content': 0.03162199631333351, 'timestamp': '2025-10-02 00:58:48.246604', 'step': 26688, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:48.313663', 'step': 26688, 'epoch': 3}
{'type': 'loss', 'content': 0.033345792442560196, 'timestamp': '2025-10-02 00:58:48.316973', 'step': 26689, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:58:48.407808', 'step': 26689, 'epoch': 3}
{'type': 'loss', 'content': 0.0022413490805774927, 'timestamp': '2025-10-02 00:58:48.416567', 'step': 26690, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:48.496021', 'step': 26690, 'epoch': 3}
{'type': 'loss', 'content': 0.05085375905036926, 'timestamp': '2025-10-02 00:58:48.500266', 'step': 26691, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:48.584333', 'step': 26691, 'epoch': 3}
{'type': 'loss', 'content': 0.031931523233652115, 'timestamp': '2025-10-02 00:58:48.591939', 'step': 26692, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:58:48.659367', 'step': 26692, 'epoch': 3}
{'type': 'loss', 'content': 0.0445556566119194, 'timestamp': '2025-10-02 00:58:48.670654', 'step': 26693, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:48.738190', 'step': 26693, 'epoch': 3}
{'type': 'loss', 'content': 0.026182683184742928, 'timestamp': '2025-10-02 00:58:48.741483', 'step': 26694, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:48.801803', 'step': 26694, 'epoch': 3}
{'type': 'loss', 'content': 0.03906310722231865, 'timestamp': '2025-10-02 00:58:48.804681', 'step': 26695, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:58:48.863152', 'step': 26695, 'epoch': 3}
{'type': 'loss', 'content': 0.11373765766620636, 'timestamp': '2025-10-02 00:58:48.870209', 'step': 26696, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:58:48.929761', 'step': 26696, 'epoch': 3}
{'type': 'loss', 'content': 0.07259068638086319, 'timestamp': '2025-10-02 00:58:48.935846', 'step': 26697, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:58:48.994798', 'step': 26697, 'epoch': 3}
{'type': 'loss', 'content': 0.055505555123090744, 'timestamp': '2025-10-02 00:58:48.997852', 'step': 26698, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:58:49.067251', 'step': 26698, 'epoch': 3}
{'type': 'loss', 'content': 0.030336851254105568, 'timestamp': '2025-10-02 00:58:49.081278', 'step': 26699, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:49.157539', 'step': 26699, 'epoch': 3}
{'type': 'loss', 'content': 0.09303110837936401, 'timestamp': '2025-10-02 00:58:49.165624', 'step': 26700, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:58:49.243206', 'step': 26700, 'epoch': 3}
{'type': 'loss', 'content': 0.02571653202176094, 'timestamp': '2025-10-02 00:58:49.252275', 'step': 26701, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:58:49.334230', 'step': 26701, 'epoch': 3}
{'type': 'loss', 'content': 0.04542933404445648, 'timestamp': '2025-10-02 00:58:49.336767', 'step': 26702, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:58:49.395644', 'step': 26702, 'epoch': 3}
{'type': 'loss', 'content': 0.023282533511519432, 'timestamp': '2025-10-02 00:58:49.404429', 'step': 26703, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:58:49.485098', 'step': 26703, 'epoch': 3}
{'type': 'loss', 'content': 0.02946256846189499, 'timestamp': '2025-10-02 00:58:49.500005', 'step': 26704, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:58:49.571293', 'step': 26704, 'epoch': 3}
{'type': 'loss', 'content': 0.004391722846776247, 'timestamp': '2025-10-02 00:58:49.583385', 'step': 26705, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:58:49.641302', 'step': 26705, 'epoch': 3}
{'type': 'loss', 'content': 0.06524122506380081, 'timestamp': '2025-10-02 00:58:49.650874', 'step': 26706, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:58:49.735694', 'step': 26706, 'epoch': 3}
{'type': 'loss', 'content': 0.06063919886946678, 'timestamp': '2025-10-02 00:58:49.745220', 'step': 26707, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:49.815348', 'step': 26707, 'epoch': 3}
{'type': 'loss', 'content': 0.06497319787740707, 'timestamp': '2025-10-02 00:58:49.822569', 'step': 26708, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:49.895943', 'step': 26708, 'epoch': 3}
{'type': 'loss', 'content': 0.05248453840613365, 'timestamp': '2025-10-02 00:58:49.903912', 'step': 26709, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:58:49.985368', 'step': 26709, 'epoch': 3}
{'type': 'loss', 'content': 0.014346174895763397, 'timestamp': '2025-10-02 00:58:49.994866', 'step': 26710, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:50.066817', 'step': 26710, 'epoch': 3}
{'type': 'loss', 'content': 0.018687274307012558, 'timestamp': '2025-10-02 00:58:50.071909', 'step': 26711, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:50.161007', 'step': 26711, 'epoch': 3}
{'type': 'loss', 'content': 0.04977458342909813, 'timestamp': '2025-10-02 00:58:50.177602', 'step': 26712, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:58:50.272438', 'step': 26712, 'epoch': 3}
{'type': 'loss', 'content': 0.031693488359451294, 'timestamp': '2025-10-02 00:58:50.285369', 'step': 26713, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:50.359309', 'step': 26713, 'epoch': 3}
{'type': 'loss', 'content': 0.03701728209853172, 'timestamp': '2025-10-02 00:58:50.374330', 'step': 26714, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:58:50.455063', 'step': 26714, 'epoch': 3}
{'type': 'loss', 'content': 0.0703517347574234, 'timestamp': '2025-10-02 00:58:50.466290', 'step': 26715, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:50.545219', 'step': 26715, 'epoch': 3}
{'type': 'loss', 'content': 0.01801532506942749, 'timestamp': '2025-10-02 00:58:50.552292', 'step': 26716, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:58:50.628882', 'step': 26716, 'epoch': 3}
{'type': 'loss', 'content': 0.01671178638935089, 'timestamp': '2025-10-02 00:58:50.643281', 'step': 26717, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 560], 'flops': 11200068058304.0}, 'timestamp': '2025-10-02 00:58:50.733210', 'step': 26717, 'epoch': 3}
{'type': 'loss', 'content': 0.004585559479892254, 'timestamp': '2025-10-02 00:58:50.748287', 'step': 26718, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:50.848461', 'step': 26718, 'epoch': 3}
{'type': 'loss', 'content': 0.026601340621709824, 'timestamp': '2025-10-02 00:58:50.854179', 'step': 26719, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:58:50.958430', 'step': 26719, 'epoch': 3}
{'type': 'loss', 'content': 0.07364415377378464, 'timestamp': '2025-10-02 00:58:50.976327', 'step': 26720, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:51.056089', 'step': 26720, 'epoch': 3}
{'type': 'loss', 'content': 0.04141213744878769, 'timestamp': '2025-10-02 00:58:51.060949', 'step': 26721, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:58:51.139778', 'step': 26721, 'epoch': 3}
{'type': 'loss', 'content': 0.033174779266119, 'timestamp': '2025-10-02 00:58:51.158659', 'step': 26722, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:58:51.281701', 'step': 26722, 'epoch': 3}
{'type': 'loss', 'content': 0.02196224220097065, 'timestamp': '2025-10-02 00:58:51.292246', 'step': 26723, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:58:51.361222', 'step': 26723, 'epoch': 3}
{'type': 'loss', 'content': 0.006553792394697666, 'timestamp': '2025-10-02 00:58:51.372175', 'step': 26724, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:51.461798', 'step': 26724, 'epoch': 3}
{'type': 'loss', 'content': 0.012199729681015015, 'timestamp': '2025-10-02 00:58:51.466922', 'step': 26725, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:51.567955', 'step': 26725, 'epoch': 3}
{'type': 'loss', 'content': 0.06645854562520981, 'timestamp': '2025-10-02 00:58:51.574922', 'step': 26726, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:51.636494', 'step': 26726, 'epoch': 3}
{'type': 'loss', 'content': 0.025476623326539993, 'timestamp': '2025-10-02 00:58:51.651820', 'step': 26727, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:51.758684', 'step': 26727, 'epoch': 3}
{'type': 'loss', 'content': 0.07613195478916168, 'timestamp': '2025-10-02 00:58:51.773752', 'step': 26728, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:58:51.833889', 'step': 26728, 'epoch': 3}
{'type': 'loss', 'content': 0.07252053171396255, 'timestamp': '2025-10-02 00:58:51.846584', 'step': 26729, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:51.907927', 'step': 26729, 'epoch': 3}
{'type': 'loss', 'content': 0.02549789845943451, 'timestamp': '2025-10-02 00:58:51.912575', 'step': 26730, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:58:51.983677', 'step': 26730, 'epoch': 3}
{'type': 'loss', 'content': 0.04421428218483925, 'timestamp': '2025-10-02 00:58:51.988744', 'step': 26731, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:52.055249', 'step': 26731, 'epoch': 3}
{'type': 'loss', 'content': 0.039422933012247086, 'timestamp': '2025-10-02 00:58:52.077409', 'step': 26732, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:58:52.161283', 'step': 26732, 'epoch': 3}
{'type': 'loss', 'content': 0.029106415808200836, 'timestamp': '2025-10-02 00:58:52.172683', 'step': 26733, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:52.253746', 'step': 26733, 'epoch': 3}
{'type': 'loss', 'content': 0.027238082140684128, 'timestamp': '2025-10-02 00:58:52.259033', 'step': 26734, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:52.324693', 'step': 26734, 'epoch': 3}
{'type': 'loss', 'content': 0.07476747781038284, 'timestamp': '2025-10-02 00:58:52.329528', 'step': 26735, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:58:52.392058', 'step': 26735, 'epoch': 3}
{'type': 'loss', 'content': 0.02777647227048874, 'timestamp': '2025-10-02 00:58:52.401835', 'step': 26736, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:52.477693', 'step': 26736, 'epoch': 3}
{'type': 'loss', 'content': 0.020585978403687477, 'timestamp': '2025-10-02 00:58:52.494568', 'step': 26737, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:58:52.557352', 'step': 26737, 'epoch': 3}
{'type': 'loss', 'content': 0.015857890248298645, 'timestamp': '2025-10-02 00:58:52.566150', 'step': 26738, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:52.655650', 'step': 26738, 'epoch': 3}
{'type': 'loss', 'content': 0.01288480032235384, 'timestamp': '2025-10-02 00:58:52.659658', 'step': 26739, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:52.750213', 'step': 26739, 'epoch': 3}
{'type': 'loss', 'content': 0.07058285176753998, 'timestamp': '2025-10-02 00:58:52.769978', 'step': 26740, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:52.856497', 'step': 26740, 'epoch': 3}
{'type': 'loss', 'content': 0.03602563962340355, 'timestamp': '2025-10-02 00:58:52.859978', 'step': 26741, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:58:52.965252', 'step': 26741, 'epoch': 3}
{'type': 'loss', 'content': 0.025728454813361168, 'timestamp': '2025-10-02 00:58:52.974739', 'step': 26742, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:58:53.074380', 'step': 26742, 'epoch': 3}
{'type': 'loss', 'content': 0.07696287333965302, 'timestamp': '2025-10-02 00:58:53.079902', 'step': 26743, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:53.158902', 'step': 26743, 'epoch': 3}
{'type': 'loss', 'content': 0.03961142897605896, 'timestamp': '2025-10-02 00:58:53.167747', 'step': 26744, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:58:53.242499', 'step': 26744, 'epoch': 3}
{'type': 'loss', 'content': 0.05567452684044838, 'timestamp': '2025-10-02 00:58:53.246473', 'step': 26745, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:58:53.319647', 'step': 26745, 'epoch': 3}
{'type': 'loss', 'content': 0.07233195006847382, 'timestamp': '2025-10-02 00:58:53.324729', 'step': 26746, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:53.385276', 'step': 26746, 'epoch': 3}
{'type': 'loss', 'content': 0.044488999992609024, 'timestamp': '2025-10-02 00:58:53.391882', 'step': 26747, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:53.453264', 'step': 26747, 'epoch': 3}
{'type': 'loss', 'content': 0.07613455504179001, 'timestamp': '2025-10-02 00:58:53.462486', 'step': 26748, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:58:53.521995', 'step': 26748, 'epoch': 3}
{'type': 'loss', 'content': 0.08408690989017487, 'timestamp': '2025-10-02 00:58:53.536371', 'step': 26749, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:53.639134', 'step': 26749, 'epoch': 3}
{'type': 'loss', 'content': 0.05024006962776184, 'timestamp': '2025-10-02 00:58:53.643394', 'step': 26750, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:58:53.717270', 'step': 26750, 'epoch': 3}
{'type': 'loss', 'content': 0.01651814393699169, 'timestamp': '2025-10-02 00:58:53.732600', 'step': 26751, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:53.826900', 'step': 26751, 'epoch': 3}
{'type': 'loss', 'content': 0.06619841605424881, 'timestamp': '2025-10-02 00:58:53.845511', 'step': 26752, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:53.928222', 'step': 26752, 'epoch': 3}
{'type': 'loss', 'content': 0.05580802634358406, 'timestamp': '2025-10-02 00:58:53.932354', 'step': 26753, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:58:54.012628', 'step': 26753, 'epoch': 3}
{'type': 'loss', 'content': 0.008957266807556152, 'timestamp': '2025-10-02 00:58:54.022736', 'step': 26754, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:58:54.109325', 'step': 26754, 'epoch': 3}
{'type': 'loss', 'content': 0.023535650223493576, 'timestamp': '2025-10-02 00:58:54.118182', 'step': 26755, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:54.203237', 'step': 26755, 'epoch': 3}
{'type': 'loss', 'content': 0.03506273031234741, 'timestamp': '2025-10-02 00:58:54.209869', 'step': 26756, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:54.292353', 'step': 26756, 'epoch': 3}
{'type': 'loss', 'content': 0.020898640155792236, 'timestamp': '2025-10-02 00:58:54.295697', 'step': 26757, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:58:54.375068', 'step': 26757, 'epoch': 3}
{'type': 'loss', 'content': 0.016269151121377945, 'timestamp': '2025-10-02 00:58:54.389649', 'step': 26758, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:58:54.451792', 'step': 26758, 'epoch': 3}
{'type': 'loss', 'content': 0.020218873396515846, 'timestamp': '2025-10-02 00:58:54.461359', 'step': 26759, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:58:54.537326', 'step': 26759, 'epoch': 3}
{'type': 'loss', 'content': 0.12076767534017563, 'timestamp': '2025-10-02 00:58:54.546545', 'step': 26760, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:58:54.620296', 'step': 26760, 'epoch': 3}
{'type': 'loss', 'content': 0.027896380051970482, 'timestamp': '2025-10-02 00:58:54.629164', 'step': 26761, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:58:54.699197', 'step': 26761, 'epoch': 3}
{'type': 'loss', 'content': 0.043035607784986496, 'timestamp': '2025-10-02 00:58:54.711909', 'step': 26762, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:54.809017', 'step': 26762, 'epoch': 3}
{'type': 'loss', 'content': 0.004416659474372864, 'timestamp': '2025-10-02 00:58:54.814037', 'step': 26763, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:54.883679', 'step': 26763, 'epoch': 3}
{'type': 'loss', 'content': 0.0670504942536354, 'timestamp': '2025-10-02 00:58:54.891143', 'step': 26764, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:58:54.951188', 'step': 26764, 'epoch': 3}
{'type': 'loss', 'content': 0.005344614386558533, 'timestamp': '2025-10-02 00:58:54.954169', 'step': 26765, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:55.022856', 'step': 26765, 'epoch': 3}
{'type': 'loss', 'content': 0.0799809917807579, 'timestamp': '2025-10-02 00:58:55.036690', 'step': 26766, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:58:55.113273', 'step': 26766, 'epoch': 3}
{'type': 'loss', 'content': 0.025602241978049278, 'timestamp': '2025-10-02 00:58:55.123903', 'step': 26767, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:55.188378', 'step': 26767, 'epoch': 3}
{'type': 'loss', 'content': 0.03754337504506111, 'timestamp': '2025-10-02 00:58:55.195952', 'step': 26768, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:55.268346', 'step': 26768, 'epoch': 3}
{'type': 'loss', 'content': 0.030712192878127098, 'timestamp': '2025-10-02 00:58:55.278451', 'step': 26769, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:55.365428', 'step': 26769, 'epoch': 3}
{'type': 'loss', 'content': 0.12087055295705795, 'timestamp': '2025-10-02 00:58:55.378397', 'step': 26770, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:55.452788', 'step': 26770, 'epoch': 3}
{'type': 'loss', 'content': 0.02534460462629795, 'timestamp': '2025-10-02 00:58:55.458043', 'step': 26771, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 00:58:55.550890', 'step': 26771, 'epoch': 3}
{'type': 'loss', 'content': 0.02911333367228508, 'timestamp': '2025-10-02 00:58:55.565446', 'step': 26772, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:55.635006', 'step': 26772, 'epoch': 3}
{'type': 'loss', 'content': 0.02815709076821804, 'timestamp': '2025-10-02 00:58:55.639406', 'step': 26773, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:55.698892', 'step': 26773, 'epoch': 3}
{'type': 'loss', 'content': 0.07905549556016922, 'timestamp': '2025-10-02 00:58:55.703478', 'step': 26774, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:55.783259', 'step': 26774, 'epoch': 3}
{'type': 'loss', 'content': 0.04761423170566559, 'timestamp': '2025-10-02 00:58:55.790015', 'step': 26775, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:55.863297', 'step': 26775, 'epoch': 3}
{'type': 'loss', 'content': 0.1027856320142746, 'timestamp': '2025-10-02 00:58:55.870980', 'step': 26776, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:58:55.955688', 'step': 26776, 'epoch': 3}
{'type': 'loss', 'content': 0.05512513965368271, 'timestamp': '2025-10-02 00:58:55.968580', 'step': 26777, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:58:56.041146', 'step': 26777, 'epoch': 3}
{'type': 'loss', 'content': 0.04999115690588951, 'timestamp': '2025-10-02 00:58:56.050650', 'step': 26778, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:58:56.178679', 'step': 26778, 'epoch': 3}
{'type': 'loss', 'content': 0.013132094405591488, 'timestamp': '2025-10-02 00:58:56.182995', 'step': 26779, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:56.284692', 'step': 26779, 'epoch': 3}
{'type': 'loss', 'content': 0.021448910236358643, 'timestamp': '2025-10-02 00:58:56.305260', 'step': 26780, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:56.385665', 'step': 26780, 'epoch': 3}
{'type': 'loss', 'content': 0.05443164333701134, 'timestamp': '2025-10-02 00:58:56.391012', 'step': 26781, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:56.453337', 'step': 26781, 'epoch': 3}
{'type': 'loss', 'content': 0.09546449780464172, 'timestamp': '2025-10-02 00:58:56.458072', 'step': 26782, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:56.519391', 'step': 26782, 'epoch': 3}
{'type': 'loss', 'content': 0.05121007189154625, 'timestamp': '2025-10-02 00:58:56.524348', 'step': 26783, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:58:56.586112', 'step': 26783, 'epoch': 3}
{'type': 'loss', 'content': 0.026356099173426628, 'timestamp': '2025-10-02 00:58:56.596400', 'step': 26784, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:56.695874', 'step': 26784, 'epoch': 3}
{'type': 'loss', 'content': 0.14286646246910095, 'timestamp': '2025-10-02 00:58:56.701533', 'step': 26785, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:58:56.781966', 'step': 26785, 'epoch': 3}
{'type': 'loss', 'content': 0.013974854722619057, 'timestamp': '2025-10-02 00:58:56.791510', 'step': 26786, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:58:56.893110', 'step': 26786, 'epoch': 3}
{'type': 'loss', 'content': 0.10543414205312729, 'timestamp': '2025-10-02 00:58:56.897738', 'step': 26787, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:56.963294', 'step': 26787, 'epoch': 3}
{'type': 'loss', 'content': 0.037201397120952606, 'timestamp': '2025-10-02 00:58:56.971008', 'step': 26788, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:58:57.059304', 'step': 26788, 'epoch': 3}
{'type': 'loss', 'content': 0.08929944038391113, 'timestamp': '2025-10-02 00:58:57.075204', 'step': 26789, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:57.152262', 'step': 26789, 'epoch': 3}
{'type': 'loss', 'content': 0.03353670611977577, 'timestamp': '2025-10-02 00:58:57.157020', 'step': 26790, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:58:57.248061', 'step': 26790, 'epoch': 3}
{'type': 'loss', 'content': 0.06362301111221313, 'timestamp': '2025-10-02 00:58:57.258164', 'step': 26791, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:57.338211', 'step': 26791, 'epoch': 3}
{'type': 'loss', 'content': 0.030989304184913635, 'timestamp': '2025-10-02 00:58:57.346396', 'step': 26792, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:57.432324', 'step': 26792, 'epoch': 3}
{'type': 'loss', 'content': 0.0587291494011879, 'timestamp': '2025-10-02 00:58:57.436123', 'step': 26793, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:57.499628', 'step': 26793, 'epoch': 3}
{'type': 'loss', 'content': 0.022176269441843033, 'timestamp': '2025-10-02 00:58:57.504779', 'step': 26794, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:57.566009', 'step': 26794, 'epoch': 3}
{'type': 'loss', 'content': 0.08506707102060318, 'timestamp': '2025-10-02 00:58:57.571249', 'step': 26795, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:57.635448', 'step': 26795, 'epoch': 3}
{'type': 'loss', 'content': 0.010840347036719322, 'timestamp': '2025-10-02 00:58:57.645008', 'step': 26796, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:57.721881', 'step': 26796, 'epoch': 3}
{'type': 'loss', 'content': 0.03565952554345131, 'timestamp': '2025-10-02 00:58:57.739803', 'step': 26797, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:57.817840', 'step': 26797, 'epoch': 3}
{'type': 'loss', 'content': 0.07845720648765564, 'timestamp': '2025-10-02 00:58:57.835318', 'step': 26798, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:57.943490', 'step': 26798, 'epoch': 3}
{'type': 'loss', 'content': 0.11473226547241211, 'timestamp': '2025-10-02 00:58:57.960422', 'step': 26799, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:58.072981', 'step': 26799, 'epoch': 3}
{'type': 'loss', 'content': 0.024864234030246735, 'timestamp': '2025-10-02 00:58:58.081229', 'step': 26800, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:58.142529', 'step': 26800, 'epoch': 3}
{'type': 'loss', 'content': 0.02551678940653801, 'timestamp': '2025-10-02 00:58:58.149302', 'step': 26801, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:58:58.209633', 'step': 26801, 'epoch': 3}
{'type': 'loss', 'content': 0.029196083545684814, 'timestamp': '2025-10-02 00:58:58.228507', 'step': 26802, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:58.335594', 'step': 26802, 'epoch': 3}
{'type': 'loss', 'content': 0.018062295392155647, 'timestamp': '2025-10-02 00:58:58.340833', 'step': 26803, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:58.414366', 'step': 26803, 'epoch': 3}
{'type': 'loss', 'content': 0.009690292179584503, 'timestamp': '2025-10-02 00:58:58.437609', 'step': 26804, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:58:58.514019', 'step': 26804, 'epoch': 3}
{'type': 'loss', 'content': 0.03028206340968609, 'timestamp': '2025-10-02 00:58:58.519318', 'step': 26805, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:58:58.610430', 'step': 26805, 'epoch': 3}
{'type': 'loss', 'content': 0.04056562855839729, 'timestamp': '2025-10-02 00:58:58.629693', 'step': 26806, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:58:58.729204', 'step': 26806, 'epoch': 3}
{'type': 'loss', 'content': 0.029463056474924088, 'timestamp': '2025-10-02 00:58:58.739753', 'step': 26807, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:58.802538', 'step': 26807, 'epoch': 3}
{'type': 'loss', 'content': 0.07773571461439133, 'timestamp': '2025-10-02 00:58:58.812565', 'step': 26808, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:58.886453', 'step': 26808, 'epoch': 3}
{'type': 'loss', 'content': 0.05807027593255043, 'timestamp': '2025-10-02 00:58:58.890817', 'step': 26809, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:58:58.969304', 'step': 26809, 'epoch': 3}
{'type': 'loss', 'content': 0.029972048476338387, 'timestamp': '2025-10-02 00:58:58.975899', 'step': 26810, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:58:59.039116', 'step': 26810, 'epoch': 3}
{'type': 'loss', 'content': 0.03839194029569626, 'timestamp': '2025-10-02 00:58:59.043329', 'step': 26811, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:58:59.150352', 'step': 26811, 'epoch': 3}
{'type': 'loss', 'content': 0.01209396030753851, 'timestamp': '2025-10-02 00:58:59.160141', 'step': 26812, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:58:59.274752', 'step': 26812, 'epoch': 3}
{'type': 'loss', 'content': 0.052869830280542374, 'timestamp': '2025-10-02 00:58:59.283426', 'step': 26813, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:58:59.358891', 'step': 26813, 'epoch': 3}
{'type': 'loss', 'content': 0.07903636991977692, 'timestamp': '2025-10-02 00:58:59.362781', 'step': 26814, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:58:59.424092', 'step': 26814, 'epoch': 3}
{'type': 'loss', 'content': 0.06594433635473251, 'timestamp': '2025-10-02 00:58:59.440583', 'step': 26815, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:58:59.503736', 'step': 26815, 'epoch': 3}
{'type': 'loss', 'content': 0.0036254017613828182, 'timestamp': '2025-10-02 00:58:59.511475', 'step': 26816, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:58:59.586426', 'step': 26816, 'epoch': 3}
{'type': 'loss', 'content': 0.04680779576301575, 'timestamp': '2025-10-02 00:58:59.591528', 'step': 26817, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:58:59.656520', 'step': 26817, 'epoch': 3}
{'type': 'loss', 'content': 0.011896690353751183, 'timestamp': '2025-10-02 00:58:59.661672', 'step': 26818, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:58:59.778015', 'step': 26818, 'epoch': 3}
{'type': 'loss', 'content': 0.10854586958885193, 'timestamp': '2025-10-02 00:58:59.783519', 'step': 26819, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:58:59.849067', 'step': 26819, 'epoch': 3}
{'type': 'loss', 'content': 0.07076282799243927, 'timestamp': '2025-10-02 00:58:59.859987', 'step': 26820, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:58:59.934728', 'step': 26820, 'epoch': 3}
{'type': 'loss', 'content': 0.09368254244327545, 'timestamp': '2025-10-02 00:58:59.938256', 'step': 26821, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:00.003065', 'step': 26821, 'epoch': 3}
{'type': 'loss', 'content': 0.013329154811799526, 'timestamp': '2025-10-02 00:59:00.010011', 'step': 26822, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:00.088002', 'step': 26822, 'epoch': 3}
{'type': 'loss', 'content': 0.04515518248081207, 'timestamp': '2025-10-02 00:59:00.093335', 'step': 26823, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:59:00.175682', 'step': 26823, 'epoch': 3}
{'type': 'loss', 'content': 0.013343474827706814, 'timestamp': '2025-10-02 00:59:00.186882', 'step': 26824, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:59:00.255542', 'step': 26824, 'epoch': 3}
{'type': 'loss', 'content': 0.03465992957353592, 'timestamp': '2025-10-02 00:59:00.266871', 'step': 26825, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:00.394981', 'step': 26825, 'epoch': 3}
{'type': 'loss', 'content': 0.10971413552761078, 'timestamp': '2025-10-02 00:59:00.415611', 'step': 26826, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:00.491714', 'step': 26826, 'epoch': 3}
{'type': 'loss', 'content': 0.061169303953647614, 'timestamp': '2025-10-02 00:59:00.496636', 'step': 26827, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:00.600172', 'step': 26827, 'epoch': 3}
{'type': 'loss', 'content': 0.06283075362443924, 'timestamp': '2025-10-02 00:59:00.608840', 'step': 26828, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:00.703883', 'step': 26828, 'epoch': 3}
{'type': 'loss', 'content': 0.020596230402588844, 'timestamp': '2025-10-02 00:59:00.710365', 'step': 26829, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:59:00.785856', 'step': 26829, 'epoch': 3}
{'type': 'loss', 'content': 0.04035396873950958, 'timestamp': '2025-10-02 00:59:00.790459', 'step': 26830, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:00.910329', 'step': 26830, 'epoch': 3}
{'type': 'loss', 'content': 0.05312930792570114, 'timestamp': '2025-10-02 00:59:00.914652', 'step': 26831, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:59:01.013742', 'step': 26831, 'epoch': 3}
{'type': 'loss', 'content': 0.07715417444705963, 'timestamp': '2025-10-02 00:59:01.033561', 'step': 26832, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:59:01.112620', 'step': 26832, 'epoch': 3}
{'type': 'loss', 'content': 0.019123448058962822, 'timestamp': '2025-10-02 00:59:01.123527', 'step': 26833, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:01.197634', 'step': 26833, 'epoch': 3}
{'type': 'loss', 'content': 0.014042683877050877, 'timestamp': '2025-10-02 00:59:01.204984', 'step': 26834, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:01.277222', 'step': 26834, 'epoch': 3}
{'type': 'loss', 'content': 0.013829695992171764, 'timestamp': '2025-10-02 00:59:01.284019', 'step': 26835, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:01.375702', 'step': 26835, 'epoch': 3}
{'type': 'loss', 'content': 0.01977737434208393, 'timestamp': '2025-10-02 00:59:01.395660', 'step': 26836, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:01.500752', 'step': 26836, 'epoch': 3}
{'type': 'loss', 'content': 0.011359110474586487, 'timestamp': '2025-10-02 00:59:01.518590', 'step': 26837, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:59:01.597903', 'step': 26837, 'epoch': 3}
{'type': 'loss', 'content': 0.02152320370078087, 'timestamp': '2025-10-02 00:59:01.613556', 'step': 26838, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:01.674118', 'step': 26838, 'epoch': 3}
{'type': 'loss', 'content': 0.030251238495111465, 'timestamp': '2025-10-02 00:59:01.679898', 'step': 26839, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:59:01.754414', 'step': 26839, 'epoch': 3}
{'type': 'loss', 'content': 0.057653527706861496, 'timestamp': '2025-10-02 00:59:01.762276', 'step': 26840, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:01.834039', 'step': 26840, 'epoch': 3}
{'type': 'loss', 'content': 0.012504767626523972, 'timestamp': '2025-10-02 00:59:01.849759', 'step': 26841, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:59:01.912523', 'step': 26841, 'epoch': 3}
{'type': 'loss', 'content': 0.09996385127305984, 'timestamp': '2025-10-02 00:59:01.929091', 'step': 26842, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:59:02.020919', 'step': 26842, 'epoch': 3}
{'type': 'loss', 'content': 0.008518416434526443, 'timestamp': '2025-10-02 00:59:02.038002', 'step': 26843, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:02.113653', 'step': 26843, 'epoch': 3}
{'type': 'loss', 'content': 0.026052327826619148, 'timestamp': '2025-10-02 00:59:02.121615', 'step': 26844, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:02.189964', 'step': 26844, 'epoch': 3}
{'type': 'loss', 'content': 0.09616247564554214, 'timestamp': '2025-10-02 00:59:02.194901', 'step': 26845, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:02.260276', 'step': 26845, 'epoch': 3}
{'type': 'loss', 'content': 0.02150682359933853, 'timestamp': '2025-10-02 00:59:02.278447', 'step': 26846, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:02.384644', 'step': 26846, 'epoch': 3}
{'type': 'loss', 'content': 0.035475000739097595, 'timestamp': '2025-10-02 00:59:02.390998', 'step': 26847, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:02.482037', 'step': 26847, 'epoch': 3}
{'type': 'loss', 'content': 0.00038631082861684263, 'timestamp': '2025-10-02 00:59:02.491431', 'step': 26848, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:59:02.586560', 'step': 26848, 'epoch': 3}
{'type': 'loss', 'content': 0.0001993243204196915, 'timestamp': '2025-10-02 00:59:02.601123', 'step': 26849, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:02.698218', 'step': 26849, 'epoch': 3}
{'type': 'loss', 'content': 0.03391365706920624, 'timestamp': '2025-10-02 00:59:02.702825', 'step': 26850, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:02.776032', 'step': 26850, 'epoch': 3}
{'type': 'loss', 'content': 0.05613734945654869, 'timestamp': '2025-10-02 00:59:02.785010', 'step': 26851, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:59:02.889358', 'step': 26851, 'epoch': 3}
{'type': 'loss', 'content': 0.08093615621328354, 'timestamp': '2025-10-02 00:59:02.912049', 'step': 26852, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:03.003351', 'step': 26852, 'epoch': 3}
{'type': 'loss', 'content': 0.03522540256381035, 'timestamp': '2025-10-02 00:59:03.007536', 'step': 26853, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:03.094804', 'step': 26853, 'epoch': 3}
{'type': 'loss', 'content': 0.0019067145185545087, 'timestamp': '2025-10-02 00:59:03.101351', 'step': 26854, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:59:03.179929', 'step': 26854, 'epoch': 3}
{'type': 'loss', 'content': 0.01523969043046236, 'timestamp': '2025-10-02 00:59:03.190754', 'step': 26855, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:03.251639', 'step': 26855, 'epoch': 3}
{'type': 'loss', 'content': 0.012151911854743958, 'timestamp': '2025-10-02 00:59:03.259459', 'step': 26856, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:03.359348', 'step': 26856, 'epoch': 3}
{'type': 'loss', 'content': 0.014335288666188717, 'timestamp': '2025-10-02 00:59:03.364724', 'step': 26857, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:03.425116', 'step': 26857, 'epoch': 3}
{'type': 'loss', 'content': 0.09393326193094254, 'timestamp': '2025-10-02 00:59:03.428699', 'step': 26858, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:59:03.533287', 'step': 26858, 'epoch': 3}
{'type': 'loss', 'content': 0.08223015815019608, 'timestamp': '2025-10-02 00:59:03.543689', 'step': 26859, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:59:03.613826', 'step': 26859, 'epoch': 3}
{'type': 'loss', 'content': 0.04735405743122101, 'timestamp': '2025-10-02 00:59:03.631397', 'step': 26860, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:03.705076', 'step': 26860, 'epoch': 3}
{'type': 'loss', 'content': 0.08579499274492264, 'timestamp': '2025-10-02 00:59:03.710183', 'step': 26861, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:59:03.797910', 'step': 26861, 'epoch': 3}
{'type': 'loss', 'content': 0.04871275648474693, 'timestamp': '2025-10-02 00:59:03.807413', 'step': 26862, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:03.888812', 'step': 26862, 'epoch': 3}
{'type': 'loss', 'content': 0.09370215237140656, 'timestamp': '2025-10-02 00:59:03.893128', 'step': 26863, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:03.953880', 'step': 26863, 'epoch': 3}
{'type': 'loss', 'content': 0.05414803326129913, 'timestamp': '2025-10-02 00:59:03.974821', 'step': 26864, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:04.046778', 'step': 26864, 'epoch': 3}
{'type': 'loss', 'content': 0.05485687777400017, 'timestamp': '2025-10-02 00:59:04.050715', 'step': 26865, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:04.120991', 'step': 26865, 'epoch': 3}
{'type': 'loss', 'content': 0.039069000631570816, 'timestamp': '2025-10-02 00:59:04.135569', 'step': 26866, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:59:04.231653', 'step': 26866, 'epoch': 3}
{'type': 'loss', 'content': 0.05938417837023735, 'timestamp': '2025-10-02 00:59:04.235328', 'step': 26867, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:04.307545', 'step': 26867, 'epoch': 3}
{'type': 'loss', 'content': 0.045296598225831985, 'timestamp': '2025-10-02 00:59:04.314836', 'step': 26868, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:04.398916', 'step': 26868, 'epoch': 3}
{'type': 'loss', 'content': 0.047120995819568634, 'timestamp': '2025-10-02 00:59:04.403056', 'step': 26869, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:59:04.462057', 'step': 26869, 'epoch': 3}
{'type': 'loss', 'content': 0.11215946823358536, 'timestamp': '2025-10-02 00:59:04.465235', 'step': 26870, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:04.525527', 'step': 26870, 'epoch': 3}
{'type': 'loss', 'content': 0.08431828767061234, 'timestamp': '2025-10-02 00:59:04.538949', 'step': 26871, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:04.611913', 'step': 26871, 'epoch': 3}
{'type': 'loss', 'content': 0.08616101741790771, 'timestamp': '2025-10-02 00:59:04.619851', 'step': 26872, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:59:04.696006', 'step': 26872, 'epoch': 3}
{'type': 'loss', 'content': 0.030178245157003403, 'timestamp': '2025-10-02 00:59:04.699587', 'step': 26873, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:04.776209', 'step': 26873, 'epoch': 3}
{'type': 'loss', 'content': 0.006002048961818218, 'timestamp': '2025-10-02 00:59:04.790834', 'step': 26874, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:59:04.873981', 'step': 26874, 'epoch': 3}
{'type': 'loss', 'content': 0.042459145188331604, 'timestamp': '2025-10-02 00:59:04.878356', 'step': 26875, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:59:04.953347', 'step': 26875, 'epoch': 3}
{'type': 'loss', 'content': 0.05420852452516556, 'timestamp': '2025-10-02 00:59:04.970568', 'step': 26876, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:05.031951', 'step': 26876, 'epoch': 3}
{'type': 'loss', 'content': 0.042262207716703415, 'timestamp': '2025-10-02 00:59:05.035039', 'step': 26877, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:59:05.118751', 'step': 26877, 'epoch': 3}
{'type': 'loss', 'content': 0.06729839742183685, 'timestamp': '2025-10-02 00:59:05.123559', 'step': 26878, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:05.195423', 'step': 26878, 'epoch': 3}
{'type': 'loss', 'content': 0.059363193809986115, 'timestamp': '2025-10-02 00:59:05.209526', 'step': 26879, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:59:05.279888', 'step': 26879, 'epoch': 3}
{'type': 'loss', 'content': 0.037432849407196045, 'timestamp': '2025-10-02 00:59:05.286291', 'step': 26880, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:59:05.358298', 'step': 26880, 'epoch': 3}
{'type': 'loss', 'content': 0.0013379937736317515, 'timestamp': '2025-10-02 00:59:05.368520', 'step': 26881, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:05.429360', 'step': 26881, 'epoch': 3}
{'type': 'loss', 'content': 0.03651829808950424, 'timestamp': '2025-10-02 00:59:05.435121', 'step': 26882, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:05.514718', 'step': 26882, 'epoch': 3}
{'type': 'loss', 'content': 0.13875886797904968, 'timestamp': '2025-10-02 00:59:05.523146', 'step': 26883, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:05.588815', 'step': 26883, 'epoch': 3}
{'type': 'loss', 'content': 0.03454740718007088, 'timestamp': '2025-10-02 00:59:05.596927', 'step': 26884, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:05.684400', 'step': 26884, 'epoch': 3}
{'type': 'loss', 'content': 0.02184910885989666, 'timestamp': '2025-10-02 00:59:05.693597', 'step': 26885, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:05.755505', 'step': 26885, 'epoch': 3}
{'type': 'loss', 'content': 0.033769186586141586, 'timestamp': '2025-10-02 00:59:05.760865', 'step': 26886, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:05.839915', 'step': 26886, 'epoch': 3}
{'type': 'loss', 'content': 0.008592595346271992, 'timestamp': '2025-10-02 00:59:05.848913', 'step': 26887, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:05.917681', 'step': 26887, 'epoch': 3}
{'type': 'loss', 'content': 0.08294088393449783, 'timestamp': '2025-10-02 00:59:05.937299', 'step': 26888, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:06.010625', 'step': 26888, 'epoch': 3}
{'type': 'loss', 'content': 0.04963674396276474, 'timestamp': '2025-10-02 00:59:06.020420', 'step': 26889, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:06.102581', 'step': 26889, 'epoch': 3}
{'type': 'loss', 'content': 0.007757055573165417, 'timestamp': '2025-10-02 00:59:06.111662', 'step': 26890, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:59:06.210854', 'step': 26890, 'epoch': 3}
{'type': 'loss', 'content': 0.09261395782232285, 'timestamp': '2025-10-02 00:59:06.226093', 'step': 26891, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:59:06.298671', 'step': 26891, 'epoch': 3}
{'type': 'loss', 'content': 0.03545457124710083, 'timestamp': '2025-10-02 00:59:06.317537', 'step': 26892, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:06.388812', 'step': 26892, 'epoch': 3}
{'type': 'loss', 'content': 0.06642194837331772, 'timestamp': '2025-10-02 00:59:06.397890', 'step': 26893, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:06.463505', 'step': 26893, 'epoch': 3}
{'type': 'loss', 'content': 0.011576134711503983, 'timestamp': '2025-10-02 00:59:06.469235', 'step': 26894, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:06.531121', 'step': 26894, 'epoch': 3}
{'type': 'loss', 'content': 0.02757336013019085, 'timestamp': '2025-10-02 00:59:06.536453', 'step': 26895, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:06.620751', 'step': 26895, 'epoch': 3}
{'type': 'loss', 'content': 0.09738881886005402, 'timestamp': '2025-10-02 00:59:06.630249', 'step': 26896, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:59:06.695658', 'step': 26896, 'epoch': 3}
{'type': 'loss', 'content': 0.03003787249326706, 'timestamp': '2025-10-02 00:59:06.706610', 'step': 26897, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:59:06.796268', 'step': 26897, 'epoch': 3}
{'type': 'loss', 'content': 0.031479012221097946, 'timestamp': '2025-10-02 00:59:06.817030', 'step': 26898, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:06.880560', 'step': 26898, 'epoch': 3}
{'type': 'loss', 'content': 0.0909232348203659, 'timestamp': '2025-10-02 00:59:06.885464', 'step': 26899, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:59:06.964340', 'step': 26899, 'epoch': 3}
{'type': 'loss', 'content': 0.027701878920197487, 'timestamp': '2025-10-02 00:59:06.974646', 'step': 26900, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:59:07.100281', 'step': 26900, 'epoch': 3}
{'type': 'loss', 'content': 0.018747640773653984, 'timestamp': '2025-10-02 00:59:07.111124', 'step': 26901, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:59:07.219922', 'step': 26901, 'epoch': 3}
{'type': 'loss', 'content': 0.016700655221939087, 'timestamp': '2025-10-02 00:59:07.223986', 'step': 26902, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:07.319765', 'step': 26902, 'epoch': 3}
{'type': 'loss', 'content': 0.04187662526965141, 'timestamp': '2025-10-02 00:59:07.324745', 'step': 26903, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:07.431551', 'step': 26903, 'epoch': 3}
{'type': 'loss', 'content': 0.05515670031309128, 'timestamp': '2025-10-02 00:59:07.440959', 'step': 26904, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:07.533360', 'step': 26904, 'epoch': 3}
{'type': 'loss', 'content': 0.021588323637843132, 'timestamp': '2025-10-02 00:59:07.540401', 'step': 26905, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:07.612948', 'step': 26905, 'epoch': 3}
{'type': 'loss', 'content': 0.1643057018518448, 'timestamp': '2025-10-02 00:59:07.618009', 'step': 26906, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:59:07.731283', 'step': 26906, 'epoch': 3}
{'type': 'loss', 'content': 0.03489238768815994, 'timestamp': '2025-10-02 00:59:07.750927', 'step': 26907, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:07.815026', 'step': 26907, 'epoch': 3}
{'type': 'loss', 'content': 0.06152293458580971, 'timestamp': '2025-10-02 00:59:07.822791', 'step': 26908, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:07.928219', 'step': 26908, 'epoch': 3}
{'type': 'loss', 'content': 0.035459600389003754, 'timestamp': '2025-10-02 00:59:07.933651', 'step': 26909, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:08.008147', 'step': 26909, 'epoch': 3}
{'type': 'loss', 'content': 0.11656646430492401, 'timestamp': '2025-10-02 00:59:08.012427', 'step': 26910, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:59:08.073929', 'step': 26910, 'epoch': 3}
{'type': 'loss', 'content': 0.03758540749549866, 'timestamp': '2025-10-02 00:59:08.079473', 'step': 26911, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:08.155708', 'step': 26911, 'epoch': 3}
{'type': 'loss', 'content': 0.02294226735830307, 'timestamp': '2025-10-02 00:59:08.165391', 'step': 26912, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:08.226619', 'step': 26912, 'epoch': 3}
{'type': 'loss', 'content': 0.0855160802602768, 'timestamp': '2025-10-02 00:59:08.231586', 'step': 26913, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:08.308681', 'step': 26913, 'epoch': 3}
{'type': 'loss', 'content': 0.01555569376796484, 'timestamp': '2025-10-02 00:59:08.317751', 'step': 26914, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:08.418351', 'step': 26914, 'epoch': 3}
{'type': 'loss', 'content': 0.036348242312669754, 'timestamp': '2025-10-02 00:59:08.422739', 'step': 26915, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:59:08.492057', 'step': 26915, 'epoch': 3}
{'type': 'loss', 'content': 0.04658307880163193, 'timestamp': '2025-10-02 00:59:08.512853', 'step': 26916, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:08.603126', 'step': 26916, 'epoch': 3}
{'type': 'loss', 'content': 0.10046514123678207, 'timestamp': '2025-10-02 00:59:08.610198', 'step': 26917, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:08.702352', 'step': 26917, 'epoch': 3}
{'type': 'loss', 'content': 0.03629222512245178, 'timestamp': '2025-10-02 00:59:08.711020', 'step': 26918, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:59:08.793080', 'step': 26918, 'epoch': 3}
{'type': 'loss', 'content': 0.011881077662110329, 'timestamp': '2025-10-02 00:59:08.803212', 'step': 26919, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:08.898790', 'step': 26919, 'epoch': 3}
{'type': 'loss', 'content': 0.02322535403072834, 'timestamp': '2025-10-02 00:59:08.916290', 'step': 26920, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:08.989678', 'step': 26920, 'epoch': 3}
{'type': 'loss', 'content': 0.030855705961585045, 'timestamp': '2025-10-02 00:59:09.005679', 'step': 26921, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:09.087419', 'step': 26921, 'epoch': 3}
{'type': 'loss', 'content': 0.045017655938863754, 'timestamp': '2025-10-02 00:59:09.103507', 'step': 26922, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:59:09.189080', 'step': 26922, 'epoch': 3}
{'type': 'loss', 'content': 0.02945644035935402, 'timestamp': '2025-10-02 00:59:09.224819', 'step': 26923, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:59:09.336256', 'step': 26923, 'epoch': 3}
{'type': 'loss', 'content': 0.0533146895468235, 'timestamp': '2025-10-02 00:59:09.357773', 'step': 26924, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:59:09.439078', 'step': 26924, 'epoch': 3}
{'type': 'loss', 'content': 0.0958520695567131, 'timestamp': '2025-10-02 00:59:09.461884', 'step': 26925, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:09.557582', 'step': 26925, 'epoch': 3}
{'type': 'loss', 'content': 0.011673485860228539, 'timestamp': '2025-10-02 00:59:09.579302', 'step': 26926, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:09.664757', 'step': 26926, 'epoch': 3}
{'type': 'loss', 'content': 0.1343381106853485, 'timestamp': '2025-10-02 00:59:09.673663', 'step': 26927, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:09.763032', 'step': 26927, 'epoch': 3}
{'type': 'loss', 'content': 0.05194840207695961, 'timestamp': '2025-10-02 00:59:09.778215', 'step': 26928, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:09.852807', 'step': 26928, 'epoch': 3}
{'type': 'loss', 'content': 0.03212941065430641, 'timestamp': '2025-10-02 00:59:09.863341', 'step': 26929, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:09.946203', 'step': 26929, 'epoch': 3}
{'type': 'loss', 'content': 0.06521265208721161, 'timestamp': '2025-10-02 00:59:09.959240', 'step': 26930, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:10.048849', 'step': 26930, 'epoch': 3}
{'type': 'loss', 'content': 0.030198439955711365, 'timestamp': '2025-10-02 00:59:10.054664', 'step': 26931, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:10.125133', 'step': 26931, 'epoch': 3}
{'type': 'loss', 'content': 0.09948170185089111, 'timestamp': '2025-10-02 00:59:10.134923', 'step': 26932, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:59:10.203251', 'step': 26932, 'epoch': 3}
{'type': 'loss', 'content': 0.011461514979600906, 'timestamp': '2025-10-02 00:59:10.214910', 'step': 26933, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:10.297185', 'step': 26933, 'epoch': 3}
{'type': 'loss', 'content': 0.06244571506977081, 'timestamp': '2025-10-02 00:59:10.303364', 'step': 26934, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:10.368872', 'step': 26934, 'epoch': 3}
{'type': 'loss', 'content': 0.022853905335068703, 'timestamp': '2025-10-02 00:59:10.381907', 'step': 26935, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:10.501615', 'step': 26935, 'epoch': 3}
{'type': 'loss', 'content': 0.0385562963783741, 'timestamp': '2025-10-02 00:59:10.509721', 'step': 26936, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:59:10.601344', 'step': 26936, 'epoch': 3}
{'type': 'loss', 'content': 0.05064139515161514, 'timestamp': '2025-10-02 00:59:10.613014', 'step': 26937, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:10.675170', 'step': 26937, 'epoch': 3}
{'type': 'loss', 'content': 0.06923017650842667, 'timestamp': '2025-10-02 00:59:10.680482', 'step': 26938, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:10.742879', 'step': 26938, 'epoch': 3}
{'type': 'loss', 'content': 0.029710233211517334, 'timestamp': '2025-10-02 00:59:10.747703', 'step': 26939, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:59:10.809166', 'step': 26939, 'epoch': 3}
{'type': 'loss', 'content': 0.057021427899599075, 'timestamp': '2025-10-02 00:59:10.818302', 'step': 26940, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:10.878727', 'step': 26940, 'epoch': 3}
{'type': 'loss', 'content': 0.027815690264105797, 'timestamp': '2025-10-02 00:59:10.883237', 'step': 26941, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:10.952831', 'step': 26941, 'epoch': 3}
{'type': 'loss', 'content': 0.05475145950913429, 'timestamp': '2025-10-02 00:59:10.968451', 'step': 26942, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:59:11.056514', 'step': 26942, 'epoch': 3}
{'type': 'loss', 'content': 0.023476144298911095, 'timestamp': '2025-10-02 00:59:11.071277', 'step': 26943, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:11.155734', 'step': 26943, 'epoch': 3}
{'type': 'loss', 'content': 0.028762448579072952, 'timestamp': '2025-10-02 00:59:11.175375', 'step': 26944, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:59:11.250740', 'step': 26944, 'epoch': 3}
{'type': 'loss', 'content': 0.033253494650125504, 'timestamp': '2025-10-02 00:59:11.260576', 'step': 26945, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:11.320359', 'step': 26945, 'epoch': 3}
{'type': 'loss', 'content': 0.08055025339126587, 'timestamp': '2025-10-02 00:59:11.324642', 'step': 26946, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:11.401139', 'step': 26946, 'epoch': 3}
{'type': 'loss', 'content': 0.03783524036407471, 'timestamp': '2025-10-02 00:59:11.404666', 'step': 26947, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 00:59:11.479187', 'step': 26947, 'epoch': 3}
{'type': 'loss', 'content': 0.04697369039058685, 'timestamp': '2025-10-02 00:59:11.496585', 'step': 26948, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:59:11.557776', 'step': 26948, 'epoch': 3}
{'type': 'loss', 'content': 0.030523348599672318, 'timestamp': '2025-10-02 00:59:11.560960', 'step': 26949, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:59:11.636019', 'step': 26949, 'epoch': 3}
{'type': 'loss', 'content': 0.020561454817652702, 'timestamp': '2025-10-02 00:59:11.645496', 'step': 26950, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:11.710523', 'step': 26950, 'epoch': 3}
{'type': 'loss', 'content': 0.06424524635076523, 'timestamp': '2025-10-02 00:59:11.714793', 'step': 26951, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:11.774316', 'step': 26951, 'epoch': 3}
{'type': 'loss', 'content': 0.013104853220283985, 'timestamp': '2025-10-02 00:59:11.781703', 'step': 26952, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:11.841059', 'step': 26952, 'epoch': 3}
{'type': 'loss', 'content': 0.021028460934758186, 'timestamp': '2025-10-02 00:59:11.844217', 'step': 26953, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:59:11.936288', 'step': 26953, 'epoch': 3}
{'type': 'loss', 'content': 0.10105008631944656, 'timestamp': '2025-10-02 00:59:11.940746', 'step': 26954, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:12.001376', 'step': 26954, 'epoch': 3}
{'type': 'loss', 'content': 0.02376735955476761, 'timestamp': '2025-10-02 00:59:12.004845', 'step': 26955, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:12.085357', 'step': 26955, 'epoch': 3}
{'type': 'loss', 'content': 0.01416812464594841, 'timestamp': '2025-10-02 00:59:12.094569', 'step': 26956, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:59:12.167168', 'step': 26956, 'epoch': 3}
{'type': 'loss', 'content': 0.0339219830930233, 'timestamp': '2025-10-02 00:59:12.177020', 'step': 26957, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:12.240519', 'step': 26957, 'epoch': 3}
{'type': 'loss', 'content': 0.02104758657515049, 'timestamp': '2025-10-02 00:59:12.243847', 'step': 26958, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:12.318378', 'step': 26958, 'epoch': 3}
{'type': 'loss', 'content': 0.02216123789548874, 'timestamp': '2025-10-02 00:59:12.323676', 'step': 26959, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:12.386272', 'step': 26959, 'epoch': 3}
{'type': 'loss', 'content': 0.052245769649744034, 'timestamp': '2025-10-02 00:59:12.394076', 'step': 26960, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:12.454264', 'step': 26960, 'epoch': 3}
{'type': 'loss', 'content': 0.0009566626977175474, 'timestamp': '2025-10-02 00:59:12.459794', 'step': 26961, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:12.536591', 'step': 26961, 'epoch': 3}
{'type': 'loss', 'content': 0.11329338699579239, 'timestamp': '2025-10-02 00:59:12.548506', 'step': 26962, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:12.619320', 'step': 26962, 'epoch': 3}
{'type': 'loss', 'content': 0.03137226775288582, 'timestamp': '2025-10-02 00:59:12.627688', 'step': 26963, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:12.692688', 'step': 26963, 'epoch': 3}
{'type': 'loss', 'content': 0.02443128637969494, 'timestamp': '2025-10-02 00:59:12.709269', 'step': 26964, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:12.775705', 'step': 26964, 'epoch': 3}
{'type': 'loss', 'content': 0.08903563767671585, 'timestamp': '2025-10-02 00:59:12.782763', 'step': 26965, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:12.847518', 'step': 26965, 'epoch': 3}
{'type': 'loss', 'content': 0.11576688289642334, 'timestamp': '2025-10-02 00:59:12.852660', 'step': 26966, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:59:12.909721', 'step': 26966, 'epoch': 3}
{'type': 'loss', 'content': 0.07197962701320648, 'timestamp': '2025-10-02 00:59:12.918031', 'step': 26967, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:12.976393', 'step': 26967, 'epoch': 3}
{'type': 'loss', 'content': 0.1082729697227478, 'timestamp': '2025-10-02 00:59:12.982834', 'step': 26968, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:13.055267', 'step': 26968, 'epoch': 3}
{'type': 'loss', 'content': 0.03015180118381977, 'timestamp': '2025-10-02 00:59:13.060544', 'step': 26969, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:13.125747', 'step': 26969, 'epoch': 3}
{'type': 'loss', 'content': 0.06615937501192093, 'timestamp': '2025-10-02 00:59:13.130981', 'step': 26970, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:13.199532', 'step': 26970, 'epoch': 3}
{'type': 'loss', 'content': 0.0692409798502922, 'timestamp': '2025-10-02 00:59:13.208129', 'step': 26971, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:13.266114', 'step': 26971, 'epoch': 3}
{'type': 'loss', 'content': 0.005948260426521301, 'timestamp': '2025-10-02 00:59:13.273648', 'step': 26972, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:13.340686', 'step': 26972, 'epoch': 3}
{'type': 'loss', 'content': 0.1324930340051651, 'timestamp': '2025-10-02 00:59:13.345394', 'step': 26973, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:13.404678', 'step': 26973, 'epoch': 3}
{'type': 'loss', 'content': 0.015887655317783356, 'timestamp': '2025-10-02 00:59:13.413724', 'step': 26974, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:13.480139', 'step': 26974, 'epoch': 3}
{'type': 'loss', 'content': 0.066391721367836, 'timestamp': '2025-10-02 00:59:13.483367', 'step': 26975, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:13.556624', 'step': 26975, 'epoch': 3}
{'type': 'loss', 'content': 0.030549796298146248, 'timestamp': '2025-10-02 00:59:13.565344', 'step': 26976, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:13.628313', 'step': 26976, 'epoch': 3}
{'type': 'loss', 'content': 0.042168162763118744, 'timestamp': '2025-10-02 00:59:13.632120', 'step': 26977, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:59:13.716890', 'step': 26977, 'epoch': 3}
{'type': 'loss', 'content': 0.10575128346681595, 'timestamp': '2025-10-02 00:59:13.720934', 'step': 26978, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:59:13.787030', 'step': 26978, 'epoch': 3}
{'type': 'loss', 'content': 0.077246755361557, 'timestamp': '2025-10-02 00:59:13.790416', 'step': 26979, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:59:13.862097', 'step': 26979, 'epoch': 3}
{'type': 'loss', 'content': 0.09482795745134354, 'timestamp': '2025-10-02 00:59:13.871338', 'step': 26980, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:59:13.935519', 'step': 26980, 'epoch': 3}
{'type': 'loss', 'content': 0.022773800417780876, 'timestamp': '2025-10-02 00:59:13.946436', 'step': 26981, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:14.005715', 'step': 26981, 'epoch': 3}
{'type': 'loss', 'content': 0.0277567021548748, 'timestamp': '2025-10-02 00:59:14.012651', 'step': 26982, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:59:14.113262', 'step': 26982, 'epoch': 3}
{'type': 'loss', 'content': 0.0900728777050972, 'timestamp': '2025-10-02 00:59:14.117335', 'step': 26983, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:14.216267', 'step': 26983, 'epoch': 3}
{'type': 'loss', 'content': 0.041553694754838943, 'timestamp': '2025-10-02 00:59:14.224533', 'step': 26984, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:59:14.314861', 'step': 26984, 'epoch': 3}
{'type': 'loss', 'content': 0.008580354042351246, 'timestamp': '2025-10-02 00:59:14.317917', 'step': 26985, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:14.387072', 'step': 26985, 'epoch': 3}
{'type': 'loss', 'content': 0.04275691881775856, 'timestamp': '2025-10-02 00:59:14.390762', 'step': 26986, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:59:14.454742', 'step': 26986, 'epoch': 3}
{'type': 'loss', 'content': 0.007265312131494284, 'timestamp': '2025-10-02 00:59:14.464876', 'step': 26987, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:14.532007', 'step': 26987, 'epoch': 3}
{'type': 'loss', 'content': 0.029991120100021362, 'timestamp': '2025-10-02 00:59:14.541602', 'step': 26988, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:14.611508', 'step': 26988, 'epoch': 3}
{'type': 'loss', 'content': 0.0030783459078520536, 'timestamp': '2025-10-02 00:59:14.614411', 'step': 26989, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:14.674476', 'step': 26989, 'epoch': 3}
{'type': 'loss', 'content': 0.01839951053261757, 'timestamp': '2025-10-02 00:59:14.684673', 'step': 26990, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:59:14.763919', 'step': 26990, 'epoch': 3}
{'type': 'loss', 'content': 0.040209196507930756, 'timestamp': '2025-10-02 00:59:14.769342', 'step': 26991, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:59:14.828532', 'step': 26991, 'epoch': 3}
{'type': 'loss', 'content': 0.06226464733481407, 'timestamp': '2025-10-02 00:59:14.836108', 'step': 26992, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:59:14.922684', 'step': 26992, 'epoch': 3}
{'type': 'loss', 'content': 0.09792156517505646, 'timestamp': '2025-10-02 00:59:14.925736', 'step': 26993, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:15.000169', 'step': 26993, 'epoch': 3}
{'type': 'loss', 'content': 0.03847505524754524, 'timestamp': '2025-10-02 00:59:15.006868', 'step': 26994, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:15.073826', 'step': 26994, 'epoch': 3}
{'type': 'loss', 'content': 0.04384312406182289, 'timestamp': '2025-10-02 00:59:15.082436', 'step': 26995, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:59:15.160980', 'step': 26995, 'epoch': 3}
{'type': 'loss', 'content': 0.0011337040923535824, 'timestamp': '2025-10-02 00:59:15.174075', 'step': 26996, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:15.245180', 'step': 26996, 'epoch': 3}
{'type': 'loss', 'content': 0.01231453288346529, 'timestamp': '2025-10-02 00:59:15.252126', 'step': 26997, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:15.327337', 'step': 26997, 'epoch': 3}
{'type': 'loss', 'content': 0.06617242097854614, 'timestamp': '2025-10-02 00:59:15.332048', 'step': 26998, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:15.411092', 'step': 26998, 'epoch': 3}
{'type': 'loss', 'content': 0.061403077095746994, 'timestamp': '2025-10-02 00:59:15.427978', 'step': 26999, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:15.495394', 'step': 26999, 'epoch': 3}
{'type': 'loss', 'content': 0.05138992890715599, 'timestamp': '2025-10-02 00:59:15.503275', 'step': 27000, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 27000', 'timestamp': '2025-10-02 00:59:15.920508', 'step': 27000, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:59:15.979373', 'step': 27000, 'epoch': 3}
{'type': 'loss', 'content': 0.010702972300350666, 'timestamp': '2025-10-02 00:59:15.988918', 'step': 27001, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:16.047526', 'step': 27001, 'epoch': 3}
{'type': 'loss', 'content': 0.01206756941974163, 'timestamp': '2025-10-02 00:59:16.055513', 'step': 27002, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:16.127241', 'step': 27002, 'epoch': 3}
{'type': 'loss', 'content': 0.06793304532766342, 'timestamp': '2025-10-02 00:59:16.129848', 'step': 27003, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:16.195322', 'step': 27003, 'epoch': 3}
{'type': 'loss', 'content': 0.14673514664173126, 'timestamp': '2025-10-02 00:59:16.210014', 'step': 27004, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:16.275225', 'step': 27004, 'epoch': 3}
{'type': 'loss', 'content': 0.05930495634675026, 'timestamp': '2025-10-02 00:59:16.278714', 'step': 27005, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:16.349627', 'step': 27005, 'epoch': 3}
{'type': 'loss', 'content': 0.027754293754696846, 'timestamp': '2025-10-02 00:59:16.358248', 'step': 27006, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:16.416332', 'step': 27006, 'epoch': 3}
{'type': 'loss', 'content': 0.004718669690191746, 'timestamp': '2025-10-02 00:59:16.423320', 'step': 27007, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:59:16.493870', 'step': 27007, 'epoch': 3}
{'type': 'loss', 'content': 0.02697792276740074, 'timestamp': '2025-10-02 00:59:16.505101', 'step': 27008, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:16.563738', 'step': 27008, 'epoch': 3}
{'type': 'loss', 'content': 0.03026081994175911, 'timestamp': '2025-10-02 00:59:16.566382', 'step': 27009, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:16.630112', 'step': 27009, 'epoch': 3}
{'type': 'loss', 'content': 0.013308767229318619, 'timestamp': '2025-10-02 00:59:16.636842', 'step': 27010, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:16.714399', 'step': 27010, 'epoch': 3}
{'type': 'loss', 'content': 0.09098732471466064, 'timestamp': '2025-10-02 00:59:16.725906', 'step': 27011, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:59:16.818820', 'step': 27011, 'epoch': 3}
{'type': 'loss', 'content': 0.04270293563604355, 'timestamp': '2025-10-02 00:59:16.829094', 'step': 27012, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:59:16.893423', 'step': 27012, 'epoch': 3}
{'type': 'loss', 'content': 0.0332743376493454, 'timestamp': '2025-10-02 00:59:16.905382', 'step': 27013, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:59:16.982833', 'step': 27013, 'epoch': 3}
{'type': 'loss', 'content': 0.06277863681316376, 'timestamp': '2025-10-02 00:59:16.986443', 'step': 27014, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:59:17.044888', 'step': 27014, 'epoch': 3}
{'type': 'loss', 'content': 0.049886349588632584, 'timestamp': '2025-10-02 00:59:17.048852', 'step': 27015, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:17.113819', 'step': 27015, 'epoch': 3}
{'type': 'loss', 'content': 0.09663121402263641, 'timestamp': '2025-10-02 00:59:17.121070', 'step': 27016, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:17.190657', 'step': 27016, 'epoch': 3}
{'type': 'loss', 'content': 0.07722529023885727, 'timestamp': '2025-10-02 00:59:17.195688', 'step': 27017, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 00:59:17.275861', 'step': 27017, 'epoch': 3}
{'type': 'loss', 'content': 0.03467031195759773, 'timestamp': '2025-10-02 00:59:17.289237', 'step': 27018, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:59:17.346436', 'step': 27018, 'epoch': 3}
{'type': 'loss', 'content': 0.024421371519565582, 'timestamp': '2025-10-02 00:59:17.350124', 'step': 27019, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:59:17.414041', 'step': 27019, 'epoch': 3}
{'type': 'loss', 'content': 0.056400176137685776, 'timestamp': '2025-10-02 00:59:17.425231', 'step': 27020, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:59:17.500839', 'step': 27020, 'epoch': 3}
{'type': 'loss', 'content': 0.01894346810877323, 'timestamp': '2025-10-02 00:59:17.511731', 'step': 27021, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:17.577541', 'step': 27021, 'epoch': 3}
{'type': 'loss', 'content': 0.07793425023555756, 'timestamp': '2025-10-02 00:59:17.588028', 'step': 27022, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:17.645815', 'step': 27022, 'epoch': 3}
{'type': 'loss', 'content': 0.01900026574730873, 'timestamp': '2025-10-02 00:59:17.656758', 'step': 27023, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:59:17.726295', 'step': 27023, 'epoch': 3}
{'type': 'loss', 'content': 0.0097833052277565, 'timestamp': '2025-10-02 00:59:17.737764', 'step': 27024, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:17.802873', 'step': 27024, 'epoch': 3}
{'type': 'loss', 'content': 0.017313972115516663, 'timestamp': '2025-10-02 00:59:17.807045', 'step': 27025, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:17.867045', 'step': 27025, 'epoch': 3}
{'type': 'loss', 'content': 0.04090948775410652, 'timestamp': '2025-10-02 00:59:17.871946', 'step': 27026, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:59:17.940610', 'step': 27026, 'epoch': 3}
{'type': 'loss', 'content': 0.07706272602081299, 'timestamp': '2025-10-02 00:59:17.953561', 'step': 27027, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:18.039635', 'step': 27027, 'epoch': 3}
{'type': 'loss', 'content': 0.03527417778968811, 'timestamp': '2025-10-02 00:59:18.051670', 'step': 27028, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:18.118846', 'step': 27028, 'epoch': 3}
{'type': 'loss', 'content': 0.03996311128139496, 'timestamp': '2025-10-02 00:59:18.122252', 'step': 27029, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:18.206682', 'step': 27029, 'epoch': 3}
{'type': 'loss', 'content': 0.08694788068532944, 'timestamp': '2025-10-02 00:59:18.219635', 'step': 27030, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:18.311716', 'step': 27030, 'epoch': 3}
{'type': 'loss', 'content': 0.08676688373088837, 'timestamp': '2025-10-02 00:59:18.315218', 'step': 27031, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:18.376747', 'step': 27031, 'epoch': 3}
{'type': 'loss', 'content': 0.03376096859574318, 'timestamp': '2025-10-02 00:59:18.384482', 'step': 27032, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:18.456624', 'step': 27032, 'epoch': 3}
{'type': 'loss', 'content': 0.038016241043806076, 'timestamp': '2025-10-02 00:59:18.463575', 'step': 27033, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:18.550722', 'step': 27033, 'epoch': 3}
{'type': 'loss', 'content': 0.06488822400569916, 'timestamp': '2025-10-02 00:59:18.555986', 'step': 27034, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:18.637183', 'step': 27034, 'epoch': 3}
{'type': 'loss', 'content': 0.05527201294898987, 'timestamp': '2025-10-02 00:59:18.654235', 'step': 27035, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:59:18.725502', 'step': 27035, 'epoch': 3}
{'type': 'loss', 'content': 0.029069315642118454, 'timestamp': '2025-10-02 00:59:18.736575', 'step': 27036, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:59:18.815620', 'step': 27036, 'epoch': 3}
{'type': 'loss', 'content': 0.01589578576385975, 'timestamp': '2025-10-02 00:59:18.830977', 'step': 27037, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:18.908758', 'step': 27037, 'epoch': 3}
{'type': 'loss', 'content': 0.0237498227506876, 'timestamp': '2025-10-02 00:59:18.922228', 'step': 27038, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:59:18.988777', 'step': 27038, 'epoch': 3}
{'type': 'loss', 'content': 0.00437093386426568, 'timestamp': '2025-10-02 00:59:19.001393', 'step': 27039, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:19.077420', 'step': 27039, 'epoch': 3}
{'type': 'loss', 'content': 0.04244334250688553, 'timestamp': '2025-10-02 00:59:19.085139', 'step': 27040, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:59:19.142515', 'step': 27040, 'epoch': 3}
{'type': 'loss', 'content': 0.02269618958234787, 'timestamp': '2025-10-02 00:59:19.152481', 'step': 27041, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:19.209997', 'step': 27041, 'epoch': 3}
{'type': 'loss', 'content': 0.09114737808704376, 'timestamp': '2025-10-02 00:59:19.215127', 'step': 27042, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:19.301875', 'step': 27042, 'epoch': 3}
{'type': 'loss', 'content': 0.020992640405893326, 'timestamp': '2025-10-02 00:59:19.309960', 'step': 27043, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:19.385331', 'step': 27043, 'epoch': 3}
{'type': 'loss', 'content': 0.017039991915225983, 'timestamp': '2025-10-02 00:59:19.392866', 'step': 27044, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:19.465514', 'step': 27044, 'epoch': 3}
{'type': 'loss', 'content': 0.12249013781547546, 'timestamp': '2025-10-02 00:59:19.477401', 'step': 27045, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:19.549401', 'step': 27045, 'epoch': 3}
{'type': 'loss', 'content': 0.002302624052390456, 'timestamp': '2025-10-02 00:59:19.560979', 'step': 27046, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:59:19.622780', 'step': 27046, 'epoch': 3}
{'type': 'loss', 'content': 0.012767038308084011, 'timestamp': '2025-10-02 00:59:19.631920', 'step': 27047, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:19.695175', 'step': 27047, 'epoch': 3}
{'type': 'loss', 'content': 0.04011143743991852, 'timestamp': '2025-10-02 00:59:19.704064', 'step': 27048, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:19.760857', 'step': 27048, 'epoch': 3}
{'type': 'loss', 'content': 0.1095946654677391, 'timestamp': '2025-10-02 00:59:19.764447', 'step': 27049, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:19.827949', 'step': 27049, 'epoch': 3}
{'type': 'loss', 'content': 0.044540390372276306, 'timestamp': '2025-10-02 00:59:19.831328', 'step': 27050, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:59:19.896561', 'step': 27050, 'epoch': 3}
{'type': 'loss', 'content': 0.0019984308164566755, 'timestamp': '2025-10-02 00:59:19.906092', 'step': 27051, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:19.972899', 'step': 27051, 'epoch': 3}
{'type': 'loss', 'content': 0.02749503217637539, 'timestamp': '2025-10-02 00:59:19.987255', 'step': 27052, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:59:20.055796', 'step': 27052, 'epoch': 3}
{'type': 'loss', 'content': 0.061988137662410736, 'timestamp': '2025-10-02 00:59:20.066752', 'step': 27053, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:59:20.138804', 'step': 27053, 'epoch': 3}
{'type': 'loss', 'content': 0.059066466987133026, 'timestamp': '2025-10-02 00:59:20.142527', 'step': 27054, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:20.211017', 'step': 27054, 'epoch': 3}
{'type': 'loss', 'content': 0.0794096440076828, 'timestamp': '2025-10-02 00:59:20.219709', 'step': 27055, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:20.276690', 'step': 27055, 'epoch': 3}
{'type': 'loss', 'content': 0.031076576560735703, 'timestamp': '2025-10-02 00:59:20.284039', 'step': 27056, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:59:20.361407', 'step': 27056, 'epoch': 3}
{'type': 'loss', 'content': 0.005969849415123463, 'timestamp': '2025-10-02 00:59:20.374384', 'step': 27057, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:20.441655', 'step': 27057, 'epoch': 3}
{'type': 'loss', 'content': 0.004918673075735569, 'timestamp': '2025-10-02 00:59:20.444264', 'step': 27058, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:20.520662', 'step': 27058, 'epoch': 3}
{'type': 'loss', 'content': 0.0017523211427032948, 'timestamp': '2025-10-02 00:59:20.529570', 'step': 27059, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:20.615609', 'step': 27059, 'epoch': 3}
{'type': 'loss', 'content': 0.01110838446766138, 'timestamp': '2025-10-02 00:59:20.625518', 'step': 27060, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:20.694419', 'step': 27060, 'epoch': 3}
{'type': 'loss', 'content': 0.060111839324235916, 'timestamp': '2025-10-02 00:59:20.701862', 'step': 27061, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:20.768194', 'step': 27061, 'epoch': 3}
{'type': 'loss', 'content': 0.05633774772286415, 'timestamp': '2025-10-02 00:59:20.772930', 'step': 27062, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:20.851224', 'step': 27062, 'epoch': 3}
{'type': 'loss', 'content': 0.03692208603024483, 'timestamp': '2025-10-02 00:59:20.864405', 'step': 27063, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:20.945031', 'step': 27063, 'epoch': 3}
{'type': 'loss', 'content': 0.016021549701690674, 'timestamp': '2025-10-02 00:59:20.960234', 'step': 27064, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:21.019079', 'step': 27064, 'epoch': 3}
{'type': 'loss', 'content': 0.056634169071912766, 'timestamp': '2025-10-02 00:59:21.021917', 'step': 27065, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:21.092802', 'step': 27065, 'epoch': 3}
{'type': 'loss', 'content': 0.03803393244743347, 'timestamp': '2025-10-02 00:59:21.096582', 'step': 27066, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:21.189014', 'step': 27066, 'epoch': 3}
{'type': 'loss', 'content': 0.06615092605352402, 'timestamp': '2025-10-02 00:59:21.195796', 'step': 27067, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:21.273906', 'step': 27067, 'epoch': 3}
{'type': 'loss', 'content': 0.003327341750264168, 'timestamp': '2025-10-02 00:59:21.283435', 'step': 27068, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:21.416177', 'step': 27068, 'epoch': 3}
{'type': 'loss', 'content': 0.010912120342254639, 'timestamp': '2025-10-02 00:59:21.420536', 'step': 27069, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:21.505337', 'step': 27069, 'epoch': 3}
{'type': 'loss', 'content': 0.05981504172086716, 'timestamp': '2025-10-02 00:59:21.513468', 'step': 27070, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:59:21.585147', 'step': 27070, 'epoch': 3}
{'type': 'loss', 'content': 0.03497011959552765, 'timestamp': '2025-10-02 00:59:21.594725', 'step': 27071, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:21.672320', 'step': 27071, 'epoch': 3}
{'type': 'loss', 'content': 0.1505107581615448, 'timestamp': '2025-10-02 00:59:21.681948', 'step': 27072, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:59:21.746867', 'step': 27072, 'epoch': 3}
{'type': 'loss', 'content': 0.037908680737018585, 'timestamp': '2025-10-02 00:59:21.750945', 'step': 27073, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:59:21.811231', 'step': 27073, 'epoch': 3}
{'type': 'loss', 'content': 0.011232133954763412, 'timestamp': '2025-10-02 00:59:21.820724', 'step': 27074, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:59:21.908293', 'step': 27074, 'epoch': 3}
{'type': 'loss', 'content': 0.02277853712439537, 'timestamp': '2025-10-02 00:59:21.917868', 'step': 27075, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:22.006700', 'step': 27075, 'epoch': 3}
{'type': 'loss', 'content': 0.039255719631910324, 'timestamp': '2025-10-02 00:59:22.014326', 'step': 27076, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:22.080952', 'step': 27076, 'epoch': 3}
{'type': 'loss', 'content': 0.03286409005522728, 'timestamp': '2025-10-02 00:59:22.095233', 'step': 27077, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:22.176571', 'step': 27077, 'epoch': 3}
{'type': 'loss', 'content': 0.012063967995345592, 'timestamp': '2025-10-02 00:59:22.185776', 'step': 27078, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:59:22.252811', 'step': 27078, 'epoch': 3}
{'type': 'loss', 'content': 0.05153105780482292, 'timestamp': '2025-10-02 00:59:22.255762', 'step': 27079, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:22.316268', 'step': 27079, 'epoch': 3}
{'type': 'loss', 'content': 0.024213716387748718, 'timestamp': '2025-10-02 00:59:22.334245', 'step': 27080, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:22.395831', 'step': 27080, 'epoch': 3}
{'type': 'loss', 'content': 0.015217643231153488, 'timestamp': '2025-10-02 00:59:22.404845', 'step': 27081, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:59:22.478190', 'step': 27081, 'epoch': 3}
{'type': 'loss', 'content': 0.01255758386105299, 'timestamp': '2025-10-02 00:59:22.488659', 'step': 27082, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:22.548348', 'step': 27082, 'epoch': 3}
{'type': 'loss', 'content': 0.028115980327129364, 'timestamp': '2025-10-02 00:59:22.552104', 'step': 27083, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:22.622459', 'step': 27083, 'epoch': 3}
{'type': 'loss', 'content': 0.045000575482845306, 'timestamp': '2025-10-02 00:59:22.629754', 'step': 27084, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:59:22.688501', 'step': 27084, 'epoch': 3}
{'type': 'loss', 'content': 0.06108158081769943, 'timestamp': '2025-10-02 00:59:22.692227', 'step': 27085, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:59:22.761949', 'step': 27085, 'epoch': 3}
{'type': 'loss', 'content': 0.06990779936313629, 'timestamp': '2025-10-02 00:59:22.773835', 'step': 27086, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 00:59:22.862878', 'step': 27086, 'epoch': 3}
{'type': 'loss', 'content': 0.03519963473081589, 'timestamp': '2025-10-02 00:59:22.866601', 'step': 27087, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:22.925875', 'step': 27087, 'epoch': 3}
{'type': 'loss', 'content': 0.041007090359926224, 'timestamp': '2025-10-02 00:59:22.939601', 'step': 27088, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:59:23.012190', 'step': 27088, 'epoch': 3}
{'type': 'loss', 'content': 0.02959960512816906, 'timestamp': '2025-10-02 00:59:23.023925', 'step': 27089, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:59:23.091525', 'step': 27089, 'epoch': 3}
{'type': 'loss', 'content': 0.012313726358115673, 'timestamp': '2025-10-02 00:59:23.101729', 'step': 27090, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:23.166790', 'step': 27090, 'epoch': 3}
{'type': 'loss', 'content': 0.03600317984819412, 'timestamp': '2025-10-02 00:59:23.171983', 'step': 27091, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:23.243171', 'step': 27091, 'epoch': 3}
{'type': 'loss', 'content': 0.025761889293789864, 'timestamp': '2025-10-02 00:59:23.250422', 'step': 27092, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:23.324069', 'step': 27092, 'epoch': 3}
{'type': 'loss', 'content': 0.06407808512449265, 'timestamp': '2025-10-02 00:59:23.328366', 'step': 27093, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:23.393823', 'step': 27093, 'epoch': 3}
{'type': 'loss', 'content': 0.09292106330394745, 'timestamp': '2025-10-02 00:59:23.397266', 'step': 27094, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:23.461811', 'step': 27094, 'epoch': 3}
{'type': 'loss', 'content': 0.042678408324718475, 'timestamp': '2025-10-02 00:59:23.465908', 'step': 27095, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:23.527916', 'step': 27095, 'epoch': 3}
{'type': 'loss', 'content': 0.051151543855667114, 'timestamp': '2025-10-02 00:59:23.534703', 'step': 27096, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:23.612361', 'step': 27096, 'epoch': 3}
{'type': 'loss', 'content': 0.0393909327685833, 'timestamp': '2025-10-02 00:59:23.621458', 'step': 27097, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:23.680290', 'step': 27097, 'epoch': 3}
{'type': 'loss', 'content': 0.05124267563223839, 'timestamp': '2025-10-02 00:59:23.689662', 'step': 27098, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:59:23.753595', 'step': 27098, 'epoch': 3}
{'type': 'loss', 'content': 0.0165067408233881, 'timestamp': '2025-10-02 00:59:23.763532', 'step': 27099, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:23.830700', 'step': 27099, 'epoch': 3}
{'type': 'loss', 'content': 0.02911740355193615, 'timestamp': '2025-10-02 00:59:23.842445', 'step': 27100, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:23.920513', 'step': 27100, 'epoch': 3}
{'type': 'loss', 'content': 0.05160163715481758, 'timestamp': '2025-10-02 00:59:23.923553', 'step': 27101, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:23.987748', 'step': 27101, 'epoch': 3}
{'type': 'loss', 'content': 0.018770594149827957, 'timestamp': '2025-10-02 00:59:23.997546', 'step': 27102, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:24.067885', 'step': 27102, 'epoch': 3}
{'type': 'loss', 'content': 0.009679351933300495, 'timestamp': '2025-10-02 00:59:24.071463', 'step': 27103, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:59:24.132101', 'step': 27103, 'epoch': 3}
{'type': 'loss', 'content': 0.04478874057531357, 'timestamp': '2025-10-02 00:59:24.139490', 'step': 27104, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:59:24.202427', 'step': 27104, 'epoch': 3}
{'type': 'loss', 'content': 0.036957740783691406, 'timestamp': '2025-10-02 00:59:24.213370', 'step': 27105, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:24.270180', 'step': 27105, 'epoch': 3}
{'type': 'loss', 'content': 0.07232167571783066, 'timestamp': '2025-10-02 00:59:24.272824', 'step': 27106, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 00:59:24.344335', 'step': 27106, 'epoch': 3}
{'type': 'loss', 'content': 0.024058543145656586, 'timestamp': '2025-10-02 00:59:24.356977', 'step': 27107, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:24.424818', 'step': 27107, 'epoch': 3}
{'type': 'loss', 'content': 0.019412169232964516, 'timestamp': '2025-10-02 00:59:24.435162', 'step': 27108, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:59:24.495964', 'step': 27108, 'epoch': 3}
{'type': 'loss', 'content': 0.062263596802949905, 'timestamp': '2025-10-02 00:59:24.507268', 'step': 27109, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:24.572738', 'step': 27109, 'epoch': 3}
{'type': 'loss', 'content': 0.03084445744752884, 'timestamp': '2025-10-02 00:59:24.580235', 'step': 27110, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:59:24.635655', 'step': 27110, 'epoch': 3}
{'type': 'loss', 'content': 0.02855813503265381, 'timestamp': '2025-10-02 00:59:24.638322', 'step': 27111, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:24.695197', 'step': 27111, 'epoch': 3}
{'type': 'loss', 'content': 0.03163915500044823, 'timestamp': '2025-10-02 00:59:24.701759', 'step': 27112, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:24.756509', 'step': 27112, 'epoch': 3}
{'type': 'loss', 'content': 0.0985071063041687, 'timestamp': '2025-10-02 00:59:24.759124', 'step': 27113, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:24.815629', 'step': 27113, 'epoch': 3}
{'type': 'loss', 'content': 0.02870362065732479, 'timestamp': '2025-10-02 00:59:24.818624', 'step': 27114, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:24.875145', 'step': 27114, 'epoch': 3}
{'type': 'loss', 'content': 0.02336074411869049, 'timestamp': '2025-10-02 00:59:24.880381', 'step': 27115, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:59:24.939317', 'step': 27115, 'epoch': 3}
{'type': 'loss', 'content': 0.022197525948286057, 'timestamp': '2025-10-02 00:59:24.949625', 'step': 27116, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:25.013509', 'step': 27116, 'epoch': 3}
{'type': 'loss', 'content': 0.04543299973011017, 'timestamp': '2025-10-02 00:59:25.018224', 'step': 27117, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:59:25.084824', 'step': 27117, 'epoch': 3}
{'type': 'loss', 'content': 0.029841238632798195, 'timestamp': '2025-10-02 00:59:25.095252', 'step': 27118, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:25.157045', 'step': 27118, 'epoch': 3}
{'type': 'loss', 'content': 0.0015720912488177419, 'timestamp': '2025-10-02 00:59:25.163861', 'step': 27119, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:25.227733', 'step': 27119, 'epoch': 3}
{'type': 'loss', 'content': 0.023938002064824104, 'timestamp': '2025-10-02 00:59:25.236721', 'step': 27120, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:59:25.299157', 'step': 27120, 'epoch': 3}
{'type': 'loss', 'content': 0.08531051129102707, 'timestamp': '2025-10-02 00:59:25.302429', 'step': 27121, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:25.357854', 'step': 27121, 'epoch': 3}
{'type': 'loss', 'content': 0.041425444185733795, 'timestamp': '2025-10-02 00:59:25.360063', 'step': 27122, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:25.414669', 'step': 27122, 'epoch': 3}
{'type': 'loss', 'content': 0.05520451068878174, 'timestamp': '2025-10-02 00:59:25.417397', 'step': 27123, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:25.472433', 'step': 27123, 'epoch': 3}
{'type': 'loss', 'content': 0.029753832146525383, 'timestamp': '2025-10-02 00:59:25.478499', 'step': 27124, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:59:25.539664', 'step': 27124, 'epoch': 3}
{'type': 'loss', 'content': 0.02803785912692547, 'timestamp': '2025-10-02 00:59:25.551372', 'step': 27125, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:25.616076', 'step': 27125, 'epoch': 3}
{'type': 'loss', 'content': 0.005159453488886356, 'timestamp': '2025-10-02 00:59:25.621415', 'step': 27126, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:59:25.685746', 'step': 27126, 'epoch': 3}
{'type': 'loss', 'content': 0.017554352059960365, 'timestamp': '2025-10-02 00:59:25.695914', 'step': 27127, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:25.762336', 'step': 27127, 'epoch': 3}
{'type': 'loss', 'content': 0.039156243205070496, 'timestamp': '2025-10-02 00:59:25.768688', 'step': 27128, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:25.825158', 'step': 27128, 'epoch': 3}
{'type': 'loss', 'content': 0.006583105772733688, 'timestamp': '2025-10-02 00:59:25.832255', 'step': 27129, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:25.889861', 'step': 27129, 'epoch': 3}
{'type': 'loss', 'content': 0.01950177177786827, 'timestamp': '2025-10-02 00:59:25.895205', 'step': 27130, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:25.953253', 'step': 27130, 'epoch': 3}
{'type': 'loss', 'content': 0.06281726062297821, 'timestamp': '2025-10-02 00:59:25.962314', 'step': 27131, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:59:26.019269', 'step': 27131, 'epoch': 3}
{'type': 'loss', 'content': 0.007731439080089331, 'timestamp': '2025-10-02 00:59:26.029581', 'step': 27132, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:26.087622', 'step': 27132, 'epoch': 3}
{'type': 'loss', 'content': 0.09725913405418396, 'timestamp': '2025-10-02 00:59:26.091073', 'step': 27133, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:26.146379', 'step': 27133, 'epoch': 3}
{'type': 'loss', 'content': 0.031223654747009277, 'timestamp': '2025-10-02 00:59:26.148847', 'step': 27134, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:26.203818', 'step': 27134, 'epoch': 3}
{'type': 'loss', 'content': 0.11010099202394485, 'timestamp': '2025-10-02 00:59:26.206847', 'step': 27135, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:26.264585', 'step': 27135, 'epoch': 3}
{'type': 'loss', 'content': 0.07184231281280518, 'timestamp': '2025-10-02 00:59:26.272798', 'step': 27136, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:26.336060', 'step': 27136, 'epoch': 3}
{'type': 'loss', 'content': 0.004351308103650808, 'timestamp': '2025-10-02 00:59:26.344998', 'step': 27137, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:26.408844', 'step': 27137, 'epoch': 3}
{'type': 'loss', 'content': 0.0010553705506026745, 'timestamp': '2025-10-02 00:59:26.417996', 'step': 27138, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:26.473095', 'step': 27138, 'epoch': 3}
{'type': 'loss', 'content': 0.021584032103419304, 'timestamp': '2025-10-02 00:59:26.475527', 'step': 27139, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:59:26.537583', 'step': 27139, 'epoch': 3}
{'type': 'loss', 'content': 0.018740037456154823, 'timestamp': '2025-10-02 00:59:26.548737', 'step': 27140, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:26.603302', 'step': 27140, 'epoch': 3}
{'type': 'loss', 'content': 0.009920800104737282, 'timestamp': '2025-10-02 00:59:26.605762', 'step': 27141, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:26.668757', 'step': 27141, 'epoch': 3}
{'type': 'loss', 'content': 0.01845921203494072, 'timestamp': '2025-10-02 00:59:26.675787', 'step': 27142, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:26.739087', 'step': 27142, 'epoch': 3}
{'type': 'loss', 'content': 0.017916889861226082, 'timestamp': '2025-10-02 00:59:26.744749', 'step': 27143, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:26.813794', 'step': 27143, 'epoch': 3}
{'type': 'loss', 'content': 0.00849514827132225, 'timestamp': '2025-10-02 00:59:26.819910', 'step': 27144, 'epoch': 3}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 00:59:54.194415', 'step': 27144, 'epoch': 3}
{'type': 'pplx', 'content': 90.61627407191783, 'timestamp': '2025-10-02 00:59:54.197737', 'step': 27144, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:59:54.256291', 'step': 27144, 'epoch': 3}
{'type': 'loss', 'content': 0.0518590547144413, 'timestamp': '2025-10-02 00:59:54.267569', 'step': 27145, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:54.323838', 'step': 27145, 'epoch': 3}
{'type': 'loss', 'content': 0.1187426820397377, 'timestamp': '2025-10-02 00:59:54.328646', 'step': 27146, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:54.385507', 'step': 27146, 'epoch': 3}
{'type': 'loss', 'content': 0.059402499347925186, 'timestamp': '2025-10-02 00:59:54.388120', 'step': 27147, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:54.442436', 'step': 27147, 'epoch': 3}
{'type': 'loss', 'content': 0.11524525284767151, 'timestamp': '2025-10-02 00:59:54.449043', 'step': 27148, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:54.502945', 'step': 27148, 'epoch': 3}
{'type': 'loss', 'content': 0.022557619959115982, 'timestamp': '2025-10-02 00:59:54.512422', 'step': 27149, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:54.567429', 'step': 27149, 'epoch': 3}
{'type': 'loss', 'content': 0.028797948732972145, 'timestamp': '2025-10-02 00:59:54.576710', 'step': 27150, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:54.632984', 'step': 27150, 'epoch': 3}
{'type': 'loss', 'content': 0.04312101751565933, 'timestamp': '2025-10-02 00:59:54.635528', 'step': 27151, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:54.690604', 'step': 27151, 'epoch': 3}
{'type': 'loss', 'content': 0.06400569528341293, 'timestamp': '2025-10-02 00:59:54.696885', 'step': 27152, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:54.750988', 'step': 27152, 'epoch': 3}
{'type': 'loss', 'content': 0.0357232540845871, 'timestamp': '2025-10-02 00:59:54.753295', 'step': 27153, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:54.807761', 'step': 27153, 'epoch': 3}
{'type': 'loss', 'content': 0.02817290462553501, 'timestamp': '2025-10-02 00:59:54.810115', 'step': 27154, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:54.865430', 'step': 27154, 'epoch': 3}
{'type': 'loss', 'content': 0.0029974565841257572, 'timestamp': '2025-10-02 00:59:54.871048', 'step': 27155, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:59:54.933525', 'step': 27155, 'epoch': 3}
{'type': 'loss', 'content': 0.019070889800786972, 'timestamp': '2025-10-02 00:59:54.944978', 'step': 27156, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:54.998933', 'step': 27156, 'epoch': 3}
{'type': 'loss', 'content': 0.1589915156364441, 'timestamp': '2025-10-02 00:59:55.001408', 'step': 27157, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:55.056139', 'step': 27157, 'epoch': 3}
{'type': 'loss', 'content': 0.05179004743695259, 'timestamp': '2025-10-02 00:59:55.061871', 'step': 27158, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:55.116304', 'step': 27158, 'epoch': 3}
{'type': 'loss', 'content': 0.06885404884815216, 'timestamp': '2025-10-02 00:59:55.119176', 'step': 27159, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:55.174614', 'step': 27159, 'epoch': 3}
{'type': 'loss', 'content': 0.07781343162059784, 'timestamp': '2025-10-02 00:59:55.181281', 'step': 27160, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:55.235495', 'step': 27160, 'epoch': 3}
{'type': 'loss', 'content': 0.08926353603601456, 'timestamp': '2025-10-02 00:59:55.238155', 'step': 27161, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:55.293673', 'step': 27161, 'epoch': 3}
{'type': 'loss', 'content': 0.04536201432347298, 'timestamp': '2025-10-02 00:59:55.299430', 'step': 27162, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:55.354582', 'step': 27162, 'epoch': 3}
{'type': 'loss', 'content': 0.0646127313375473, 'timestamp': '2025-10-02 00:59:55.357226', 'step': 27163, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:55.412644', 'step': 27163, 'epoch': 3}
{'type': 'loss', 'content': 0.0830734521150589, 'timestamp': '2025-10-02 00:59:55.422595', 'step': 27164, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:59:55.477489', 'step': 27164, 'epoch': 3}
{'type': 'loss', 'content': 0.024150388315320015, 'timestamp': '2025-10-02 00:59:55.479888', 'step': 27165, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:55.534964', 'step': 27165, 'epoch': 3}
{'type': 'loss', 'content': 0.0150897316634655, 'timestamp': '2025-10-02 00:59:55.537464', 'step': 27166, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 00:59:55.610753', 'step': 27166, 'epoch': 3}
{'type': 'loss', 'content': 0.011995854787528515, 'timestamp': '2025-10-02 00:59:55.623828', 'step': 27167, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:55.678160', 'step': 27167, 'epoch': 3}
{'type': 'loss', 'content': 0.006106039509177208, 'timestamp': '2025-10-02 00:59:55.684317', 'step': 27168, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:59:55.744886', 'step': 27168, 'epoch': 3}
{'type': 'loss', 'content': 0.0322207473218441, 'timestamp': '2025-10-02 00:59:55.756192', 'step': 27169, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:55.810748', 'step': 27169, 'epoch': 3}
{'type': 'loss', 'content': 0.03477092832326889, 'timestamp': '2025-10-02 00:59:55.816335', 'step': 27170, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:55.871780', 'step': 27170, 'epoch': 3}
{'type': 'loss', 'content': 0.02698640525341034, 'timestamp': '2025-10-02 00:59:55.874025', 'step': 27171, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:55.929416', 'step': 27171, 'epoch': 3}
{'type': 'loss', 'content': 0.0686386451125145, 'timestamp': '2025-10-02 00:59:55.936447', 'step': 27172, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:59:55.991642', 'step': 27172, 'epoch': 3}
{'type': 'loss', 'content': 0.045467913150787354, 'timestamp': '2025-10-02 00:59:56.000980', 'step': 27173, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:56.058270', 'step': 27173, 'epoch': 3}
{'type': 'loss', 'content': 0.022381270304322243, 'timestamp': '2025-10-02 00:59:56.060658', 'step': 27174, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:59:56.115206', 'step': 27174, 'epoch': 3}
{'type': 'loss', 'content': 0.056349027901887894, 'timestamp': '2025-10-02 00:59:56.117779', 'step': 27175, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:56.173172', 'step': 27175, 'epoch': 3}
{'type': 'loss', 'content': 0.04059511795639992, 'timestamp': '2025-10-02 00:59:56.179198', 'step': 27176, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:56.233413', 'step': 27176, 'epoch': 3}
{'type': 'loss', 'content': 0.04352232441306114, 'timestamp': '2025-10-02 00:59:56.242056', 'step': 27177, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:56.297764', 'step': 27177, 'epoch': 3}
{'type': 'loss', 'content': 0.008535321801900864, 'timestamp': '2025-10-02 00:59:56.305251', 'step': 27178, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 00:59:56.367512', 'step': 27178, 'epoch': 3}
{'type': 'loss', 'content': 0.0007588204462081194, 'timestamp': '2025-10-02 00:59:56.377970', 'step': 27179, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:56.432896', 'step': 27179, 'epoch': 3}
{'type': 'loss', 'content': 0.037194184958934784, 'timestamp': '2025-10-02 00:59:56.438647', 'step': 27180, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:56.493024', 'step': 27180, 'epoch': 3}
{'type': 'loss', 'content': 0.012727953493595123, 'timestamp': '2025-10-02 00:59:56.495834', 'step': 27181, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 00:59:56.551007', 'step': 27181, 'epoch': 3}
{'type': 'loss', 'content': 0.057562801986932755, 'timestamp': '2025-10-02 00:59:56.553484', 'step': 27182, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 00:59:56.607247', 'step': 27182, 'epoch': 3}
{'type': 'loss', 'content': 0.058146435767412186, 'timestamp': '2025-10-02 00:59:56.610130', 'step': 27183, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:56.665962', 'step': 27183, 'epoch': 3}
{'type': 'loss', 'content': 0.032231077551841736, 'timestamp': '2025-10-02 00:59:56.673985', 'step': 27184, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 00:59:56.728485', 'step': 27184, 'epoch': 3}
{'type': 'loss', 'content': 0.0556366890668869, 'timestamp': '2025-10-02 00:59:56.730785', 'step': 27185, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:56.784934', 'step': 27185, 'epoch': 3}
{'type': 'loss', 'content': 0.056827522814273834, 'timestamp': '2025-10-02 00:59:56.787560', 'step': 27186, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 00:59:56.855407', 'step': 27186, 'epoch': 3}
{'type': 'loss', 'content': 0.03084157034754753, 'timestamp': '2025-10-02 00:59:56.867338', 'step': 27187, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:56.921956', 'step': 27187, 'epoch': 3}
{'type': 'loss', 'content': 0.04699103906750679, 'timestamp': '2025-10-02 00:59:56.928854', 'step': 27188, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:56.983338', 'step': 27188, 'epoch': 3}
{'type': 'loss', 'content': 0.026697220280766487, 'timestamp': '2025-10-02 00:59:56.990692', 'step': 27189, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:57.046302', 'step': 27189, 'epoch': 3}
{'type': 'loss', 'content': 0.040383633226156235, 'timestamp': '2025-10-02 00:59:57.048508', 'step': 27190, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:59:57.105217', 'step': 27190, 'epoch': 3}
{'type': 'loss', 'content': 0.04363579303026199, 'timestamp': '2025-10-02 00:59:57.114850', 'step': 27191, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:57.169490', 'step': 27191, 'epoch': 3}
{'type': 'loss', 'content': 0.04565999284386635, 'timestamp': '2025-10-02 00:59:57.175605', 'step': 27192, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:57.229770', 'step': 27192, 'epoch': 3}
{'type': 'loss', 'content': 0.0002967356122098863, 'timestamp': '2025-10-02 00:59:57.238674', 'step': 27193, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:57.293169', 'step': 27193, 'epoch': 3}
{'type': 'loss', 'content': 0.054852716624736786, 'timestamp': '2025-10-02 00:59:57.295364', 'step': 27194, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:59:57.349442', 'step': 27194, 'epoch': 3}
{'type': 'loss', 'content': 0.08584997802972794, 'timestamp': '2025-10-02 00:59:57.351796', 'step': 27195, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:57.406377', 'step': 27195, 'epoch': 3}
{'type': 'loss', 'content': 0.05275385454297066, 'timestamp': '2025-10-02 00:59:57.412269', 'step': 27196, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:57.473625', 'step': 27196, 'epoch': 3}
{'type': 'loss', 'content': 0.014507543295621872, 'timestamp': '2025-10-02 00:59:57.479373', 'step': 27197, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:59:57.535904', 'step': 27197, 'epoch': 3}
{'type': 'loss', 'content': 0.04805443808436394, 'timestamp': '2025-10-02 00:59:57.545423', 'step': 27198, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:57.601303', 'step': 27198, 'epoch': 3}
{'type': 'loss', 'content': 0.015561356209218502, 'timestamp': '2025-10-02 00:59:57.607070', 'step': 27199, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 00:59:57.663565', 'step': 27199, 'epoch': 3}
{'type': 'loss', 'content': 0.016933593899011612, 'timestamp': '2025-10-02 00:59:57.669633', 'step': 27200, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:57.723497', 'step': 27200, 'epoch': 3}
{'type': 'loss', 'content': 0.02498171664774418, 'timestamp': '2025-10-02 00:59:57.725924', 'step': 27201, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:57.780918', 'step': 27201, 'epoch': 3}
{'type': 'loss', 'content': 0.07641910016536713, 'timestamp': '2025-10-02 00:59:57.784154', 'step': 27202, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:57.841235', 'step': 27202, 'epoch': 3}
{'type': 'loss', 'content': 0.001647723256610334, 'timestamp': '2025-10-02 00:59:57.846615', 'step': 27203, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:57.901822', 'step': 27203, 'epoch': 3}
{'type': 'loss', 'content': 0.1282392144203186, 'timestamp': '2025-10-02 00:59:57.908488', 'step': 27204, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:59:57.963223', 'step': 27204, 'epoch': 3}
{'type': 'loss', 'content': 0.06359674781560898, 'timestamp': '2025-10-02 00:59:57.965541', 'step': 27205, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:58.021693', 'step': 27205, 'epoch': 3}
{'type': 'loss', 'content': 0.05685391649603844, 'timestamp': '2025-10-02 00:59:58.028945', 'step': 27206, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:58.084130', 'step': 27206, 'epoch': 3}
{'type': 'loss', 'content': 0.027255121618509293, 'timestamp': '2025-10-02 00:59:58.086681', 'step': 27207, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:58.142011', 'step': 27207, 'epoch': 3}
{'type': 'loss', 'content': 0.02715117484331131, 'timestamp': '2025-10-02 00:59:58.148169', 'step': 27208, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 00:59:58.206395', 'step': 27208, 'epoch': 3}
{'type': 'loss', 'content': 0.004469171166419983, 'timestamp': '2025-10-02 00:59:58.217401', 'step': 27209, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:58.272061', 'step': 27209, 'epoch': 3}
{'type': 'loss', 'content': 0.12132269144058228, 'timestamp': '2025-10-02 00:59:58.275522', 'step': 27210, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:58.332523', 'step': 27210, 'epoch': 3}
{'type': 'loss', 'content': 0.01004817709326744, 'timestamp': '2025-10-02 00:59:58.335914', 'step': 27211, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 00:59:58.401023', 'step': 27211, 'epoch': 3}
{'type': 'loss', 'content': 0.02970539592206478, 'timestamp': '2025-10-02 00:59:58.412789', 'step': 27212, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:59:58.469868', 'step': 27212, 'epoch': 3}
{'type': 'loss', 'content': 0.14162394404411316, 'timestamp': '2025-10-02 00:59:58.473249', 'step': 27213, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:58.530681', 'step': 27213, 'epoch': 3}
{'type': 'loss', 'content': 0.03885591775178909, 'timestamp': '2025-10-02 00:59:58.533987', 'step': 27214, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:58.591068', 'step': 27214, 'epoch': 3}
{'type': 'loss', 'content': 0.007307234685868025, 'timestamp': '2025-10-02 00:59:58.594475', 'step': 27215, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:58.651511', 'step': 27215, 'epoch': 3}
{'type': 'loss', 'content': 0.03574822098016739, 'timestamp': '2025-10-02 00:59:58.658547', 'step': 27216, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 00:59:58.715411', 'step': 27216, 'epoch': 3}
{'type': 'loss', 'content': 0.037363141775131226, 'timestamp': '2025-10-02 00:59:58.718605', 'step': 27217, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:58.776103', 'step': 27217, 'epoch': 3}
{'type': 'loss', 'content': 0.0462949238717556, 'timestamp': '2025-10-02 00:59:58.779363', 'step': 27218, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:58.836383', 'step': 27218, 'epoch': 3}
{'type': 'loss', 'content': 0.04836999624967575, 'timestamp': '2025-10-02 00:59:58.839940', 'step': 27219, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 00:59:58.896505', 'step': 27219, 'epoch': 3}
{'type': 'loss', 'content': 0.0731855034828186, 'timestamp': '2025-10-02 00:59:58.903594', 'step': 27220, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:58.959582', 'step': 27220, 'epoch': 3}
{'type': 'loss', 'content': 0.019703295081853867, 'timestamp': '2025-10-02 00:59:58.968794', 'step': 27221, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:59.027434', 'step': 27221, 'epoch': 3}
{'type': 'loss', 'content': 0.1340852826833725, 'timestamp': '2025-10-02 00:59:59.030456', 'step': 27222, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:59.085823', 'step': 27222, 'epoch': 3}
{'type': 'loss', 'content': 0.038454510271549225, 'timestamp': '2025-10-02 00:59:59.091310', 'step': 27223, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:59.154636', 'step': 27223, 'epoch': 3}
{'type': 'loss', 'content': 0.027178119868040085, 'timestamp': '2025-10-02 00:59:59.161584', 'step': 27224, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:59.215727', 'step': 27224, 'epoch': 3}
{'type': 'loss', 'content': 0.062412142753601074, 'timestamp': '2025-10-02 00:59:59.219012', 'step': 27225, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 00:59:59.274421', 'step': 27225, 'epoch': 3}
{'type': 'loss', 'content': 0.10415679961442947, 'timestamp': '2025-10-02 00:59:59.277352', 'step': 27226, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 00:59:59.337440', 'step': 27226, 'epoch': 3}
{'type': 'loss', 'content': 0.07912859320640564, 'timestamp': '2025-10-02 00:59:59.340002', 'step': 27227, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 00:59:59.394886', 'step': 27227, 'epoch': 3}
{'type': 'loss', 'content': 0.024001114070415497, 'timestamp': '2025-10-02 00:59:59.401325', 'step': 27228, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 00:59:59.458259', 'step': 27228, 'epoch': 3}
{'type': 'loss', 'content': 0.10224510729312897, 'timestamp': '2025-10-02 00:59:59.461230', 'step': 27229, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 00:59:59.517453', 'step': 27229, 'epoch': 3}
{'type': 'loss', 'content': 0.05996648594737053, 'timestamp': '2025-10-02 00:59:59.521781', 'step': 27230, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 00:59:59.578943', 'step': 27230, 'epoch': 3}
{'type': 'loss', 'content': 0.03757895156741142, 'timestamp': '2025-10-02 00:59:59.588257', 'step': 27231, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:59:59.656427', 'step': 27231, 'epoch': 3}
{'type': 'loss', 'content': 0.002898226259276271, 'timestamp': '2025-10-02 00:59:59.667902', 'step': 27232, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 00:59:59.730796', 'step': 27232, 'epoch': 3}
{'type': 'loss', 'content': 0.021999916061758995, 'timestamp': '2025-10-02 00:59:59.742321', 'step': 27233, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 00:59:59.809427', 'step': 27233, 'epoch': 3}
{'type': 'loss', 'content': 0.0720379650592804, 'timestamp': '2025-10-02 00:59:59.812677', 'step': 27234, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 00:59:59.877472', 'step': 27234, 'epoch': 3}
{'type': 'loss', 'content': 0.05247116833925247, 'timestamp': '2025-10-02 00:59:59.884750', 'step': 27235, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 00:59:59.942920', 'step': 27235, 'epoch': 3}
{'type': 'loss', 'content': 0.018051717430353165, 'timestamp': '2025-10-02 00:59:59.953269', 'step': 27236, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:00.009284', 'step': 27236, 'epoch': 3}
{'type': 'loss', 'content': 0.05225556716322899, 'timestamp': '2025-10-02 01:00:00.015081', 'step': 27237, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:00.071511', 'step': 27237, 'epoch': 3}
{'type': 'loss', 'content': 0.04253764823079109, 'timestamp': '2025-10-02 01:00:00.074300', 'step': 27238, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:00.130281', 'step': 27238, 'epoch': 3}
{'type': 'loss', 'content': 0.015591933391988277, 'timestamp': '2025-10-02 01:00:00.135695', 'step': 27239, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:00.195485', 'step': 27239, 'epoch': 3}
{'type': 'loss', 'content': 0.011296526528894901, 'timestamp': '2025-10-02 01:00:00.201978', 'step': 27240, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:00.256680', 'step': 27240, 'epoch': 3}
{'type': 'loss', 'content': 0.06628492474555969, 'timestamp': '2025-10-02 01:00:00.266072', 'step': 27241, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:00.320521', 'step': 27241, 'epoch': 3}
{'type': 'loss', 'content': 0.029637381434440613, 'timestamp': '2025-10-02 01:00:00.326286', 'step': 27242, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:00.381598', 'step': 27242, 'epoch': 3}
{'type': 'loss', 'content': 0.04798857867717743, 'timestamp': '2025-10-02 01:00:00.384140', 'step': 27243, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:00.438898', 'step': 27243, 'epoch': 3}
{'type': 'loss', 'content': 0.09083680063486099, 'timestamp': '2025-10-02 01:00:00.446149', 'step': 27244, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:00.500245', 'step': 27244, 'epoch': 3}
{'type': 'loss', 'content': 0.039917342364788055, 'timestamp': '2025-10-02 01:00:00.510430', 'step': 27245, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:00.567046', 'step': 27245, 'epoch': 3}
{'type': 'loss', 'content': 0.08894697576761246, 'timestamp': '2025-10-02 01:00:00.569796', 'step': 27246, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:00.625730', 'step': 27246, 'epoch': 3}
{'type': 'loss', 'content': 0.022800814360380173, 'timestamp': '2025-10-02 01:00:00.635113', 'step': 27247, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:00.690511', 'step': 27247, 'epoch': 3}
{'type': 'loss', 'content': 0.030663806945085526, 'timestamp': '2025-10-02 01:00:00.698696', 'step': 27248, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:00.754082', 'step': 27248, 'epoch': 3}
{'type': 'loss', 'content': 0.03256222978234291, 'timestamp': '2025-10-02 01:00:00.756553', 'step': 27249, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:00.810799', 'step': 27249, 'epoch': 3}
{'type': 'loss', 'content': 0.057178352028131485, 'timestamp': '2025-10-02 01:00:00.813010', 'step': 27250, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:00.868376', 'step': 27250, 'epoch': 3}
{'type': 'loss', 'content': 0.050614792853593826, 'timestamp': '2025-10-02 01:00:00.877899', 'step': 27251, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:00.932794', 'step': 27251, 'epoch': 3}
{'type': 'loss', 'content': 0.05074341595172882, 'timestamp': '2025-10-02 01:00:00.938811', 'step': 27252, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:00.992404', 'step': 27252, 'epoch': 3}
{'type': 'loss', 'content': 0.08252649754285812, 'timestamp': '2025-10-02 01:00:00.994947', 'step': 27253, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:01.050138', 'step': 27253, 'epoch': 3}
{'type': 'loss', 'content': 0.03500783443450928, 'timestamp': '2025-10-02 01:00:01.052520', 'step': 27254, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:01.107430', 'step': 27254, 'epoch': 3}
{'type': 'loss', 'content': 0.025320924818515778, 'timestamp': '2025-10-02 01:00:01.113319', 'step': 27255, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:01.168288', 'step': 27255, 'epoch': 3}
{'type': 'loss', 'content': 0.14432793855667114, 'timestamp': '2025-10-02 01:00:01.174259', 'step': 27256, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:01.227897', 'step': 27256, 'epoch': 3}
{'type': 'loss', 'content': 0.10754071176052094, 'timestamp': '2025-10-02 01:00:01.230252', 'step': 27257, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:01.284884', 'step': 27257, 'epoch': 3}
{'type': 'loss', 'content': 0.007134019397199154, 'timestamp': '2025-10-02 01:00:01.290420', 'step': 27258, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:01.345959', 'step': 27258, 'epoch': 3}
{'type': 'loss', 'content': 0.09657450020313263, 'timestamp': '2025-10-02 01:00:01.348371', 'step': 27259, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:00:01.402912', 'step': 27259, 'epoch': 3}
{'type': 'loss', 'content': 0.03312229737639427, 'timestamp': '2025-10-02 01:00:01.408803', 'step': 27260, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:01.462818', 'step': 27260, 'epoch': 3}
{'type': 'loss', 'content': 0.048079367727041245, 'timestamp': '2025-10-02 01:00:01.470289', 'step': 27261, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:00:01.533409', 'step': 27261, 'epoch': 3}
{'type': 'loss', 'content': 0.030643414705991745, 'timestamp': '2025-10-02 01:00:01.544268', 'step': 27262, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:01.605936', 'step': 27262, 'epoch': 3}
{'type': 'loss', 'content': 0.03898584842681885, 'timestamp': '2025-10-02 01:00:01.616380', 'step': 27263, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:01.672700', 'step': 27263, 'epoch': 3}
{'type': 'loss', 'content': 0.04131422936916351, 'timestamp': '2025-10-02 01:00:01.679147', 'step': 27264, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:01.733941', 'step': 27264, 'epoch': 3}
{'type': 'loss', 'content': 0.06439568847417831, 'timestamp': '2025-10-02 01:00:01.739320', 'step': 27265, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:01.794053', 'step': 27265, 'epoch': 3}
{'type': 'loss', 'content': 0.01983526535332203, 'timestamp': '2025-10-02 01:00:01.796361', 'step': 27266, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:01.851610', 'step': 27266, 'epoch': 3}
{'type': 'loss', 'content': 0.019123556092381477, 'timestamp': '2025-10-02 01:00:01.854110', 'step': 27267, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:01.908073', 'step': 27267, 'epoch': 3}
{'type': 'loss', 'content': 0.02026825211942196, 'timestamp': '2025-10-02 01:00:01.916341', 'step': 27268, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:01.970615', 'step': 27268, 'epoch': 3}
{'type': 'loss', 'content': 0.012432199902832508, 'timestamp': '2025-10-02 01:00:01.979948', 'step': 27269, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:00:02.034730', 'step': 27269, 'epoch': 3}
{'type': 'loss', 'content': 0.12883220613002777, 'timestamp': '2025-10-02 01:00:02.037110', 'step': 27270, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:02.091596', 'step': 27270, 'epoch': 3}
{'type': 'loss', 'content': 0.03489188104867935, 'timestamp': '2025-10-02 01:00:02.097034', 'step': 27271, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:02.152255', 'step': 27271, 'epoch': 3}
{'type': 'loss', 'content': 0.013688873499631882, 'timestamp': '2025-10-02 01:00:02.160161', 'step': 27272, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:00:02.229793', 'step': 27272, 'epoch': 3}
{'type': 'loss', 'content': 0.032102711498737335, 'timestamp': '2025-10-02 01:00:02.241536', 'step': 27273, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:02.303234', 'step': 27273, 'epoch': 3}
{'type': 'loss', 'content': 0.033802516758441925, 'timestamp': '2025-10-02 01:00:02.313683', 'step': 27274, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:02.368820', 'step': 27274, 'epoch': 3}
{'type': 'loss', 'content': 0.1111217737197876, 'timestamp': '2025-10-02 01:00:02.371350', 'step': 27275, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:02.426377', 'step': 27275, 'epoch': 3}
{'type': 'loss', 'content': 0.014545893296599388, 'timestamp': '2025-10-02 01:00:02.436410', 'step': 27276, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:02.491510', 'step': 27276, 'epoch': 3}
{'type': 'loss', 'content': 0.009017673321068287, 'timestamp': '2025-10-02 01:00:02.493850', 'step': 27277, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:02.548846', 'step': 27277, 'epoch': 3}
{'type': 'loss', 'content': 0.028557155281305313, 'timestamp': '2025-10-02 01:00:02.551485', 'step': 27278, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:02.606680', 'step': 27278, 'epoch': 3}
{'type': 'loss', 'content': 0.025725653395056725, 'timestamp': '2025-10-02 01:00:02.616020', 'step': 27279, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:00:02.688042', 'step': 27279, 'epoch': 3}
{'type': 'loss', 'content': 0.0439317412674427, 'timestamp': '2025-10-02 01:00:02.700797', 'step': 27280, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:02.757788', 'step': 27280, 'epoch': 3}
{'type': 'loss', 'content': 0.001007048413157463, 'timestamp': '2025-10-02 01:00:02.761063', 'step': 27281, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:02.824086', 'step': 27281, 'epoch': 3}
{'type': 'loss', 'content': 0.023797661066055298, 'timestamp': '2025-10-02 01:00:02.826692', 'step': 27282, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:02.881193', 'step': 27282, 'epoch': 3}
{'type': 'loss', 'content': 0.06458105146884918, 'timestamp': '2025-10-02 01:00:02.888473', 'step': 27283, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:02.943660', 'step': 27283, 'epoch': 3}
{'type': 'loss', 'content': 0.026853039860725403, 'timestamp': '2025-10-02 01:00:02.949822', 'step': 27284, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:03.011040', 'step': 27284, 'epoch': 3}
{'type': 'loss', 'content': 0.007051877211779356, 'timestamp': '2025-10-02 01:00:03.022433', 'step': 27285, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:03.077271', 'step': 27285, 'epoch': 3}
{'type': 'loss', 'content': 0.09615049511194229, 'timestamp': '2025-10-02 01:00:03.079734', 'step': 27286, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:03.135222', 'step': 27286, 'epoch': 3}
{'type': 'loss', 'content': 0.011817681603133678, 'timestamp': '2025-10-02 01:00:03.138068', 'step': 27287, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:03.196835', 'step': 27287, 'epoch': 3}
{'type': 'loss', 'content': 0.05153890699148178, 'timestamp': '2025-10-02 01:00:03.203495', 'step': 27288, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:03.258845', 'step': 27288, 'epoch': 3}
{'type': 'loss', 'content': 0.045259397476911545, 'timestamp': '2025-10-02 01:00:03.272343', 'step': 27289, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:03.330082', 'step': 27289, 'epoch': 3}
{'type': 'loss', 'content': 0.019089313223958015, 'timestamp': '2025-10-02 01:00:03.333933', 'step': 27290, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:03.388820', 'step': 27290, 'epoch': 3}
{'type': 'loss', 'content': 0.044409193098545074, 'timestamp': '2025-10-02 01:00:03.392054', 'step': 27291, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:03.448295', 'step': 27291, 'epoch': 3}
{'type': 'loss', 'content': 0.08267643302679062, 'timestamp': '2025-10-02 01:00:03.454326', 'step': 27292, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:03.515815', 'step': 27292, 'epoch': 3}
{'type': 'loss', 'content': 0.023705795407295227, 'timestamp': '2025-10-02 01:00:03.527110', 'step': 27293, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:03.590213', 'step': 27293, 'epoch': 3}
{'type': 'loss', 'content': 0.007916111499071121, 'timestamp': '2025-10-02 01:00:03.592595', 'step': 27294, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:03.647034', 'step': 27294, 'epoch': 3}
{'type': 'loss', 'content': 0.059902507811784744, 'timestamp': '2025-10-02 01:00:03.651387', 'step': 27295, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:03.707208', 'step': 27295, 'epoch': 3}
{'type': 'loss', 'content': 0.005446792114526033, 'timestamp': '2025-10-02 01:00:03.717752', 'step': 27296, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:03.780963', 'step': 27296, 'epoch': 3}
{'type': 'loss', 'content': 0.04650840535759926, 'timestamp': '2025-10-02 01:00:03.784258', 'step': 27297, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:03.839513', 'step': 27297, 'epoch': 3}
{'type': 'loss', 'content': 0.05926525220274925, 'timestamp': '2025-10-02 01:00:03.842537', 'step': 27298, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:03.897546', 'step': 27298, 'epoch': 3}
{'type': 'loss', 'content': 0.031297627836465836, 'timestamp': '2025-10-02 01:00:03.900023', 'step': 27299, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:03.955362', 'step': 27299, 'epoch': 3}
{'type': 'loss', 'content': 0.03057284466922283, 'timestamp': '2025-10-02 01:00:03.961402', 'step': 27300, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:04.016260', 'step': 27300, 'epoch': 3}
{'type': 'loss', 'content': 0.03386511653661728, 'timestamp': '2025-10-02 01:00:04.026496', 'step': 27301, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:04.088603', 'step': 27301, 'epoch': 3}
{'type': 'loss', 'content': 0.0001545761915622279, 'timestamp': '2025-10-02 01:00:04.099082', 'step': 27302, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:04.155082', 'step': 27302, 'epoch': 3}
{'type': 'loss', 'content': 0.01803876832127571, 'timestamp': '2025-10-02 01:00:04.160632', 'step': 27303, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:04.215801', 'step': 27303, 'epoch': 3}
{'type': 'loss', 'content': 0.051210641860961914, 'timestamp': '2025-10-02 01:00:04.225855', 'step': 27304, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:04.279357', 'step': 27304, 'epoch': 3}
{'type': 'loss', 'content': 0.0390155203640461, 'timestamp': '2025-10-02 01:00:04.281803', 'step': 27305, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:04.336401', 'step': 27305, 'epoch': 3}
{'type': 'loss', 'content': 0.1000610738992691, 'timestamp': '2025-10-02 01:00:04.339757', 'step': 27306, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:04.396757', 'step': 27306, 'epoch': 3}
{'type': 'loss', 'content': 0.04590483754873276, 'timestamp': '2025-10-02 01:00:04.399523', 'step': 27307, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:04.454505', 'step': 27307, 'epoch': 3}
{'type': 'loss', 'content': 0.05535876005887985, 'timestamp': '2025-10-02 01:00:04.460852', 'step': 27308, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:04.515869', 'step': 27308, 'epoch': 3}
{'type': 'loss', 'content': 0.017125435173511505, 'timestamp': '2025-10-02 01:00:04.518431', 'step': 27309, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:04.573783', 'step': 27309, 'epoch': 3}
{'type': 'loss', 'content': 0.0010373846162110567, 'timestamp': '2025-10-02 01:00:04.576233', 'step': 27310, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:00:04.630606', 'step': 27310, 'epoch': 3}
{'type': 'loss', 'content': 0.042458631098270416, 'timestamp': '2025-10-02 01:00:04.635211', 'step': 27311, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:04.689435', 'step': 27311, 'epoch': 3}
{'type': 'loss', 'content': 0.040382131934165955, 'timestamp': '2025-10-02 01:00:04.695440', 'step': 27312, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:04.750024', 'step': 27312, 'epoch': 3}
{'type': 'loss', 'content': 0.020288603380322456, 'timestamp': '2025-10-02 01:00:04.752895', 'step': 27313, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:04.807820', 'step': 27313, 'epoch': 3}
{'type': 'loss', 'content': 0.043799810111522675, 'timestamp': '2025-10-02 01:00:04.813408', 'step': 27314, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:04.868408', 'step': 27314, 'epoch': 3}
{'type': 'loss', 'content': 0.0009398553520441055, 'timestamp': '2025-10-02 01:00:04.877646', 'step': 27315, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:04.936948', 'step': 27315, 'epoch': 3}
{'type': 'loss', 'content': 0.048319071531295776, 'timestamp': '2025-10-02 01:00:04.943156', 'step': 27316, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:04.998648', 'step': 27316, 'epoch': 3}
{'type': 'loss', 'content': 0.02805907651782036, 'timestamp': '2025-10-02 01:00:05.001265', 'step': 27317, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:05.055686', 'step': 27317, 'epoch': 3}
{'type': 'loss', 'content': 0.06252310425043106, 'timestamp': '2025-10-02 01:00:05.058417', 'step': 27318, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:05.117634', 'step': 27318, 'epoch': 3}
{'type': 'loss', 'content': 0.043069686740636826, 'timestamp': '2025-10-02 01:00:05.127949', 'step': 27319, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:05.183188', 'step': 27319, 'epoch': 3}
{'type': 'loss', 'content': 0.03662308678030968, 'timestamp': '2025-10-02 01:00:05.189158', 'step': 27320, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:05.244324', 'step': 27320, 'epoch': 3}
{'type': 'loss', 'content': 0.05531179532408714, 'timestamp': '2025-10-02 01:00:05.247023', 'step': 27321, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:05.302088', 'step': 27321, 'epoch': 3}
{'type': 'loss', 'content': 0.028264228254556656, 'timestamp': '2025-10-02 01:00:05.304513', 'step': 27322, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:05.359669', 'step': 27322, 'epoch': 3}
{'type': 'loss', 'content': 0.0773945152759552, 'timestamp': '2025-10-02 01:00:05.368919', 'step': 27323, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:05.424174', 'step': 27323, 'epoch': 3}
{'type': 'loss', 'content': 0.012227021157741547, 'timestamp': '2025-10-02 01:00:05.434310', 'step': 27324, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:05.488848', 'step': 27324, 'epoch': 3}
{'type': 'loss', 'content': 0.016492005437612534, 'timestamp': '2025-10-02 01:00:05.499096', 'step': 27325, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:05.554066', 'step': 27325, 'epoch': 3}
{'type': 'loss', 'content': 0.055146388709545135, 'timestamp': '2025-10-02 01:00:05.559496', 'step': 27326, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:05.615768', 'step': 27326, 'epoch': 3}
{'type': 'loss', 'content': 0.05450019985437393, 'timestamp': '2025-10-02 01:00:05.618058', 'step': 27327, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:05.672115', 'step': 27327, 'epoch': 3}
{'type': 'loss', 'content': 0.1866886019706726, 'timestamp': '2025-10-02 01:00:05.678008', 'step': 27328, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:05.738662', 'step': 27328, 'epoch': 3}
{'type': 'loss', 'content': 0.0020469536539167166, 'timestamp': '2025-10-02 01:00:05.749997', 'step': 27329, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:05.805890', 'step': 27329, 'epoch': 3}
{'type': 'loss', 'content': 0.00255420058965683, 'timestamp': '2025-10-02 01:00:05.815242', 'step': 27330, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:05.879744', 'step': 27330, 'epoch': 3}
{'type': 'loss', 'content': 0.06032567843794823, 'timestamp': '2025-10-02 01:00:05.885479', 'step': 27331, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:05.939919', 'step': 27331, 'epoch': 3}
{'type': 'loss', 'content': 0.05404791608452797, 'timestamp': '2025-10-02 01:00:05.946474', 'step': 27332, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:06.001179', 'step': 27332, 'epoch': 3}
{'type': 'loss', 'content': 0.09966608136892319, 'timestamp': '2025-10-02 01:00:06.006759', 'step': 27333, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:06.062044', 'step': 27333, 'epoch': 3}
{'type': 'loss', 'content': 0.023595282807946205, 'timestamp': '2025-10-02 01:00:06.069299', 'step': 27334, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:06.130852', 'step': 27334, 'epoch': 3}
{'type': 'loss', 'content': 0.03669898211956024, 'timestamp': '2025-10-02 01:00:06.141351', 'step': 27335, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:06.197299', 'step': 27335, 'epoch': 3}
{'type': 'loss', 'content': 0.02335328981280327, 'timestamp': '2025-10-02 01:00:06.203262', 'step': 27336, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:06.257958', 'step': 27336, 'epoch': 3}
{'type': 'loss', 'content': 0.017768792808055878, 'timestamp': '2025-10-02 01:00:06.263682', 'step': 27337, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:06.319642', 'step': 27337, 'epoch': 3}
{'type': 'loss', 'content': 0.028263861313462257, 'timestamp': '2025-10-02 01:00:06.322238', 'step': 27338, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:06.376759', 'step': 27338, 'epoch': 3}
{'type': 'loss', 'content': 0.05980272963643074, 'timestamp': '2025-10-02 01:00:06.379238', 'step': 27339, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:00:06.433744', 'step': 27339, 'epoch': 3}
{'type': 'loss', 'content': 0.0817493200302124, 'timestamp': '2025-10-02 01:00:06.440009', 'step': 27340, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:06.498898', 'step': 27340, 'epoch': 3}
{'type': 'loss', 'content': 0.0038622445426881313, 'timestamp': '2025-10-02 01:00:06.509924', 'step': 27341, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:06.565063', 'step': 27341, 'epoch': 3}
{'type': 'loss', 'content': 0.12812064588069916, 'timestamp': '2025-10-02 01:00:06.567619', 'step': 27342, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:06.621447', 'step': 27342, 'epoch': 3}
{'type': 'loss', 'content': 0.09584642201662064, 'timestamp': '2025-10-02 01:00:06.624382', 'step': 27343, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:06.680254', 'step': 27343, 'epoch': 3}
{'type': 'loss', 'content': 0.057523373514413834, 'timestamp': '2025-10-02 01:00:06.686925', 'step': 27344, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:06.741049', 'step': 27344, 'epoch': 3}
{'type': 'loss', 'content': 0.010126613080501556, 'timestamp': '2025-10-02 01:00:06.750389', 'step': 27345, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:06.805497', 'step': 27345, 'epoch': 3}
{'type': 'loss', 'content': 0.04548504576086998, 'timestamp': '2025-10-02 01:00:06.811190', 'step': 27346, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:06.867663', 'step': 27346, 'epoch': 3}
{'type': 'loss', 'content': 0.08280106633901596, 'timestamp': '2025-10-02 01:00:06.870464', 'step': 27347, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:06.925432', 'step': 27347, 'epoch': 3}
{'type': 'loss', 'content': 0.04122743383049965, 'timestamp': '2025-10-02 01:00:06.931312', 'step': 27348, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:00:06.998930', 'step': 27348, 'epoch': 3}
{'type': 'loss', 'content': 0.02069312334060669, 'timestamp': '2025-10-02 01:00:07.012380', 'step': 27349, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:07.068318', 'step': 27349, 'epoch': 3}
{'type': 'loss', 'content': 0.04341060668230057, 'timestamp': '2025-10-02 01:00:07.070795', 'step': 27350, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:07.132365', 'step': 27350, 'epoch': 3}
{'type': 'loss', 'content': 0.02648698352277279, 'timestamp': '2025-10-02 01:00:07.142846', 'step': 27351, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:07.197755', 'step': 27351, 'epoch': 3}
{'type': 'loss', 'content': 0.03136447072029114, 'timestamp': '2025-10-02 01:00:07.204158', 'step': 27352, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:07.258233', 'step': 27352, 'epoch': 3}
{'type': 'loss', 'content': 0.06586864590644836, 'timestamp': '2025-10-02 01:00:07.260726', 'step': 27353, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:07.316226', 'step': 27353, 'epoch': 3}
{'type': 'loss', 'content': 0.08820789307355881, 'timestamp': '2025-10-02 01:00:07.318388', 'step': 27354, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:07.373833', 'step': 27354, 'epoch': 3}
{'type': 'loss', 'content': 0.06458098441362381, 'timestamp': '2025-10-02 01:00:07.376330', 'step': 27355, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:07.431100', 'step': 27355, 'epoch': 3}
{'type': 'loss', 'content': 0.07797534018754959, 'timestamp': '2025-10-02 01:00:07.437643', 'step': 27356, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:07.493044', 'step': 27356, 'epoch': 3}
{'type': 'loss', 'content': 0.06554844230413437, 'timestamp': '2025-10-02 01:00:07.495653', 'step': 27357, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:00:07.551502', 'step': 27357, 'epoch': 3}
{'type': 'loss', 'content': 0.06537304818630219, 'timestamp': '2025-10-02 01:00:07.554421', 'step': 27358, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:07.612686', 'step': 27358, 'epoch': 3}
{'type': 'loss', 'content': 0.01840203069150448, 'timestamp': '2025-10-02 01:00:07.616110', 'step': 27359, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:07.672913', 'step': 27359, 'epoch': 3}
{'type': 'loss', 'content': 0.036828335374593735, 'timestamp': '2025-10-02 01:00:07.679277', 'step': 27360, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:07.740388', 'step': 27360, 'epoch': 3}
{'type': 'loss', 'content': 0.014073994010686874, 'timestamp': '2025-10-02 01:00:07.751755', 'step': 27361, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:07.809689', 'step': 27361, 'epoch': 3}
{'type': 'loss', 'content': 0.07741914689540863, 'timestamp': '2025-10-02 01:00:07.812649', 'step': 27362, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:07.870434', 'step': 27362, 'epoch': 3}
{'type': 'loss', 'content': 0.033151350915431976, 'timestamp': '2025-10-02 01:00:07.876171', 'step': 27363, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:07.936334', 'step': 27363, 'epoch': 3}
{'type': 'loss', 'content': 0.017099281772971153, 'timestamp': '2025-10-02 01:00:07.947309', 'step': 27364, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:08.003423', 'step': 27364, 'epoch': 3}
{'type': 'loss', 'content': 0.00774044543504715, 'timestamp': '2025-10-02 01:00:08.006527', 'step': 27365, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:08.063273', 'step': 27365, 'epoch': 3}
{'type': 'loss', 'content': 0.08367355912923813, 'timestamp': '2025-10-02 01:00:08.069182', 'step': 27366, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:08.125376', 'step': 27366, 'epoch': 3}
{'type': 'loss', 'content': 0.027705352753400803, 'timestamp': '2025-10-02 01:00:08.131233', 'step': 27367, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:08.193253', 'step': 27367, 'epoch': 3}
{'type': 'loss', 'content': 0.041758712381124496, 'timestamp': '2025-10-02 01:00:08.199577', 'step': 27368, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:08.255850', 'step': 27368, 'epoch': 3}
{'type': 'loss', 'content': 0.04207216203212738, 'timestamp': '2025-10-02 01:00:08.258358', 'step': 27369, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:08.313508', 'step': 27369, 'epoch': 3}
{'type': 'loss', 'content': 0.012398255057632923, 'timestamp': '2025-10-02 01:00:08.316658', 'step': 27370, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:00:08.389802', 'step': 27370, 'epoch': 3}
{'type': 'loss', 'content': 0.030238423496484756, 'timestamp': '2025-10-02 01:00:08.402122', 'step': 27371, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:00:08.460157', 'step': 27371, 'epoch': 3}
{'type': 'loss', 'content': 0.07573807239532471, 'timestamp': '2025-10-02 01:00:08.466991', 'step': 27372, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:08.523434', 'step': 27372, 'epoch': 3}
{'type': 'loss', 'content': 0.04633275419473648, 'timestamp': '2025-10-02 01:00:08.526757', 'step': 27373, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:08.592350', 'step': 27373, 'epoch': 3}
{'type': 'loss', 'content': 0.044166307896375656, 'timestamp': '2025-10-02 01:00:08.602826', 'step': 27374, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:08.660033', 'step': 27374, 'epoch': 3}
{'type': 'loss', 'content': 0.04490431025624275, 'timestamp': '2025-10-02 01:00:08.663829', 'step': 27375, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:08.722051', 'step': 27375, 'epoch': 3}
{'type': 'loss', 'content': 0.1314583122730255, 'timestamp': '2025-10-02 01:00:08.728979', 'step': 27376, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:08.784655', 'step': 27376, 'epoch': 3}
{'type': 'loss', 'content': 0.044003672897815704, 'timestamp': '2025-10-02 01:00:08.787703', 'step': 27377, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:08.845757', 'step': 27377, 'epoch': 3}
{'type': 'loss', 'content': 0.04330939054489136, 'timestamp': '2025-10-02 01:00:08.848123', 'step': 27378, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:08.905454', 'step': 27378, 'epoch': 3}
{'type': 'loss', 'content': 0.030050529167056084, 'timestamp': '2025-10-02 01:00:08.907948', 'step': 27379, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:08.963897', 'step': 27379, 'epoch': 3}
{'type': 'loss', 'content': 0.08636835962533951, 'timestamp': '2025-10-02 01:00:08.970416', 'step': 27380, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:09.024202', 'step': 27380, 'epoch': 3}
{'type': 'loss', 'content': 0.07391837239265442, 'timestamp': '2025-10-02 01:00:09.026696', 'step': 27381, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:09.083125', 'step': 27381, 'epoch': 3}
{'type': 'loss', 'content': 0.037511974573135376, 'timestamp': '2025-10-02 01:00:09.088923', 'step': 27382, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:09.143253', 'step': 27382, 'epoch': 3}
{'type': 'loss', 'content': 0.12043725699186325, 'timestamp': '2025-10-02 01:00:09.145846', 'step': 27383, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:09.200509', 'step': 27383, 'epoch': 3}
{'type': 'loss', 'content': 0.030284376814961433, 'timestamp': '2025-10-02 01:00:09.208589', 'step': 27384, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:09.263663', 'step': 27384, 'epoch': 3}
{'type': 'loss', 'content': 0.03627167269587517, 'timestamp': '2025-10-02 01:00:09.266227', 'step': 27385, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:09.322344', 'step': 27385, 'epoch': 3}
{'type': 'loss', 'content': 0.0763542652130127, 'timestamp': '2025-10-02 01:00:09.324466', 'step': 27386, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:09.379128', 'step': 27386, 'epoch': 3}
{'type': 'loss', 'content': 0.011124933138489723, 'timestamp': '2025-10-02 01:00:09.388374', 'step': 27387, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:09.446212', 'step': 27387, 'epoch': 3}
{'type': 'loss', 'content': 0.06304377317428589, 'timestamp': '2025-10-02 01:00:09.452406', 'step': 27388, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:09.506576', 'step': 27388, 'epoch': 3}
{'type': 'loss', 'content': 0.041798558086156845, 'timestamp': '2025-10-02 01:00:09.509277', 'step': 27389, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:09.564855', 'step': 27389, 'epoch': 3}
{'type': 'loss', 'content': 0.053359147161245346, 'timestamp': '2025-10-02 01:00:09.573026', 'step': 27390, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:09.634952', 'step': 27390, 'epoch': 3}
{'type': 'loss', 'content': 0.06993921101093292, 'timestamp': '2025-10-02 01:00:09.637232', 'step': 27391, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:09.691497', 'step': 27391, 'epoch': 3}
{'type': 'loss', 'content': 0.06806521117687225, 'timestamp': '2025-10-02 01:00:09.697727', 'step': 27392, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:09.758788', 'step': 27392, 'epoch': 3}
{'type': 'loss', 'content': 0.04859939590096474, 'timestamp': '2025-10-02 01:00:09.761221', 'step': 27393, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:09.815862', 'step': 27393, 'epoch': 3}
{'type': 'loss', 'content': 0.043443065136671066, 'timestamp': '2025-10-02 01:00:09.818695', 'step': 27394, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:09.873147', 'step': 27394, 'epoch': 3}
{'type': 'loss', 'content': 0.10199377685785294, 'timestamp': '2025-10-02 01:00:09.876041', 'step': 27395, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:09.930407', 'step': 27395, 'epoch': 3}
{'type': 'loss', 'content': 0.06459402292966843, 'timestamp': '2025-10-02 01:00:09.936260', 'step': 27396, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:09.998215', 'step': 27396, 'epoch': 3}
{'type': 'loss', 'content': 0.00822541769593954, 'timestamp': '2025-10-02 01:00:10.009542', 'step': 27397, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:00:10.064810', 'step': 27397, 'epoch': 3}
{'type': 'loss', 'content': 0.12148262560367584, 'timestamp': '2025-10-02 01:00:10.067337', 'step': 27398, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:10.121958', 'step': 27398, 'epoch': 3}
{'type': 'loss', 'content': 0.10375765711069107, 'timestamp': '2025-10-02 01:00:10.124698', 'step': 27399, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:10.179194', 'step': 27399, 'epoch': 3}
{'type': 'loss', 'content': 0.08245345205068588, 'timestamp': '2025-10-02 01:00:10.185363', 'step': 27400, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:10.239926', 'step': 27400, 'epoch': 3}
{'type': 'loss', 'content': 0.08424640446901321, 'timestamp': '2025-10-02 01:00:10.242717', 'step': 27401, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:10.304750', 'step': 27401, 'epoch': 3}
{'type': 'loss', 'content': 0.02953195571899414, 'timestamp': '2025-10-02 01:00:10.315261', 'step': 27402, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:10.370497', 'step': 27402, 'epoch': 3}
{'type': 'loss', 'content': 0.04076722636818886, 'timestamp': '2025-10-02 01:00:10.373665', 'step': 27403, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:00:10.429289', 'step': 27403, 'epoch': 3}
{'type': 'loss', 'content': 0.04137038439512253, 'timestamp': '2025-10-02 01:00:10.435437', 'step': 27404, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:10.495906', 'step': 27404, 'epoch': 3}
{'type': 'loss', 'content': 0.024076055735349655, 'timestamp': '2025-10-02 01:00:10.507247', 'step': 27405, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:10.566780', 'step': 27405, 'epoch': 3}
{'type': 'loss', 'content': 0.00634487671777606, 'timestamp': '2025-10-02 01:00:10.576241', 'step': 27406, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:10.631666', 'step': 27406, 'epoch': 3}
{'type': 'loss', 'content': 0.13136354088783264, 'timestamp': '2025-10-02 01:00:10.634483', 'step': 27407, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:10.689379', 'step': 27407, 'epoch': 3}
{'type': 'loss', 'content': 0.036125585436820984, 'timestamp': '2025-10-02 01:00:10.695439', 'step': 27408, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:10.753341', 'step': 27408, 'epoch': 3}
{'type': 'loss', 'content': 0.011372585780918598, 'timestamp': '2025-10-02 01:00:10.764304', 'step': 27409, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:10.822615', 'step': 27409, 'epoch': 3}
{'type': 'loss', 'content': 0.0356062650680542, 'timestamp': '2025-10-02 01:00:10.832160', 'step': 27410, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:10.891142', 'step': 27410, 'epoch': 3}
{'type': 'loss', 'content': 0.03212738409638405, 'timestamp': '2025-10-02 01:00:10.893522', 'step': 27411, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:10.947745', 'step': 27411, 'epoch': 3}
{'type': 'loss', 'content': 0.05118176341056824, 'timestamp': '2025-10-02 01:00:10.953912', 'step': 27412, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:11.008262', 'step': 27412, 'epoch': 3}
{'type': 'loss', 'content': 0.1407630741596222, 'timestamp': '2025-10-02 01:00:11.010743', 'step': 27413, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:11.070103', 'step': 27413, 'epoch': 3}
{'type': 'loss', 'content': 0.0022594090551137924, 'timestamp': '2025-10-02 01:00:11.080306', 'step': 27414, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:11.140204', 'step': 27414, 'epoch': 3}
{'type': 'loss', 'content': 0.010544398799538612, 'timestamp': '2025-10-02 01:00:11.150379', 'step': 27415, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:11.205042', 'step': 27415, 'epoch': 3}
{'type': 'loss', 'content': 0.03156483173370361, 'timestamp': '2025-10-02 01:00:11.211176', 'step': 27416, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 01:00:11.285761', 'step': 27416, 'epoch': 3}
{'type': 'loss', 'content': 0.032812852412462234, 'timestamp': '2025-10-02 01:00:11.300868', 'step': 27417, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:11.356626', 'step': 27417, 'epoch': 3}
{'type': 'loss', 'content': 0.04643353819847107, 'timestamp': '2025-10-02 01:00:11.359624', 'step': 27418, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:11.414379', 'step': 27418, 'epoch': 3}
{'type': 'loss', 'content': 0.04811574146151543, 'timestamp': '2025-10-02 01:00:11.416873', 'step': 27419, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:11.472518', 'step': 27419, 'epoch': 3}
{'type': 'loss', 'content': 0.03305047005414963, 'timestamp': '2025-10-02 01:00:11.478497', 'step': 27420, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:00:11.531987', 'step': 27420, 'epoch': 3}
{'type': 'loss', 'content': 0.013149122707545757, 'timestamp': '2025-10-02 01:00:11.534591', 'step': 27421, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:11.589748', 'step': 27421, 'epoch': 3}
{'type': 'loss', 'content': 0.012246217578649521, 'timestamp': '2025-10-02 01:00:11.591960', 'step': 27422, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:11.646641', 'step': 27422, 'epoch': 3}
{'type': 'loss', 'content': 0.011518532410264015, 'timestamp': '2025-10-02 01:00:11.652279', 'step': 27423, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:11.710043', 'step': 27423, 'epoch': 3}
{'type': 'loss', 'content': 0.05388250574469566, 'timestamp': '2025-10-02 01:00:11.716440', 'step': 27424, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:11.771261', 'step': 27424, 'epoch': 3}
{'type': 'loss', 'content': 0.012647072784602642, 'timestamp': '2025-10-02 01:00:11.780495', 'step': 27425, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:11.835925', 'step': 27425, 'epoch': 3}
{'type': 'loss', 'content': 0.06899435073137283, 'timestamp': '2025-10-02 01:00:11.838720', 'step': 27426, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:11.895111', 'step': 27426, 'epoch': 3}
{'type': 'loss', 'content': 0.0394698828458786, 'timestamp': '2025-10-02 01:00:11.897789', 'step': 27427, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:11.953117', 'step': 27427, 'epoch': 3}
{'type': 'loss', 'content': 0.0907340720295906, 'timestamp': '2025-10-02 01:00:11.959334', 'step': 27428, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:12.017190', 'step': 27428, 'epoch': 3}
{'type': 'loss', 'content': 0.06721798330545425, 'timestamp': '2025-10-02 01:00:12.026308', 'step': 27429, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:00:12.088103', 'step': 27429, 'epoch': 3}
{'type': 'loss', 'content': 0.04151295870542526, 'timestamp': '2025-10-02 01:00:12.098759', 'step': 27430, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:12.155260', 'step': 27430, 'epoch': 3}
{'type': 'loss', 'content': 0.10921432822942734, 'timestamp': '2025-10-02 01:00:12.157803', 'step': 27431, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:12.213037', 'step': 27431, 'epoch': 3}
{'type': 'loss', 'content': 0.026326831430196762, 'timestamp': '2025-10-02 01:00:12.222829', 'step': 27432, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:12.280151', 'step': 27432, 'epoch': 3}
{'type': 'loss', 'content': 0.021137647330760956, 'timestamp': '2025-10-02 01:00:12.282965', 'step': 27433, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:12.337664', 'step': 27433, 'epoch': 3}
{'type': 'loss', 'content': 0.04560258239507675, 'timestamp': '2025-10-02 01:00:12.340338', 'step': 27434, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:12.396173', 'step': 27434, 'epoch': 3}
{'type': 'loss', 'content': 0.037030674517154694, 'timestamp': '2025-10-02 01:00:12.405677', 'step': 27435, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:12.461242', 'step': 27435, 'epoch': 3}
{'type': 'loss', 'content': 0.049654558300971985, 'timestamp': '2025-10-02 01:00:12.467472', 'step': 27436, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:12.522694', 'step': 27436, 'epoch': 3}
{'type': 'loss', 'content': 0.01832585409283638, 'timestamp': '2025-10-02 01:00:12.524597', 'step': 27437, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:00:12.588005', 'step': 27437, 'epoch': 3}
{'type': 'loss', 'content': 0.0524611733853817, 'timestamp': '2025-10-02 01:00:12.598831', 'step': 27438, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:12.655387', 'step': 27438, 'epoch': 3}
{'type': 'loss', 'content': 0.04609304666519165, 'timestamp': '2025-10-02 01:00:12.657973', 'step': 27439, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:12.714408', 'step': 27439, 'epoch': 3}
{'type': 'loss', 'content': 0.031102485954761505, 'timestamp': '2025-10-02 01:00:12.723355', 'step': 27440, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:12.780483', 'step': 27440, 'epoch': 3}
{'type': 'loss', 'content': 0.05577678978443146, 'timestamp': '2025-10-02 01:00:12.785448', 'step': 27441, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:12.846932', 'step': 27441, 'epoch': 3}
{'type': 'loss', 'content': 0.03995542973279953, 'timestamp': '2025-10-02 01:00:12.867288', 'step': 27442, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:00:12.923802', 'step': 27442, 'epoch': 3}
{'type': 'loss', 'content': 0.016444679349660873, 'timestamp': '2025-10-02 01:00:12.926134', 'step': 27443, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:12.981482', 'step': 27443, 'epoch': 3}
{'type': 'loss', 'content': 0.016863549128174782, 'timestamp': '2025-10-02 01:00:12.987916', 'step': 27444, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:13.043108', 'step': 27444, 'epoch': 3}
{'type': 'loss', 'content': 0.01557044219225645, 'timestamp': '2025-10-02 01:00:13.050387', 'step': 27445, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:13.104867', 'step': 27445, 'epoch': 3}
{'type': 'loss', 'content': 0.014360220171511173, 'timestamp': '2025-10-02 01:00:13.107989', 'step': 27446, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:13.167011', 'step': 27446, 'epoch': 3}
{'type': 'loss', 'content': 0.04223685711622238, 'timestamp': '2025-10-02 01:00:13.170120', 'step': 27447, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:13.224957', 'step': 27447, 'epoch': 3}
{'type': 'loss', 'content': 0.08840439468622208, 'timestamp': '2025-10-02 01:00:13.231249', 'step': 27448, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:13.285190', 'step': 27448, 'epoch': 3}
{'type': 'loss', 'content': 0.0548245869576931, 'timestamp': '2025-10-02 01:00:13.287535', 'step': 27449, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:00:13.350595', 'step': 27449, 'epoch': 3}
{'type': 'loss', 'content': 0.01628073863685131, 'timestamp': '2025-10-02 01:00:13.361227', 'step': 27450, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:13.417698', 'step': 27450, 'epoch': 3}
{'type': 'loss', 'content': 0.030915958806872368, 'timestamp': '2025-10-02 01:00:13.420111', 'step': 27451, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:13.475478', 'step': 27451, 'epoch': 3}
{'type': 'loss', 'content': 0.05163666978478432, 'timestamp': '2025-10-02 01:00:13.481885', 'step': 27452, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:13.536217', 'step': 27452, 'epoch': 3}
{'type': 'loss', 'content': 0.011688057333230972, 'timestamp': '2025-10-02 01:00:13.543567', 'step': 27453, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:13.599151', 'step': 27453, 'epoch': 3}
{'type': 'loss', 'content': 0.0030144676566123962, 'timestamp': '2025-10-02 01:00:13.601470', 'step': 27454, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:13.657232', 'step': 27454, 'epoch': 3}
{'type': 'loss', 'content': 0.023313645273447037, 'timestamp': '2025-10-02 01:00:13.659848', 'step': 27455, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:00:13.716687', 'step': 27455, 'epoch': 3}
{'type': 'loss', 'content': 0.04257836937904358, 'timestamp': '2025-10-02 01:00:13.722909', 'step': 27456, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:13.778032', 'step': 27456, 'epoch': 3}
{'type': 'loss', 'content': 0.022132521495223045, 'timestamp': '2025-10-02 01:00:13.785391', 'step': 27457, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:13.840929', 'step': 27457, 'epoch': 3}
{'type': 'loss', 'content': 0.09525745362043381, 'timestamp': '2025-10-02 01:00:13.843789', 'step': 27458, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:13.898629', 'step': 27458, 'epoch': 3}
{'type': 'loss', 'content': 0.013258052989840508, 'timestamp': '2025-10-02 01:00:13.905971', 'step': 27459, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:13.966851', 'step': 27459, 'epoch': 3}
{'type': 'loss', 'content': 0.05591345578432083, 'timestamp': '2025-10-02 01:00:13.981962', 'step': 27460, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:14.039578', 'step': 27460, 'epoch': 3}
{'type': 'loss', 'content': 0.07363302260637283, 'timestamp': '2025-10-02 01:00:14.045766', 'step': 27461, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:00:14.127041', 'step': 27461, 'epoch': 3}
{'type': 'loss', 'content': 0.03548721969127655, 'timestamp': '2025-10-02 01:00:14.137837', 'step': 27462, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:14.193200', 'step': 27462, 'epoch': 3}
{'type': 'loss', 'content': 0.0436256043612957, 'timestamp': '2025-10-02 01:00:14.195483', 'step': 27463, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:14.250526', 'step': 27463, 'epoch': 3}
{'type': 'loss', 'content': 0.03150857612490654, 'timestamp': '2025-10-02 01:00:14.256876', 'step': 27464, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:14.311511', 'step': 27464, 'epoch': 3}
{'type': 'loss', 'content': 0.05867002531886101, 'timestamp': '2025-10-02 01:00:14.321721', 'step': 27465, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:14.378187', 'step': 27465, 'epoch': 3}
{'type': 'loss', 'content': 0.042752597481012344, 'timestamp': '2025-10-02 01:00:14.381397', 'step': 27466, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:14.437370', 'step': 27466, 'epoch': 3}
{'type': 'loss', 'content': 0.001819518394768238, 'timestamp': '2025-10-02 01:00:14.444578', 'step': 27467, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:14.499625', 'step': 27467, 'epoch': 3}
{'type': 'loss', 'content': 0.0026003483217209578, 'timestamp': '2025-10-02 01:00:14.507590', 'step': 27468, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:14.561534', 'step': 27468, 'epoch': 3}
{'type': 'loss', 'content': 0.05223178490996361, 'timestamp': '2025-10-02 01:00:14.564229', 'step': 27469, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:14.618834', 'step': 27469, 'epoch': 3}
{'type': 'loss', 'content': 0.07106729596853256, 'timestamp': '2025-10-02 01:00:14.622097', 'step': 27470, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:14.678133', 'step': 27470, 'epoch': 3}
{'type': 'loss', 'content': 0.010079971514642239, 'timestamp': '2025-10-02 01:00:14.680763', 'step': 27471, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:14.735525', 'step': 27471, 'epoch': 3}
{'type': 'loss', 'content': 0.01605888456106186, 'timestamp': '2025-10-02 01:00:14.743608', 'step': 27472, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:14.798654', 'step': 27472, 'epoch': 3}
{'type': 'loss', 'content': 0.03448629006743431, 'timestamp': '2025-10-02 01:00:14.801469', 'step': 27473, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:14.857042', 'step': 27473, 'epoch': 3}
{'type': 'loss', 'content': 0.049020033329725266, 'timestamp': '2025-10-02 01:00:14.859679', 'step': 27474, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:14.914458', 'step': 27474, 'epoch': 3}
{'type': 'loss', 'content': 0.0449351966381073, 'timestamp': '2025-10-02 01:00:14.917330', 'step': 27475, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:14.972198', 'step': 27475, 'epoch': 3}
{'type': 'loss', 'content': 0.0792953222990036, 'timestamp': '2025-10-02 01:00:14.978663', 'step': 27476, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:15.033706', 'step': 27476, 'epoch': 3}
{'type': 'loss', 'content': 0.049085378646850586, 'timestamp': '2025-10-02 01:00:15.037225', 'step': 27477, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:00:15.095179', 'step': 27477, 'epoch': 3}
{'type': 'loss', 'content': 0.028854865580797195, 'timestamp': '2025-10-02 01:00:15.102145', 'step': 27478, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:15.170751', 'step': 27478, 'epoch': 3}
{'type': 'loss', 'content': 0.06132517755031586, 'timestamp': '2025-10-02 01:00:15.179193', 'step': 27479, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:15.264992', 'step': 27479, 'epoch': 3}
{'type': 'loss', 'content': 0.15743303298950195, 'timestamp': '2025-10-02 01:00:15.271707', 'step': 27480, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:15.339730', 'step': 27480, 'epoch': 3}
{'type': 'loss', 'content': 0.01582052744925022, 'timestamp': '2025-10-02 01:00:15.346802', 'step': 27481, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:15.406682', 'step': 27481, 'epoch': 3}
{'type': 'loss', 'content': 0.05644870921969414, 'timestamp': '2025-10-02 01:00:15.409551', 'step': 27482, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:15.467372', 'step': 27482, 'epoch': 3}
{'type': 'loss', 'content': 0.08514075726270676, 'timestamp': '2025-10-02 01:00:15.470148', 'step': 27483, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:15.527579', 'step': 27483, 'epoch': 3}
{'type': 'loss', 'content': 0.07014031708240509, 'timestamp': '2025-10-02 01:00:15.534020', 'step': 27484, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:15.590883', 'step': 27484, 'epoch': 3}
{'type': 'loss', 'content': 0.044840697199106216, 'timestamp': '2025-10-02 01:00:15.596679', 'step': 27485, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:15.654761', 'step': 27485, 'epoch': 3}
{'type': 'loss', 'content': 0.039425067603588104, 'timestamp': '2025-10-02 01:00:15.658939', 'step': 27486, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:15.717317', 'step': 27486, 'epoch': 3}
{'type': 'loss', 'content': 0.08017640560865402, 'timestamp': '2025-10-02 01:00:15.720925', 'step': 27487, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:15.782429', 'step': 27487, 'epoch': 3}
{'type': 'loss', 'content': 0.05268174782395363, 'timestamp': '2025-10-02 01:00:15.791317', 'step': 27488, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:15.849319', 'step': 27488, 'epoch': 3}
{'type': 'loss', 'content': 0.052408166229724884, 'timestamp': '2025-10-02 01:00:15.852571', 'step': 27489, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:15.909919', 'step': 27489, 'epoch': 3}
{'type': 'loss', 'content': 0.006017341744154692, 'timestamp': '2025-10-02 01:00:15.917481', 'step': 27490, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:15.975963', 'step': 27490, 'epoch': 3}
{'type': 'loss', 'content': 0.04250117018818855, 'timestamp': '2025-10-02 01:00:15.985115', 'step': 27491, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:16.042070', 'step': 27491, 'epoch': 3}
{'type': 'loss', 'content': 0.030238652601838112, 'timestamp': '2025-10-02 01:00:16.051775', 'step': 27492, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:16.107207', 'step': 27492, 'epoch': 3}
{'type': 'loss', 'content': 0.06924303621053696, 'timestamp': '2025-10-02 01:00:16.110234', 'step': 27493, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:16.170639', 'step': 27493, 'epoch': 3}
{'type': 'loss', 'content': 0.02718937210738659, 'timestamp': '2025-10-02 01:00:16.173184', 'step': 27494, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:16.231353', 'step': 27494, 'epoch': 3}
{'type': 'loss', 'content': 0.054222021251916885, 'timestamp': '2025-10-02 01:00:16.236396', 'step': 27495, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:16.295824', 'step': 27495, 'epoch': 3}
{'type': 'loss', 'content': 0.04001931846141815, 'timestamp': '2025-10-02 01:00:16.305731', 'step': 27496, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:16.360990', 'step': 27496, 'epoch': 3}
{'type': 'loss', 'content': 0.034517619758844376, 'timestamp': '2025-10-02 01:00:16.365410', 'step': 27497, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:16.425893', 'step': 27497, 'epoch': 3}
{'type': 'loss', 'content': 0.016886021941900253, 'timestamp': '2025-10-02 01:00:16.429321', 'step': 27498, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:16.511499', 'step': 27498, 'epoch': 3}
{'type': 'loss', 'content': 0.030092690140008926, 'timestamp': '2025-10-02 01:00:16.517021', 'step': 27499, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:16.582831', 'step': 27499, 'epoch': 3}
{'type': 'loss', 'content': 0.10476262122392654, 'timestamp': '2025-10-02 01:00:16.590918', 'step': 27500, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 27500', 'timestamp': '2025-10-02 01:00:17.116595', 'step': 27500, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:17.173298', 'step': 27500, 'epoch': 3}
{'type': 'loss', 'content': 0.00031576064066030085, 'timestamp': '2025-10-02 01:00:17.175721', 'step': 27501, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:00:17.246030', 'step': 27501, 'epoch': 3}
{'type': 'loss', 'content': 0.004603035748004913, 'timestamp': '2025-10-02 01:00:17.258328', 'step': 27502, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:17.314917', 'step': 27502, 'epoch': 3}
{'type': 'loss', 'content': 0.045442912727594376, 'timestamp': '2025-10-02 01:00:17.318172', 'step': 27503, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:17.379734', 'step': 27503, 'epoch': 3}
{'type': 'loss', 'content': 0.017221733927726746, 'timestamp': '2025-10-02 01:00:17.390932', 'step': 27504, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:17.450889', 'step': 27504, 'epoch': 3}
{'type': 'loss', 'content': 0.010494313202798367, 'timestamp': '2025-10-02 01:00:17.462164', 'step': 27505, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:17.517533', 'step': 27505, 'epoch': 3}
{'type': 'loss', 'content': 0.027413835749030113, 'timestamp': '2025-10-02 01:00:17.520510', 'step': 27506, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:17.576804', 'step': 27506, 'epoch': 3}
{'type': 'loss', 'content': 0.11331161856651306, 'timestamp': '2025-10-02 01:00:17.579830', 'step': 27507, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:00:17.658232', 'step': 27507, 'epoch': 3}
{'type': 'loss', 'content': 0.01078640017658472, 'timestamp': '2025-10-02 01:00:17.671252', 'step': 27508, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:17.728706', 'step': 27508, 'epoch': 3}
{'type': 'loss', 'content': 0.05146954581141472, 'timestamp': '2025-10-02 01:00:17.734466', 'step': 27509, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:17.796084', 'step': 27509, 'epoch': 3}
{'type': 'loss', 'content': 0.036052461713552475, 'timestamp': '2025-10-02 01:00:17.798897', 'step': 27510, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:17.855608', 'step': 27510, 'epoch': 3}
{'type': 'loss', 'content': 0.06070844084024429, 'timestamp': '2025-10-02 01:00:17.858411', 'step': 27511, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:17.911965', 'step': 27511, 'epoch': 3}
{'type': 'loss', 'content': 0.10490551590919495, 'timestamp': '2025-10-02 01:00:17.918742', 'step': 27512, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:17.972989', 'step': 27512, 'epoch': 3}
{'type': 'loss', 'content': 0.06901830434799194, 'timestamp': '2025-10-02 01:00:17.983220', 'step': 27513, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:18.037651', 'step': 27513, 'epoch': 3}
{'type': 'loss', 'content': 0.14099180698394775, 'timestamp': '2025-10-02 01:00:18.040575', 'step': 27514, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:18.095184', 'step': 27514, 'epoch': 3}
{'type': 'loss', 'content': 0.03077041171491146, 'timestamp': '2025-10-02 01:00:18.104512', 'step': 27515, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:18.159642', 'step': 27515, 'epoch': 3}
{'type': 'loss', 'content': 0.022174477577209473, 'timestamp': '2025-10-02 01:00:18.166009', 'step': 27516, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:18.221421', 'step': 27516, 'epoch': 3}
{'type': 'loss', 'content': 0.020024297758936882, 'timestamp': '2025-10-02 01:00:18.227282', 'step': 27517, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:18.288920', 'step': 27517, 'epoch': 3}
{'type': 'loss', 'content': 0.017777957022190094, 'timestamp': '2025-10-02 01:00:18.299101', 'step': 27518, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:18.354803', 'step': 27518, 'epoch': 3}
{'type': 'loss', 'content': 0.022663425654172897, 'timestamp': '2025-10-02 01:00:18.357661', 'step': 27519, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:18.412284', 'step': 27519, 'epoch': 3}
{'type': 'loss', 'content': 0.02187812328338623, 'timestamp': '2025-10-02 01:00:18.419279', 'step': 27520, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:18.473853', 'step': 27520, 'epoch': 3}
{'type': 'loss', 'content': 0.016786612570285797, 'timestamp': '2025-10-02 01:00:18.476332', 'step': 27521, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:18.530376', 'step': 27521, 'epoch': 3}
{'type': 'loss', 'content': 0.023007769137620926, 'timestamp': '2025-10-02 01:00:18.533408', 'step': 27522, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:18.587982', 'step': 27522, 'epoch': 3}
{'type': 'loss', 'content': 0.022772764787077904, 'timestamp': '2025-10-02 01:00:18.590374', 'step': 27523, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:18.644999', 'step': 27523, 'epoch': 3}
{'type': 'loss', 'content': 0.0455620214343071, 'timestamp': '2025-10-02 01:00:18.651210', 'step': 27524, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:18.705024', 'step': 27524, 'epoch': 3}
{'type': 'loss', 'content': 0.04180996119976044, 'timestamp': '2025-10-02 01:00:18.712637', 'step': 27525, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:18.768140', 'step': 27525, 'epoch': 3}
{'type': 'loss', 'content': 0.06113021448254585, 'timestamp': '2025-10-02 01:00:18.770919', 'step': 27526, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:18.826387', 'step': 27526, 'epoch': 3}
{'type': 'loss', 'content': 0.043817322701215744, 'timestamp': '2025-10-02 01:00:18.828637', 'step': 27527, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:18.891880', 'step': 27527, 'epoch': 3}
{'type': 'loss', 'content': 0.023840751498937607, 'timestamp': '2025-10-02 01:00:18.903117', 'step': 27528, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:18.958023', 'step': 27528, 'epoch': 3}
{'type': 'loss', 'content': 0.022493993863463402, 'timestamp': '2025-10-02 01:00:18.967487', 'step': 27529, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:19.024622', 'step': 27529, 'epoch': 3}
{'type': 'loss', 'content': 0.03177155926823616, 'timestamp': '2025-10-02 01:00:19.030438', 'step': 27530, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:19.092293', 'step': 27530, 'epoch': 3}
{'type': 'loss', 'content': 0.0931314080953598, 'timestamp': '2025-10-02 01:00:19.096876', 'step': 27531, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:19.157711', 'step': 27531, 'epoch': 3}
{'type': 'loss', 'content': 0.07088955491781235, 'timestamp': '2025-10-02 01:00:19.164518', 'step': 27532, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:19.219661', 'step': 27532, 'epoch': 3}
{'type': 'loss', 'content': 0.04578383266925812, 'timestamp': '2025-10-02 01:00:19.225229', 'step': 27533, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:19.284819', 'step': 27533, 'epoch': 3}
{'type': 'loss', 'content': 0.015988273546099663, 'timestamp': '2025-10-02 01:00:19.294970', 'step': 27534, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:19.350343', 'step': 27534, 'epoch': 3}
{'type': 'loss', 'content': 0.023426569998264313, 'timestamp': '2025-10-02 01:00:19.353311', 'step': 27535, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:19.408406', 'step': 27535, 'epoch': 3}
{'type': 'loss', 'content': 0.0768241286277771, 'timestamp': '2025-10-02 01:00:19.415085', 'step': 27536, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:19.468809', 'step': 27536, 'epoch': 3}
{'type': 'loss', 'content': 0.05913699045777321, 'timestamp': '2025-10-02 01:00:19.471855', 'step': 27537, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:19.525609', 'step': 27537, 'epoch': 3}
{'type': 'loss', 'content': 0.21727652847766876, 'timestamp': '2025-10-02 01:00:19.528126', 'step': 27538, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:19.584397', 'step': 27538, 'epoch': 3}
{'type': 'loss', 'content': 0.04202638193964958, 'timestamp': '2025-10-02 01:00:19.593904', 'step': 27539, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:19.648519', 'step': 27539, 'epoch': 3}
{'type': 'loss', 'content': 0.07169461995363235, 'timestamp': '2025-10-02 01:00:19.655128', 'step': 27540, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:19.709304', 'step': 27540, 'epoch': 3}
{'type': 'loss', 'content': 0.04725949466228485, 'timestamp': '2025-10-02 01:00:19.713385', 'step': 27541, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:19.773623', 'step': 27541, 'epoch': 3}
{'type': 'loss', 'content': 0.002566394628956914, 'timestamp': '2025-10-02 01:00:19.783749', 'step': 27542, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:19.843571', 'step': 27542, 'epoch': 3}
{'type': 'loss', 'content': 0.022817568853497505, 'timestamp': '2025-10-02 01:00:19.853692', 'step': 27543, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:19.911258', 'step': 27543, 'epoch': 3}
{'type': 'loss', 'content': 0.007962476462125778, 'timestamp': '2025-10-02 01:00:19.921361', 'step': 27544, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:19.976201', 'step': 27544, 'epoch': 3}
{'type': 'loss', 'content': 0.07797078043222427, 'timestamp': '2025-10-02 01:00:19.978772', 'step': 27545, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:20.033627', 'step': 27545, 'epoch': 3}
{'type': 'loss', 'content': 0.09355315566062927, 'timestamp': '2025-10-02 01:00:20.039521', 'step': 27546, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:00:20.094357', 'step': 27546, 'epoch': 3}
{'type': 'loss', 'content': 0.038975656032562256, 'timestamp': '2025-10-02 01:00:20.097647', 'step': 27547, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:20.152018', 'step': 27547, 'epoch': 3}
{'type': 'loss', 'content': 0.04236338287591934, 'timestamp': '2025-10-02 01:00:20.158505', 'step': 27548, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:20.224208', 'step': 27548, 'epoch': 3}
{'type': 'loss', 'content': 0.010744249448180199, 'timestamp': '2025-10-02 01:00:20.229487', 'step': 27549, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:20.297273', 'step': 27549, 'epoch': 3}
{'type': 'loss', 'content': 0.06443200260400772, 'timestamp': '2025-10-02 01:00:20.300659', 'step': 27550, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:20.356365', 'step': 27550, 'epoch': 3}
{'type': 'loss', 'content': 0.017676303163170815, 'timestamp': '2025-10-02 01:00:20.362220', 'step': 27551, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:00:20.418620', 'step': 27551, 'epoch': 3}
{'type': 'loss', 'content': 0.0964866653084755, 'timestamp': '2025-10-02 01:00:20.430224', 'step': 27552, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:20.483517', 'step': 27552, 'epoch': 3}
{'type': 'loss', 'content': 0.11794480681419373, 'timestamp': '2025-10-02 01:00:20.486382', 'step': 27553, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:20.540883', 'step': 27553, 'epoch': 3}
{'type': 'loss', 'content': 0.06019299104809761, 'timestamp': '2025-10-02 01:00:20.543145', 'step': 27554, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:20.597219', 'step': 27554, 'epoch': 3}
{'type': 'loss', 'content': 0.08517130464315414, 'timestamp': '2025-10-02 01:00:20.601027', 'step': 27555, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:20.655732', 'step': 27555, 'epoch': 3}
{'type': 'loss', 'content': 0.02520916238427162, 'timestamp': '2025-10-02 01:00:20.662919', 'step': 27556, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:20.724100', 'step': 27556, 'epoch': 3}
{'type': 'loss', 'content': 0.00816352292895317, 'timestamp': '2025-10-02 01:00:20.735400', 'step': 27557, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:20.789983', 'step': 27557, 'epoch': 3}
{'type': 'loss', 'content': 0.09205847978591919, 'timestamp': '2025-10-02 01:00:20.793308', 'step': 27558, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:20.848174', 'step': 27558, 'epoch': 3}
{'type': 'loss', 'content': 0.04773521423339844, 'timestamp': '2025-10-02 01:00:20.850638', 'step': 27559, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:20.904844', 'step': 27559, 'epoch': 3}
{'type': 'loss', 'content': 0.062078144401311874, 'timestamp': '2025-10-02 01:00:20.911671', 'step': 27560, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 01:00:20.983309', 'step': 27560, 'epoch': 3}
{'type': 'loss', 'content': 0.031181151047348976, 'timestamp': '2025-10-02 01:00:20.997725', 'step': 27561, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:21.052941', 'step': 27561, 'epoch': 3}
{'type': 'loss', 'content': 0.06024686247110367, 'timestamp': '2025-10-02 01:00:21.060564', 'step': 27562, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:21.120303', 'step': 27562, 'epoch': 3}
{'type': 'loss', 'content': 0.05537189915776253, 'timestamp': '2025-10-02 01:00:21.130480', 'step': 27563, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:21.189389', 'step': 27563, 'epoch': 3}
{'type': 'loss', 'content': 0.04936520382761955, 'timestamp': '2025-10-02 01:00:21.199696', 'step': 27564, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:21.255867', 'step': 27564, 'epoch': 3}
{'type': 'loss', 'content': 0.030423389747738838, 'timestamp': '2025-10-02 01:00:21.258389', 'step': 27565, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:21.313229', 'step': 27565, 'epoch': 3}
{'type': 'loss', 'content': 0.05886019766330719, 'timestamp': '2025-10-02 01:00:21.316396', 'step': 27566, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:21.371150', 'step': 27566, 'epoch': 3}
{'type': 'loss', 'content': 0.026191826909780502, 'timestamp': '2025-10-02 01:00:21.373745', 'step': 27567, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:21.429025', 'step': 27567, 'epoch': 3}
{'type': 'loss', 'content': 0.06510534137487411, 'timestamp': '2025-10-02 01:00:21.436821', 'step': 27568, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:21.506398', 'step': 27568, 'epoch': 3}
{'type': 'loss', 'content': 0.04881809279322624, 'timestamp': '2025-10-02 01:00:21.516394', 'step': 27569, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:21.580424', 'step': 27569, 'epoch': 3}
{'type': 'loss', 'content': 0.011849637143313885, 'timestamp': '2025-10-02 01:00:21.588042', 'step': 27570, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:21.662938', 'step': 27570, 'epoch': 3}
{'type': 'loss', 'content': 0.10265929996967316, 'timestamp': '2025-10-02 01:00:21.669113', 'step': 27571, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:21.723431', 'step': 27571, 'epoch': 3}
{'type': 'loss', 'content': 0.04969574138522148, 'timestamp': '2025-10-02 01:00:21.730037', 'step': 27572, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:21.783843', 'step': 27572, 'epoch': 3}
{'type': 'loss', 'content': 0.010702873580157757, 'timestamp': '2025-10-02 01:00:21.786494', 'step': 27573, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:21.841091', 'step': 27573, 'epoch': 3}
{'type': 'loss', 'content': 0.021238958463072777, 'timestamp': '2025-10-02 01:00:21.843665', 'step': 27574, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:21.897621', 'step': 27574, 'epoch': 3}
{'type': 'loss', 'content': 0.0343838706612587, 'timestamp': '2025-10-02 01:00:21.900516', 'step': 27575, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:21.954858', 'step': 27575, 'epoch': 3}
{'type': 'loss', 'content': 0.0575699508190155, 'timestamp': '2025-10-02 01:00:21.961114', 'step': 27576, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:22.014751', 'step': 27576, 'epoch': 3}
{'type': 'loss', 'content': 0.01638890616595745, 'timestamp': '2025-10-02 01:00:22.017197', 'step': 27577, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:22.071895', 'step': 27577, 'epoch': 3}
{'type': 'loss', 'content': 0.018034210428595543, 'timestamp': '2025-10-02 01:00:22.079515', 'step': 27578, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:22.135524', 'step': 27578, 'epoch': 3}
{'type': 'loss', 'content': 0.016817184165120125, 'timestamp': '2025-10-02 01:00:22.141344', 'step': 27579, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:22.195930', 'step': 27579, 'epoch': 3}
{'type': 'loss', 'content': 0.06880385428667068, 'timestamp': '2025-10-02 01:00:22.204358', 'step': 27580, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:22.259392', 'step': 27580, 'epoch': 3}
{'type': 'loss', 'content': 0.022632045671343803, 'timestamp': '2025-10-02 01:00:22.261894', 'step': 27581, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:22.316356', 'step': 27581, 'epoch': 3}
{'type': 'loss', 'content': 0.03971187025308609, 'timestamp': '2025-10-02 01:00:22.319431', 'step': 27582, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:22.374674', 'step': 27582, 'epoch': 3}
{'type': 'loss', 'content': 0.05658162012696266, 'timestamp': '2025-10-02 01:00:22.380558', 'step': 27583, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:22.434189', 'step': 27583, 'epoch': 3}
{'type': 'loss', 'content': 0.13075761497020721, 'timestamp': '2025-10-02 01:00:22.440324', 'step': 27584, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:22.494103', 'step': 27584, 'epoch': 3}
{'type': 'loss', 'content': 0.041664134711027145, 'timestamp': '2025-10-02 01:00:22.497121', 'step': 27585, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:22.551890', 'step': 27585, 'epoch': 3}
{'type': 'loss', 'content': 0.15399277210235596, 'timestamp': '2025-10-02 01:00:22.554530', 'step': 27586, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:22.610002', 'step': 27586, 'epoch': 3}
{'type': 'loss', 'content': 0.14877794682979584, 'timestamp': '2025-10-02 01:00:22.619533', 'step': 27587, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:22.675511', 'step': 27587, 'epoch': 3}
{'type': 'loss', 'content': 0.03593471273779869, 'timestamp': '2025-10-02 01:00:22.684048', 'step': 27588, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:22.739088', 'step': 27588, 'epoch': 3}
{'type': 'loss', 'content': 0.07082776725292206, 'timestamp': '2025-10-02 01:00:22.744844', 'step': 27589, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:22.800165', 'step': 27589, 'epoch': 3}
{'type': 'loss', 'content': 0.051767781376838684, 'timestamp': '2025-10-02 01:00:22.807660', 'step': 27590, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:22.867328', 'step': 27590, 'epoch': 3}
{'type': 'loss', 'content': 0.02462609112262726, 'timestamp': '2025-10-02 01:00:22.877461', 'step': 27591, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:22.934320', 'step': 27591, 'epoch': 3}
{'type': 'loss', 'content': 0.051493190228939056, 'timestamp': '2025-10-02 01:00:22.940746', 'step': 27592, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:22.995506', 'step': 27592, 'epoch': 3}
{'type': 'loss', 'content': 0.08555353432893753, 'timestamp': '2025-10-02 01:00:22.998247', 'step': 27593, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:23.053098', 'step': 27593, 'epoch': 3}
{'type': 'loss', 'content': 0.024536309763789177, 'timestamp': '2025-10-02 01:00:23.060531', 'step': 27594, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:23.120550', 'step': 27594, 'epoch': 3}
{'type': 'loss', 'content': 0.011180354282259941, 'timestamp': '2025-10-02 01:00:23.130703', 'step': 27595, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:23.185842', 'step': 27595, 'epoch': 3}
{'type': 'loss', 'content': 0.0728549063205719, 'timestamp': '2025-10-02 01:00:23.192065', 'step': 27596, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 01:00:23.260349', 'step': 27596, 'epoch': 3}
{'type': 'loss', 'content': 0.027042068541049957, 'timestamp': '2025-10-02 01:00:23.273881', 'step': 27597, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:23.329672', 'step': 27597, 'epoch': 3}
{'type': 'loss', 'content': 0.04456273093819618, 'timestamp': '2025-10-02 01:00:23.332297', 'step': 27598, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:23.386673', 'step': 27598, 'epoch': 3}
{'type': 'loss', 'content': 0.08590088784694672, 'timestamp': '2025-10-02 01:00:23.389086', 'step': 27599, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:23.443956', 'step': 27599, 'epoch': 3}
{'type': 'loss', 'content': 0.015536993741989136, 'timestamp': '2025-10-02 01:00:23.450108', 'step': 27600, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:23.504798', 'step': 27600, 'epoch': 3}
{'type': 'loss', 'content': 0.01747351698577404, 'timestamp': '2025-10-02 01:00:23.508473', 'step': 27601, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:00:23.563356', 'step': 27601, 'epoch': 3}
{'type': 'loss', 'content': 0.06611037254333496, 'timestamp': '2025-10-02 01:00:23.565788', 'step': 27602, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:23.621495', 'step': 27602, 'epoch': 3}
{'type': 'loss', 'content': 0.05512788146734238, 'timestamp': '2025-10-02 01:00:23.626361', 'step': 27603, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:23.681049', 'step': 27603, 'epoch': 3}
{'type': 'loss', 'content': 0.0674862489104271, 'timestamp': '2025-10-02 01:00:23.687327', 'step': 27604, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:00:23.741108', 'step': 27604, 'epoch': 3}
{'type': 'loss', 'content': 0.04531364142894745, 'timestamp': '2025-10-02 01:00:23.744106', 'step': 27605, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:23.799200', 'step': 27605, 'epoch': 3}
{'type': 'loss', 'content': 0.011147204786539078, 'timestamp': '2025-10-02 01:00:23.802382', 'step': 27606, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:23.859563', 'step': 27606, 'epoch': 3}
{'type': 'loss', 'content': 0.0827411562204361, 'timestamp': '2025-10-02 01:00:23.862378', 'step': 27607, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:23.918092', 'step': 27607, 'epoch': 3}
{'type': 'loss', 'content': 0.10180117934942245, 'timestamp': '2025-10-02 01:00:23.928394', 'step': 27608, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:23.983313', 'step': 27608, 'epoch': 3}
{'type': 'loss', 'content': 0.01691633276641369, 'timestamp': '2025-10-02 01:00:23.993587', 'step': 27609, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:24.048803', 'step': 27609, 'epoch': 3}
{'type': 'loss', 'content': 0.0044447509571909904, 'timestamp': '2025-10-02 01:00:24.051517', 'step': 27610, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:24.105881', 'step': 27610, 'epoch': 3}
{'type': 'loss', 'content': 0.050204772502183914, 'timestamp': '2025-10-02 01:00:24.108366', 'step': 27611, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:00:24.172595', 'step': 27611, 'epoch': 3}
{'type': 'loss', 'content': 0.023807087913155556, 'timestamp': '2025-10-02 01:00:24.184000', 'step': 27612, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:24.238991', 'step': 27612, 'epoch': 3}
{'type': 'loss', 'content': 0.07761543244123459, 'timestamp': '2025-10-02 01:00:24.241738', 'step': 27613, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:24.297761', 'step': 27613, 'epoch': 3}
{'type': 'loss', 'content': 0.029780294746160507, 'timestamp': '2025-10-02 01:00:24.305352', 'step': 27614, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:24.361038', 'step': 27614, 'epoch': 3}
{'type': 'loss', 'content': 0.014237101189792156, 'timestamp': '2025-10-02 01:00:24.366827', 'step': 27615, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:24.422057', 'step': 27615, 'epoch': 3}
{'type': 'loss', 'content': 0.07573584467172623, 'timestamp': '2025-10-02 01:00:24.428003', 'step': 27616, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:24.485921', 'step': 27616, 'epoch': 3}
{'type': 'loss', 'content': 0.02334068901836872, 'timestamp': '2025-10-02 01:00:24.496914', 'step': 27617, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:24.551769', 'step': 27617, 'epoch': 3}
{'type': 'loss', 'content': 0.05528036877512932, 'timestamp': '2025-10-02 01:00:24.554562', 'step': 27618, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:24.609045', 'step': 27618, 'epoch': 3}
{'type': 'loss', 'content': 0.011555146425962448, 'timestamp': '2025-10-02 01:00:24.611595', 'step': 27619, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:24.667616', 'step': 27619, 'epoch': 3}
{'type': 'loss', 'content': 0.08539562672376633, 'timestamp': '2025-10-02 01:00:24.674285', 'step': 27620, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:24.730057', 'step': 27620, 'epoch': 3}
{'type': 'loss', 'content': 0.08060452342033386, 'timestamp': '2025-10-02 01:00:24.732726', 'step': 27621, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:24.792132', 'step': 27621, 'epoch': 3}
{'type': 'loss', 'content': 0.05979381129145622, 'timestamp': '2025-10-02 01:00:24.802338', 'step': 27622, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:24.858503', 'step': 27622, 'epoch': 3}
{'type': 'loss', 'content': 0.07531935721635818, 'timestamp': '2025-10-02 01:00:24.861042', 'step': 27623, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:24.915266', 'step': 27623, 'epoch': 3}
{'type': 'loss', 'content': 0.05843942239880562, 'timestamp': '2025-10-02 01:00:24.921280', 'step': 27624, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:24.975032', 'step': 27624, 'epoch': 3}
{'type': 'loss', 'content': 0.016037331894040108, 'timestamp': '2025-10-02 01:00:24.977667', 'step': 27625, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:25.032051', 'step': 27625, 'epoch': 3}
{'type': 'loss', 'content': 0.06249178200960159, 'timestamp': '2025-10-02 01:00:25.034566', 'step': 27626, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:25.090233', 'step': 27626, 'epoch': 3}
{'type': 'loss', 'content': 0.03265790641307831, 'timestamp': '2025-10-02 01:00:25.093006', 'step': 27627, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:25.148056', 'step': 27627, 'epoch': 3}
{'type': 'loss', 'content': 0.13327796757221222, 'timestamp': '2025-10-02 01:00:25.154405', 'step': 27628, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:25.209004', 'step': 27628, 'epoch': 3}
{'type': 'loss', 'content': 0.046399347484111786, 'timestamp': '2025-10-02 01:00:25.219269', 'step': 27629, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:25.273999', 'step': 27629, 'epoch': 3}
{'type': 'loss', 'content': 0.12682956457138062, 'timestamp': '2025-10-02 01:00:25.276494', 'step': 27630, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:25.331752', 'step': 27630, 'epoch': 3}
{'type': 'loss', 'content': 0.009035960771143436, 'timestamp': '2025-10-02 01:00:25.341413', 'step': 27631, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:25.397916', 'step': 27631, 'epoch': 3}
{'type': 'loss', 'content': 0.03250769153237343, 'timestamp': '2025-10-02 01:00:25.404174', 'step': 27632, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:25.458428', 'step': 27632, 'epoch': 3}
{'type': 'loss', 'content': 0.09980008006095886, 'timestamp': '2025-10-02 01:00:25.461251', 'step': 27633, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:25.515797', 'step': 27633, 'epoch': 3}
{'type': 'loss', 'content': 0.12260114401578903, 'timestamp': '2025-10-02 01:00:25.518560', 'step': 27634, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:25.579453', 'step': 27634, 'epoch': 3}
{'type': 'loss', 'content': 0.0538996197283268, 'timestamp': '2025-10-02 01:00:25.589601', 'step': 27635, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:25.646871', 'step': 27635, 'epoch': 3}
{'type': 'loss', 'content': 0.1656578779220581, 'timestamp': '2025-10-02 01:00:25.653963', 'step': 27636, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:25.709416', 'step': 27636, 'epoch': 3}
{'type': 'loss', 'content': 0.04421437159180641, 'timestamp': '2025-10-02 01:00:25.715355', 'step': 27637, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:25.772510', 'step': 27637, 'epoch': 3}
{'type': 'loss', 'content': 0.019442599266767502, 'timestamp': '2025-10-02 01:00:25.776239', 'step': 27638, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:25.834574', 'step': 27638, 'epoch': 3}
{'type': 'loss', 'content': 0.003503368701785803, 'timestamp': '2025-10-02 01:00:25.844095', 'step': 27639, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:25.905386', 'step': 27639, 'epoch': 3}
{'type': 'loss', 'content': 0.04577139392495155, 'timestamp': '2025-10-02 01:00:25.916330', 'step': 27640, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:25.972103', 'step': 27640, 'epoch': 3}
{'type': 'loss', 'content': 0.04805135726928711, 'timestamp': '2025-10-02 01:00:25.975340', 'step': 27641, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:26.031796', 'step': 27641, 'epoch': 3}
{'type': 'loss', 'content': 0.055611610412597656, 'timestamp': '2025-10-02 01:00:26.036118', 'step': 27642, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:26.093044', 'step': 27642, 'epoch': 3}
{'type': 'loss', 'content': 0.04074294492602348, 'timestamp': '2025-10-02 01:00:26.095985', 'step': 27643, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 01:00:26.169674', 'step': 27643, 'epoch': 3}
{'type': 'loss', 'content': 0.0068534864112734795, 'timestamp': '2025-10-02 01:00:26.182858', 'step': 27644, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:26.240665', 'step': 27644, 'epoch': 3}
{'type': 'loss', 'content': 0.031216450035572052, 'timestamp': '2025-10-02 01:00:26.244229', 'step': 27645, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:26.302339', 'step': 27645, 'epoch': 3}
{'type': 'loss', 'content': 0.0411185547709465, 'timestamp': '2025-10-02 01:00:26.305808', 'step': 27646, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:26.367718', 'step': 27646, 'epoch': 3}
{'type': 'loss', 'content': 0.015035883523523808, 'timestamp': '2025-10-02 01:00:26.377883', 'step': 27647, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:26.436515', 'step': 27647, 'epoch': 3}
{'type': 'loss', 'content': 0.11391663551330566, 'timestamp': '2025-10-02 01:00:26.445406', 'step': 27648, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:26.503263', 'step': 27648, 'epoch': 3}
{'type': 'loss', 'content': 0.12897519767284393, 'timestamp': '2025-10-02 01:00:26.506370', 'step': 27649, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:26.562106', 'step': 27649, 'epoch': 3}
{'type': 'loss', 'content': 0.0720890462398529, 'timestamp': '2025-10-02 01:00:26.569655', 'step': 27650, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:00:26.633098', 'step': 27650, 'epoch': 3}
{'type': 'loss', 'content': 0.008469226770102978, 'timestamp': '2025-10-02 01:00:26.643774', 'step': 27651, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:26.703841', 'step': 27651, 'epoch': 3}
{'type': 'loss', 'content': 0.07318520545959473, 'timestamp': '2025-10-02 01:00:26.710511', 'step': 27652, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:26.768490', 'step': 27652, 'epoch': 3}
{'type': 'loss', 'content': 0.014271481893956661, 'timestamp': '2025-10-02 01:00:26.775983', 'step': 27653, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:26.832255', 'step': 27653, 'epoch': 3}
{'type': 'loss', 'content': 0.03659084811806679, 'timestamp': '2025-10-02 01:00:26.836892', 'step': 27654, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:26.892036', 'step': 27654, 'epoch': 3}
{'type': 'loss', 'content': 0.030508069321513176, 'timestamp': '2025-10-02 01:00:26.899545', 'step': 27655, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:00:26.956530', 'step': 27655, 'epoch': 3}
{'type': 'loss', 'content': 0.09029747545719147, 'timestamp': '2025-10-02 01:00:26.963317', 'step': 27656, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:00:27.018332', 'step': 27656, 'epoch': 3}
{'type': 'loss', 'content': 0.11552755534648895, 'timestamp': '2025-10-02 01:00:27.021813', 'step': 27657, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:27.076329', 'step': 27657, 'epoch': 3}
{'type': 'loss', 'content': 0.055448733270168304, 'timestamp': '2025-10-02 01:00:27.082283', 'step': 27658, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:27.139622', 'step': 27658, 'epoch': 3}
{'type': 'loss', 'content': 0.052673764526844025, 'timestamp': '2025-10-02 01:00:27.145018', 'step': 27659, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:00:27.213359', 'step': 27659, 'epoch': 3}
{'type': 'loss', 'content': 0.005676616914570332, 'timestamp': '2025-10-02 01:00:27.225016', 'step': 27660, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:27.281857', 'step': 27660, 'epoch': 3}
{'type': 'loss', 'content': 0.021569881588220596, 'timestamp': '2025-10-02 01:00:27.287444', 'step': 27661, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:27.344620', 'step': 27661, 'epoch': 3}
{'type': 'loss', 'content': 0.01709475740790367, 'timestamp': '2025-10-02 01:00:27.347549', 'step': 27662, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:27.405753', 'step': 27662, 'epoch': 3}
{'type': 'loss', 'content': 0.03520883992314339, 'timestamp': '2025-10-02 01:00:27.408515', 'step': 27663, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:27.462634', 'step': 27663, 'epoch': 3}
{'type': 'loss', 'content': 0.017354793846607208, 'timestamp': '2025-10-02 01:00:27.472786', 'step': 27664, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:27.527232', 'step': 27664, 'epoch': 3}
{'type': 'loss', 'content': 0.014786492101848125, 'timestamp': '2025-10-02 01:00:27.537063', 'step': 27665, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:27.592270', 'step': 27665, 'epoch': 3}
{'type': 'loss', 'content': 0.045447126030921936, 'timestamp': '2025-10-02 01:00:27.595049', 'step': 27666, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:27.650714', 'step': 27666, 'epoch': 3}
{'type': 'loss', 'content': 0.02866850607097149, 'timestamp': '2025-10-02 01:00:27.653301', 'step': 27667, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:27.716435', 'step': 27667, 'epoch': 3}
{'type': 'loss', 'content': 0.013303019106388092, 'timestamp': '2025-10-02 01:00:27.727615', 'step': 27668, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:27.781849', 'step': 27668, 'epoch': 3}
{'type': 'loss', 'content': 0.04193735867738724, 'timestamp': '2025-10-02 01:00:27.784593', 'step': 27669, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:00:27.838506', 'step': 27669, 'epoch': 3}
{'type': 'loss', 'content': 0.02199731580913067, 'timestamp': '2025-10-02 01:00:27.840912', 'step': 27670, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:27.895915', 'step': 27670, 'epoch': 3}
{'type': 'loss', 'content': 0.005960635840892792, 'timestamp': '2025-10-02 01:00:27.903453', 'step': 27671, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:27.960084', 'step': 27671, 'epoch': 3}
{'type': 'loss', 'content': 0.027367999777197838, 'timestamp': '2025-10-02 01:00:27.970401', 'step': 27672, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:28.028183', 'step': 27672, 'epoch': 3}
{'type': 'loss', 'content': 0.041561391204595566, 'timestamp': '2025-10-02 01:00:28.034145', 'step': 27673, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:28.089322', 'step': 27673, 'epoch': 3}
{'type': 'loss', 'content': 0.026692230254411697, 'timestamp': '2025-10-02 01:00:28.091923', 'step': 27674, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:28.147761', 'step': 27674, 'epoch': 3}
{'type': 'loss', 'content': 0.02357863076031208, 'timestamp': '2025-10-02 01:00:28.150717', 'step': 27675, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:28.209035', 'step': 27675, 'epoch': 3}
{'type': 'loss', 'content': 0.019006067886948586, 'timestamp': '2025-10-02 01:00:28.219342', 'step': 27676, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:28.273002', 'step': 27676, 'epoch': 3}
{'type': 'loss', 'content': 0.035027772188186646, 'timestamp': '2025-10-02 01:00:28.278945', 'step': 27677, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 01:00:28.350997', 'step': 27677, 'epoch': 3}
{'type': 'loss', 'content': 0.016737043857574463, 'timestamp': '2025-10-02 01:00:28.363439', 'step': 27678, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:28.418681', 'step': 27678, 'epoch': 3}
{'type': 'loss', 'content': 0.27713513374328613, 'timestamp': '2025-10-02 01:00:28.421432', 'step': 27679, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:28.475862', 'step': 27679, 'epoch': 3}
{'type': 'loss', 'content': 0.031123224645853043, 'timestamp': '2025-10-02 01:00:28.482689', 'step': 27680, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:28.538283', 'step': 27680, 'epoch': 3}
{'type': 'loss', 'content': 0.014592020772397518, 'timestamp': '2025-10-02 01:00:28.547852', 'step': 27681, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:28.602421', 'step': 27681, 'epoch': 3}
{'type': 'loss', 'content': 0.011054749600589275, 'timestamp': '2025-10-02 01:00:28.605375', 'step': 27682, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:28.660356', 'step': 27682, 'epoch': 3}
{'type': 'loss', 'content': 0.012856203131377697, 'timestamp': '2025-10-02 01:00:28.667901', 'step': 27683, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:28.725278', 'step': 27683, 'epoch': 3}
{'type': 'loss', 'content': 0.06260700523853302, 'timestamp': '2025-10-02 01:00:28.731619', 'step': 27684, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:28.786183', 'step': 27684, 'epoch': 3}
{'type': 'loss', 'content': 0.012578514404594898, 'timestamp': '2025-10-02 01:00:28.788574', 'step': 27685, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:28.843458', 'step': 27685, 'epoch': 3}
{'type': 'loss', 'content': 0.07634464651346207, 'timestamp': '2025-10-02 01:00:28.846356', 'step': 27686, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:28.901037', 'step': 27686, 'epoch': 3}
{'type': 'loss', 'content': 0.04374656453728676, 'timestamp': '2025-10-02 01:00:28.906886', 'step': 27687, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:28.961034', 'step': 27687, 'epoch': 3}
{'type': 'loss', 'content': 0.05538263916969299, 'timestamp': '2025-10-02 01:00:28.967271', 'step': 27688, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:29.022013', 'step': 27688, 'epoch': 3}
{'type': 'loss', 'content': 0.07094191759824753, 'timestamp': '2025-10-02 01:00:29.024526', 'step': 27689, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:29.079657', 'step': 27689, 'epoch': 3}
{'type': 'loss', 'content': 0.01987738162279129, 'timestamp': '2025-10-02 01:00:29.088927', 'step': 27690, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:29.145022', 'step': 27690, 'epoch': 3}
{'type': 'loss', 'content': 0.023479321971535683, 'timestamp': '2025-10-02 01:00:29.154552', 'step': 27691, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:29.208729', 'step': 27691, 'epoch': 3}
{'type': 'loss', 'content': 0.04633535444736481, 'timestamp': '2025-10-02 01:00:29.215395', 'step': 27692, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:29.269525', 'step': 27692, 'epoch': 3}
{'type': 'loss', 'content': 0.06589262932538986, 'timestamp': '2025-10-02 01:00:29.276255', 'step': 27693, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:29.331976', 'step': 27693, 'epoch': 3}
{'type': 'loss', 'content': 0.047670867294073105, 'timestamp': '2025-10-02 01:00:29.334276', 'step': 27694, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:00:29.388661', 'step': 27694, 'epoch': 3}
{'type': 'loss', 'content': 0.06301479786634445, 'timestamp': '2025-10-02 01:00:29.391061', 'step': 27695, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:29.445039', 'step': 27695, 'epoch': 3}
{'type': 'loss', 'content': 0.0805688127875328, 'timestamp': '2025-10-02 01:00:29.451419', 'step': 27696, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:29.505379', 'step': 27696, 'epoch': 3}
{'type': 'loss', 'content': 0.06570015847682953, 'timestamp': '2025-10-02 01:00:29.515001', 'step': 27697, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:29.570298', 'step': 27697, 'epoch': 3}
{'type': 'loss', 'content': 0.025400955229997635, 'timestamp': '2025-10-02 01:00:29.576394', 'step': 27698, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:29.632937', 'step': 27698, 'epoch': 3}
{'type': 'loss', 'content': 0.015388531610369682, 'timestamp': '2025-10-02 01:00:29.638935', 'step': 27699, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:29.699627', 'step': 27699, 'epoch': 3}
{'type': 'loss', 'content': 0.009276331402361393, 'timestamp': '2025-10-02 01:00:29.710598', 'step': 27700, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:29.765292', 'step': 27700, 'epoch': 3}
{'type': 'loss', 'content': 0.02600170485675335, 'timestamp': '2025-10-02 01:00:29.767978', 'step': 27701, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:29.821859', 'step': 27701, 'epoch': 3}
{'type': 'loss', 'content': 0.03751853108406067, 'timestamp': '2025-10-02 01:00:29.824364', 'step': 27702, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:29.878903', 'step': 27702, 'epoch': 3}
{'type': 'loss', 'content': 0.034397322684526443, 'timestamp': '2025-10-02 01:00:29.881767', 'step': 27703, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:00:29.942623', 'step': 27703, 'epoch': 3}
{'type': 'loss', 'content': 0.05973070114850998, 'timestamp': '2025-10-02 01:00:29.949427', 'step': 27704, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:30.003073', 'step': 27704, 'epoch': 3}
{'type': 'loss', 'content': 0.038001108914613724, 'timestamp': '2025-10-02 01:00:30.005550', 'step': 27705, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:30.060066', 'step': 27705, 'epoch': 3}
{'type': 'loss', 'content': 0.04020339623093605, 'timestamp': '2025-10-02 01:00:30.062560', 'step': 27706, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:30.116842', 'step': 27706, 'epoch': 3}
{'type': 'loss', 'content': 0.020558418706059456, 'timestamp': '2025-10-02 01:00:30.122676', 'step': 27707, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:00:30.185497', 'step': 27707, 'epoch': 3}
{'type': 'loss', 'content': 0.016200866550207138, 'timestamp': '2025-10-02 01:00:30.196907', 'step': 27708, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:00:30.258809', 'step': 27708, 'epoch': 3}
{'type': 'loss', 'content': 0.0055673569440841675, 'timestamp': '2025-10-02 01:00:30.270610', 'step': 27709, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:30.327141', 'step': 27709, 'epoch': 3}
{'type': 'loss', 'content': 0.042539991438388824, 'timestamp': '2025-10-02 01:00:30.329945', 'step': 27710, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:30.386757', 'step': 27710, 'epoch': 3}
{'type': 'loss', 'content': 0.06488320976495743, 'timestamp': '2025-10-02 01:00:30.389373', 'step': 27711, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:30.447924', 'step': 27711, 'epoch': 3}
{'type': 'loss', 'content': 0.07720983028411865, 'timestamp': '2025-10-02 01:00:30.454308', 'step': 27712, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:30.510683', 'step': 27712, 'epoch': 3}
{'type': 'loss', 'content': 0.030748890712857246, 'timestamp': '2025-10-02 01:00:30.519782', 'step': 27713, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:30.574310', 'step': 27713, 'epoch': 3}
{'type': 'loss', 'content': 0.09777017682790756, 'timestamp': '2025-10-02 01:00:30.576999', 'step': 27714, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:00:30.631653', 'step': 27714, 'epoch': 3}
{'type': 'loss', 'content': 0.033516012132167816, 'timestamp': '2025-10-02 01:00:30.634336', 'step': 27715, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:30.689080', 'step': 27715, 'epoch': 3}
{'type': 'loss', 'content': 0.06677442789077759, 'timestamp': '2025-10-02 01:00:30.695493', 'step': 27716, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:30.749908', 'step': 27716, 'epoch': 3}
{'type': 'loss', 'content': 0.059443339705467224, 'timestamp': '2025-10-02 01:00:30.752824', 'step': 27717, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:30.808403', 'step': 27717, 'epoch': 3}
{'type': 'loss', 'content': 0.05429365113377571, 'timestamp': '2025-10-02 01:00:30.811247', 'step': 27718, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:30.866373', 'step': 27718, 'epoch': 3}
{'type': 'loss', 'content': 0.03619985654950142, 'timestamp': '2025-10-02 01:00:30.871938', 'step': 27719, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:30.934769', 'step': 27719, 'epoch': 3}
{'type': 'loss', 'content': 0.030059225857257843, 'timestamp': '2025-10-02 01:00:30.946015', 'step': 27720, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:31.002199', 'step': 27720, 'epoch': 3}
{'type': 'loss', 'content': 0.005101475398987532, 'timestamp': '2025-10-02 01:00:31.009375', 'step': 27721, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:31.072023', 'step': 27721, 'epoch': 3}
{'type': 'loss', 'content': 0.01729774661362171, 'timestamp': '2025-10-02 01:00:31.082443', 'step': 27722, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:31.137709', 'step': 27722, 'epoch': 3}
{'type': 'loss', 'content': 0.04047577828168869, 'timestamp': '2025-10-02 01:00:31.144656', 'step': 27723, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:31.201311', 'step': 27723, 'epoch': 3}
{'type': 'loss', 'content': 0.031574271619319916, 'timestamp': '2025-10-02 01:00:31.209048', 'step': 27724, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:31.264161', 'step': 27724, 'epoch': 3}
{'type': 'loss', 'content': 0.03915476053953171, 'timestamp': '2025-10-02 01:00:31.267008', 'step': 27725, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 01:00:31.342638', 'step': 27725, 'epoch': 3}
{'type': 'loss', 'content': 0.030512483790516853, 'timestamp': '2025-10-02 01:00:31.356067', 'step': 27726, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:31.420539', 'step': 27726, 'epoch': 3}
{'type': 'loss', 'content': 0.02793395332992077, 'timestamp': '2025-10-02 01:00:31.430973', 'step': 27727, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:31.486530', 'step': 27727, 'epoch': 3}
{'type': 'loss', 'content': 0.026682527735829353, 'timestamp': '2025-10-02 01:00:31.492604', 'step': 27728, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:31.547966', 'step': 27728, 'epoch': 3}
{'type': 'loss', 'content': 0.014950113371014595, 'timestamp': '2025-10-02 01:00:31.554696', 'step': 27729, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:31.615157', 'step': 27729, 'epoch': 3}
{'type': 'loss', 'content': 0.023872731253504753, 'timestamp': '2025-10-02 01:00:31.625295', 'step': 27730, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:31.680930', 'step': 27730, 'epoch': 3}
{'type': 'loss', 'content': 0.01698569767177105, 'timestamp': '2025-10-02 01:00:31.683603', 'step': 27731, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:31.742345', 'step': 27731, 'epoch': 3}
{'type': 'loss', 'content': 0.044363733381032944, 'timestamp': '2025-10-02 01:00:31.748935', 'step': 27732, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:31.804653', 'step': 27732, 'epoch': 3}
{'type': 'loss', 'content': 0.0824112668633461, 'timestamp': '2025-10-02 01:00:31.807136', 'step': 27733, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:31.863929', 'step': 27733, 'epoch': 3}
{'type': 'loss', 'content': 0.016265666112303734, 'timestamp': '2025-10-02 01:00:31.866444', 'step': 27734, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:31.922280', 'step': 27734, 'epoch': 3}
{'type': 'loss', 'content': 0.028261132538318634, 'timestamp': '2025-10-02 01:00:31.931805', 'step': 27735, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:31.987895', 'step': 27735, 'epoch': 3}
{'type': 'loss', 'content': 0.011972615495324135, 'timestamp': '2025-10-02 01:00:31.998006', 'step': 27736, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:32.052347', 'step': 27736, 'epoch': 3}
{'type': 'loss', 'content': 0.03374441713094711, 'timestamp': '2025-10-02 01:00:32.055019', 'step': 27737, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:00:32.124986', 'step': 27737, 'epoch': 3}
{'type': 'loss', 'content': 0.014367122203111649, 'timestamp': '2025-10-02 01:00:32.137293', 'step': 27738, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:32.194401', 'step': 27738, 'epoch': 3}
{'type': 'loss', 'content': 0.027018986642360687, 'timestamp': '2025-10-02 01:00:32.203887', 'step': 27739, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:32.260414', 'step': 27739, 'epoch': 3}
{'type': 'loss', 'content': 0.021159488707780838, 'timestamp': '2025-10-02 01:00:32.267016', 'step': 27740, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:32.329132', 'step': 27740, 'epoch': 3}
{'type': 'loss', 'content': 0.14457488059997559, 'timestamp': '2025-10-02 01:00:32.331512', 'step': 27741, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:00:32.386133', 'step': 27741, 'epoch': 3}
{'type': 'loss', 'content': 0.01945449784398079, 'timestamp': '2025-10-02 01:00:32.389019', 'step': 27742, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:32.444215', 'step': 27742, 'epoch': 3}
{'type': 'loss', 'content': 0.048008836805820465, 'timestamp': '2025-10-02 01:00:32.453531', 'step': 27743, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:32.508627', 'step': 27743, 'epoch': 3}
{'type': 'loss', 'content': 0.11721399426460266, 'timestamp': '2025-10-02 01:00:32.515361', 'step': 27744, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:32.571076', 'step': 27744, 'epoch': 3}
{'type': 'loss', 'content': 0.04849040135741234, 'timestamp': '2025-10-02 01:00:32.578384', 'step': 27745, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:32.634051', 'step': 27745, 'epoch': 3}
{'type': 'loss', 'content': 0.06477560102939606, 'timestamp': '2025-10-02 01:00:32.639796', 'step': 27746, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:32.695275', 'step': 27746, 'epoch': 3}
{'type': 'loss', 'content': 0.04965059086680412, 'timestamp': '2025-10-02 01:00:32.700658', 'step': 27747, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:32.755089', 'step': 27747, 'epoch': 3}
{'type': 'loss', 'content': 0.056959979236125946, 'timestamp': '2025-10-02 01:00:32.761540', 'step': 27748, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:32.815910', 'step': 27748, 'epoch': 3}
{'type': 'loss', 'content': 0.07036761194467545, 'timestamp': '2025-10-02 01:00:32.819850', 'step': 27749, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:32.876720', 'step': 27749, 'epoch': 3}
{'type': 'loss', 'content': 0.008872096426784992, 'timestamp': '2025-10-02 01:00:32.884295', 'step': 27750, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:32.946158', 'step': 27750, 'epoch': 3}
{'type': 'loss', 'content': 0.05507497116923332, 'timestamp': '2025-10-02 01:00:32.949239', 'step': 27751, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:33.004409', 'step': 27751, 'epoch': 3}
{'type': 'loss', 'content': 0.012621019966900349, 'timestamp': '2025-10-02 01:00:33.012201', 'step': 27752, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:33.066669', 'step': 27752, 'epoch': 3}
{'type': 'loss', 'content': 0.02060944028198719, 'timestamp': '2025-10-02 01:00:33.069107', 'step': 27753, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:33.124304', 'step': 27753, 'epoch': 3}
{'type': 'loss', 'content': 0.1482475996017456, 'timestamp': '2025-10-02 01:00:33.127610', 'step': 27754, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:33.182104', 'step': 27754, 'epoch': 3}
{'type': 'loss', 'content': 0.04274725541472435, 'timestamp': '2025-10-02 01:00:33.184885', 'step': 27755, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:33.242215', 'step': 27755, 'epoch': 3}
{'type': 'loss', 'content': 0.0023290079552680254, 'timestamp': '2025-10-02 01:00:33.252517', 'step': 27756, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:33.306826', 'step': 27756, 'epoch': 3}
{'type': 'loss', 'content': 0.05212157219648361, 'timestamp': '2025-10-02 01:00:33.310068', 'step': 27757, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:33.369162', 'step': 27757, 'epoch': 3}
{'type': 'loss', 'content': 0.0251889917999506, 'timestamp': '2025-10-02 01:00:33.379355', 'step': 27758, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:33.435636', 'step': 27758, 'epoch': 3}
{'type': 'loss', 'content': 0.024533933028578758, 'timestamp': '2025-10-02 01:00:33.445170', 'step': 27759, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:33.500643', 'step': 27759, 'epoch': 3}
{'type': 'loss', 'content': 0.031562596559524536, 'timestamp': '2025-10-02 01:00:33.506984', 'step': 27760, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:00:33.561714', 'step': 27760, 'epoch': 3}
{'type': 'loss', 'content': 0.047697484493255615, 'timestamp': '2025-10-02 01:00:33.564354', 'step': 27761, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:33.618185', 'step': 27761, 'epoch': 3}
{'type': 'loss', 'content': 0.0761832520365715, 'timestamp': '2025-10-02 01:00:33.621003', 'step': 27762, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:33.675916', 'step': 27762, 'epoch': 3}
{'type': 'loss', 'content': 0.05509199574589729, 'timestamp': '2025-10-02 01:00:33.678545', 'step': 27763, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:33.733075', 'step': 27763, 'epoch': 3}
{'type': 'loss', 'content': 0.12591098248958588, 'timestamp': '2025-10-02 01:00:33.742915', 'step': 27764, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:33.807195', 'step': 27764, 'epoch': 3}
{'type': 'loss', 'content': 0.11980527639389038, 'timestamp': '2025-10-02 01:00:33.809653', 'step': 27765, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:33.863858', 'step': 27765, 'epoch': 3}
{'type': 'loss', 'content': 0.030496083199977875, 'timestamp': '2025-10-02 01:00:33.866500', 'step': 27766, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:33.927636', 'step': 27766, 'epoch': 3}
{'type': 'loss', 'content': 0.0043853651732206345, 'timestamp': '2025-10-02 01:00:33.934913', 'step': 27767, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:00:33.990013', 'step': 27767, 'epoch': 3}
{'type': 'loss', 'content': 0.03789641335606575, 'timestamp': '2025-10-02 01:00:33.996265', 'step': 27768, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:34.056366', 'step': 27768, 'epoch': 3}
{'type': 'loss', 'content': 0.018461616709828377, 'timestamp': '2025-10-02 01:00:34.067371', 'step': 27769, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:34.122089', 'step': 27769, 'epoch': 3}
{'type': 'loss', 'content': 0.04278694465756416, 'timestamp': '2025-10-02 01:00:34.124457', 'step': 27770, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:34.193952', 'step': 27770, 'epoch': 3}
{'type': 'loss', 'content': 0.030418239533901215, 'timestamp': '2025-10-02 01:00:34.196549', 'step': 27771, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:34.252247', 'step': 27771, 'epoch': 3}
{'type': 'loss', 'content': 0.11081764847040176, 'timestamp': '2025-10-02 01:00:34.258646', 'step': 27772, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:34.315358', 'step': 27772, 'epoch': 3}
{'type': 'loss', 'content': 0.023181825876235962, 'timestamp': '2025-10-02 01:00:34.320985', 'step': 27773, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:34.376055', 'step': 27773, 'epoch': 3}
{'type': 'loss', 'content': 0.046236552298069, 'timestamp': '2025-10-02 01:00:34.385411', 'step': 27774, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:34.440962', 'step': 27774, 'epoch': 3}
{'type': 'loss', 'content': 0.023538624867796898, 'timestamp': '2025-10-02 01:00:34.444926', 'step': 27775, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:34.505567', 'step': 27775, 'epoch': 3}
{'type': 'loss', 'content': 0.07013215869665146, 'timestamp': '2025-10-02 01:00:34.511985', 'step': 27776, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:34.569752', 'step': 27776, 'epoch': 3}
{'type': 'loss', 'content': 0.06410186737775803, 'timestamp': '2025-10-02 01:00:34.571912', 'step': 27777, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:00:34.625781', 'step': 27777, 'epoch': 3}
{'type': 'loss', 'content': 0.10945131629705429, 'timestamp': '2025-10-02 01:00:34.629634', 'step': 27778, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:34.690636', 'step': 27778, 'epoch': 3}
{'type': 'loss', 'content': 0.04495372995734215, 'timestamp': '2025-10-02 01:00:34.696525', 'step': 27779, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:34.751917', 'step': 27779, 'epoch': 3}
{'type': 'loss', 'content': 0.12821418046951294, 'timestamp': '2025-10-02 01:00:34.758329', 'step': 27780, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:34.812935', 'step': 27780, 'epoch': 3}
{'type': 'loss', 'content': 0.05982835963368416, 'timestamp': '2025-10-02 01:00:34.816054', 'step': 27781, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:00:34.880390', 'step': 27781, 'epoch': 3}
{'type': 'loss', 'content': 0.03048786334693432, 'timestamp': '2025-10-02 01:00:34.891019', 'step': 27782, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:34.949034', 'step': 27782, 'epoch': 3}
{'type': 'loss', 'content': 0.06975284218788147, 'timestamp': '2025-10-02 01:00:34.953556', 'step': 27783, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:35.012451', 'step': 27783, 'epoch': 3}
{'type': 'loss', 'content': 0.055771470069885254, 'timestamp': '2025-10-02 01:00:35.022858', 'step': 27784, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:35.077624', 'step': 27784, 'epoch': 3}
{'type': 'loss', 'content': 0.029358740895986557, 'timestamp': '2025-10-02 01:00:35.081681', 'step': 27785, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:35.140733', 'step': 27785, 'epoch': 3}
{'type': 'loss', 'content': 0.026701483875513077, 'timestamp': '2025-10-02 01:00:35.150331', 'step': 27786, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:35.208706', 'step': 27786, 'epoch': 3}
{'type': 'loss', 'content': 0.04269137978553772, 'timestamp': '2025-10-02 01:00:35.212125', 'step': 27787, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:35.270002', 'step': 27787, 'epoch': 3}
{'type': 'loss', 'content': 0.09915722161531448, 'timestamp': '2025-10-02 01:00:35.279767', 'step': 27788, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:35.336934', 'step': 27788, 'epoch': 3}
{'type': 'loss', 'content': 0.06993523985147476, 'timestamp': '2025-10-02 01:00:35.347109', 'step': 27789, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:35.407274', 'step': 27789, 'epoch': 3}
{'type': 'loss', 'content': 0.05587421730160713, 'timestamp': '2025-10-02 01:00:35.411069', 'step': 27790, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:35.467866', 'step': 27790, 'epoch': 3}
{'type': 'loss', 'content': 0.09899139404296875, 'timestamp': '2025-10-02 01:00:35.470930', 'step': 27791, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:35.527995', 'step': 27791, 'epoch': 3}
{'type': 'loss', 'content': 0.01625250093638897, 'timestamp': '2025-10-02 01:00:35.534162', 'step': 27792, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:35.591802', 'step': 27792, 'epoch': 3}
{'type': 'loss', 'content': 0.06350281834602356, 'timestamp': '2025-10-02 01:00:35.594323', 'step': 27793, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:35.649324', 'step': 27793, 'epoch': 3}
{'type': 'loss', 'content': 0.034667015075683594, 'timestamp': '2025-10-02 01:00:35.656634', 'step': 27794, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:35.712781', 'step': 27794, 'epoch': 3}
{'type': 'loss', 'content': 0.039732255041599274, 'timestamp': '2025-10-02 01:00:35.727052', 'step': 27795, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:35.785695', 'step': 27795, 'epoch': 3}
{'type': 'loss', 'content': 0.14499488472938538, 'timestamp': '2025-10-02 01:00:35.797603', 'step': 27796, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:35.854431', 'step': 27796, 'epoch': 3}
{'type': 'loss', 'content': 0.06487994641065598, 'timestamp': '2025-10-02 01:00:35.857821', 'step': 27797, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:35.927377', 'step': 27797, 'epoch': 3}
{'type': 'loss', 'content': 0.1098564937710762, 'timestamp': '2025-10-02 01:00:35.934302', 'step': 27798, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:36.004656', 'step': 27798, 'epoch': 3}
{'type': 'loss', 'content': 0.06668950617313385, 'timestamp': '2025-10-02 01:00:36.011730', 'step': 27799, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:36.076273', 'step': 27799, 'epoch': 3}
{'type': 'loss', 'content': 0.2003687024116516, 'timestamp': '2025-10-02 01:00:36.086470', 'step': 27800, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:36.143094', 'step': 27800, 'epoch': 3}
{'type': 'loss', 'content': 0.039720579981803894, 'timestamp': '2025-10-02 01:00:36.145288', 'step': 27801, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:36.200358', 'step': 27801, 'epoch': 3}
{'type': 'loss', 'content': 0.018247364088892937, 'timestamp': '2025-10-02 01:00:36.206063', 'step': 27802, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:36.267921', 'step': 27802, 'epoch': 3}
{'type': 'loss', 'content': 0.052573297172784805, 'timestamp': '2025-10-02 01:00:36.275448', 'step': 27803, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:36.335377', 'step': 27803, 'epoch': 3}
{'type': 'loss', 'content': 0.08808249980211258, 'timestamp': '2025-10-02 01:00:36.341770', 'step': 27804, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:36.397824', 'step': 27804, 'epoch': 3}
{'type': 'loss', 'content': 0.03235749900341034, 'timestamp': '2025-10-02 01:00:36.400228', 'step': 27805, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:36.455979', 'step': 27805, 'epoch': 3}
{'type': 'loss', 'content': 0.03202904760837555, 'timestamp': '2025-10-02 01:00:36.458930', 'step': 27806, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:36.516410', 'step': 27806, 'epoch': 3}
{'type': 'loss', 'content': 0.09829612076282501, 'timestamp': '2025-10-02 01:00:36.518773', 'step': 27807, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:36.576179', 'step': 27807, 'epoch': 3}
{'type': 'loss', 'content': 0.02625115215778351, 'timestamp': '2025-10-02 01:00:36.585201', 'step': 27808, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:36.642646', 'step': 27808, 'epoch': 3}
{'type': 'loss', 'content': 0.09013229608535767, 'timestamp': '2025-10-02 01:00:36.646567', 'step': 27809, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:36.707910', 'step': 27809, 'epoch': 3}
{'type': 'loss', 'content': 0.024349771440029144, 'timestamp': '2025-10-02 01:00:36.717440', 'step': 27810, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:00:36.784249', 'step': 27810, 'epoch': 3}
{'type': 'loss', 'content': 0.06956523656845093, 'timestamp': '2025-10-02 01:00:36.786702', 'step': 27811, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:36.843106', 'step': 27811, 'epoch': 3}
{'type': 'loss', 'content': 0.03331390395760536, 'timestamp': '2025-10-02 01:00:36.849168', 'step': 27812, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:36.904091', 'step': 27812, 'epoch': 3}
{'type': 'loss', 'content': 0.012565342709422112, 'timestamp': '2025-10-02 01:00:36.906569', 'step': 27813, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:36.960929', 'step': 27813, 'epoch': 3}
{'type': 'loss', 'content': 0.02863277867436409, 'timestamp': '2025-10-02 01:00:36.966917', 'step': 27814, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 01:00:37.043804', 'step': 27814, 'epoch': 3}
{'type': 'loss', 'content': 0.043988119810819626, 'timestamp': '2025-10-02 01:00:37.056486', 'step': 27815, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:37.111598', 'step': 27815, 'epoch': 3}
{'type': 'loss', 'content': 0.04501758888363838, 'timestamp': '2025-10-02 01:00:37.117435', 'step': 27816, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:37.172205', 'step': 27816, 'epoch': 3}
{'type': 'loss', 'content': 0.014932667836546898, 'timestamp': '2025-10-02 01:00:37.175025', 'step': 27817, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:00:37.231041', 'step': 27817, 'epoch': 3}
{'type': 'loss', 'content': 0.031100362539291382, 'timestamp': '2025-10-02 01:00:37.240539', 'step': 27818, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:00:37.303132', 'step': 27818, 'epoch': 3}
{'type': 'loss', 'content': 0.03159886598587036, 'timestamp': '2025-10-02 01:00:37.313583', 'step': 27819, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:37.368635', 'step': 27819, 'epoch': 3}
{'type': 'loss', 'content': 0.027004363015294075, 'timestamp': '2025-10-02 01:00:37.375306', 'step': 27820, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:00:37.429884', 'step': 27820, 'epoch': 3}
{'type': 'loss', 'content': 0.02677225135266781, 'timestamp': '2025-10-02 01:00:37.432361', 'step': 27821, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:00:37.486944', 'step': 27821, 'epoch': 3}
{'type': 'loss', 'content': 0.02766025997698307, 'timestamp': '2025-10-02 01:00:37.489148', 'step': 27822, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:00:37.548113', 'step': 27822, 'epoch': 3}
{'type': 'loss', 'content': 0.008970505557954311, 'timestamp': '2025-10-02 01:00:37.558280', 'step': 27823, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:37.614049', 'step': 27823, 'epoch': 3}
{'type': 'loss', 'content': 0.01904447004199028, 'timestamp': '2025-10-02 01:00:37.620447', 'step': 27824, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:00:37.675388', 'step': 27824, 'epoch': 3}
{'type': 'loss', 'content': 0.08058202266693115, 'timestamp': '2025-10-02 01:00:37.677996', 'step': 27825, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:37.732807', 'step': 27825, 'epoch': 3}
{'type': 'loss', 'content': 0.026064706966280937, 'timestamp': '2025-10-02 01:00:37.735158', 'step': 27826, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:00:37.790473', 'step': 27826, 'epoch': 3}
{'type': 'loss', 'content': 0.044154148548841476, 'timestamp': '2025-10-02 01:00:37.792904', 'step': 27827, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:00:37.846683', 'step': 27827, 'epoch': 3}
{'type': 'loss', 'content': 0.12765833735466003, 'timestamp': '2025-10-02 01:00:37.852991', 'step': 27828, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:00:37.907320', 'step': 27828, 'epoch': 3}
{'type': 'loss', 'content': 0.04330936074256897, 'timestamp': '2025-10-02 01:00:37.912928', 'step': 27829, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:37.968265', 'step': 27829, 'epoch': 3}
{'type': 'loss', 'content': 0.06461367011070251, 'timestamp': '2025-10-02 01:00:37.975440', 'step': 27830, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:00:38.030530', 'step': 27830, 'epoch': 3}
{'type': 'loss', 'content': 0.021604228764772415, 'timestamp': '2025-10-02 01:00:38.039885', 'step': 27831, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:38.093999', 'step': 27831, 'epoch': 3}
{'type': 'loss', 'content': 0.03752255439758301, 'timestamp': '2025-10-02 01:00:38.100583', 'step': 27832, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:38.154993', 'step': 27832, 'epoch': 3}
{'type': 'loss', 'content': 0.049953483045101166, 'timestamp': '2025-10-02 01:00:38.157920', 'step': 27833, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:38.213436', 'step': 27833, 'epoch': 3}
{'type': 'loss', 'content': 0.05275959149003029, 'timestamp': '2025-10-02 01:00:38.216011', 'step': 27834, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:00:38.270774', 'step': 27834, 'epoch': 3}
{'type': 'loss', 'content': 0.06001921743154526, 'timestamp': '2025-10-02 01:00:38.273331', 'step': 27835, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:00:38.327409', 'step': 27835, 'epoch': 3}
{'type': 'loss', 'content': 0.03483419492840767, 'timestamp': '2025-10-02 01:00:38.333455', 'step': 27836, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:00:38.387556', 'step': 27836, 'epoch': 3}
{'type': 'loss', 'content': 0.04208941385149956, 'timestamp': '2025-10-02 01:00:38.390100', 'step': 27837, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:00:38.445889', 'step': 27837, 'epoch': 3}
{'type': 'loss', 'content': 0.04050638526678085, 'timestamp': '2025-10-02 01:00:38.448051', 'step': 27838, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:00:38.503235', 'step': 27838, 'epoch': 3}
{'type': 'loss', 'content': 0.07398203015327454, 'timestamp': '2025-10-02 01:00:38.510316', 'step': 27839, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:00:38.566218', 'step': 27839, 'epoch': 3}
{'type': 'loss', 'content': 0.052446600049734116, 'timestamp': '2025-10-02 01:00:38.572204', 'step': 27840, 'epoch': 3}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 01:01:05.854694', 'step': 27840, 'epoch': 3}
{'type': 'pplx', 'content': 90.0884359254528, 'timestamp': '2025-10-02 01:01:05.860947', 'step': 27840, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:05.922143', 'step': 27840, 'epoch': 3}
{'type': 'loss', 'content': 0.02299598604440689, 'timestamp': '2025-10-02 01:01:05.930900', 'step': 27841, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:05.995226', 'step': 27841, 'epoch': 3}
{'type': 'loss', 'content': 0.020893188193440437, 'timestamp': '2025-10-02 01:01:06.005429', 'step': 27842, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:06.085306', 'step': 27842, 'epoch': 3}
{'type': 'loss', 'content': 0.121924489736557, 'timestamp': '2025-10-02 01:01:06.088099', 'step': 27843, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:06.151949', 'step': 27843, 'epoch': 3}
{'type': 'loss', 'content': 0.043994296342134476, 'timestamp': '2025-10-02 01:01:06.159441', 'step': 27844, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:06.235590', 'step': 27844, 'epoch': 3}
{'type': 'loss', 'content': 0.04686739668250084, 'timestamp': '2025-10-02 01:01:06.246475', 'step': 27845, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:06.319059', 'step': 27845, 'epoch': 3}
{'type': 'loss', 'content': 0.059145499020814896, 'timestamp': '2025-10-02 01:01:06.322448', 'step': 27846, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:06.383511', 'step': 27846, 'epoch': 3}
{'type': 'loss', 'content': 0.02483821101486683, 'timestamp': '2025-10-02 01:01:06.387241', 'step': 27847, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:06.449165', 'step': 27847, 'epoch': 3}
{'type': 'loss', 'content': 0.11042340844869614, 'timestamp': '2025-10-02 01:01:06.460498', 'step': 27848, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:06.521364', 'step': 27848, 'epoch': 3}
{'type': 'loss', 'content': 0.0943542867898941, 'timestamp': '2025-10-02 01:01:06.530805', 'step': 27849, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:06.604365', 'step': 27849, 'epoch': 3}
{'type': 'loss', 'content': 0.007117376197129488, 'timestamp': '2025-10-02 01:01:06.614216', 'step': 27850, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:01:06.682437', 'step': 27850, 'epoch': 3}
{'type': 'loss', 'content': 0.002090458059683442, 'timestamp': '2025-10-02 01:01:06.693116', 'step': 27851, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:06.757520', 'step': 27851, 'epoch': 3}
{'type': 'loss', 'content': 0.03793792426586151, 'timestamp': '2025-10-02 01:01:06.767818', 'step': 27852, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:06.832212', 'step': 27852, 'epoch': 3}
{'type': 'loss', 'content': 0.11708162724971771, 'timestamp': '2025-10-02 01:01:06.841496', 'step': 27853, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:06.911443', 'step': 27853, 'epoch': 3}
{'type': 'loss', 'content': 0.03229384124279022, 'timestamp': '2025-10-02 01:01:06.930708', 'step': 27854, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:07.000669', 'step': 27854, 'epoch': 3}
{'type': 'loss', 'content': 0.03096490539610386, 'timestamp': '2025-10-02 01:01:07.007998', 'step': 27855, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:07.087841', 'step': 27855, 'epoch': 3}
{'type': 'loss', 'content': 0.0908019095659256, 'timestamp': '2025-10-02 01:01:07.096053', 'step': 27856, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:07.166741', 'step': 27856, 'epoch': 3}
{'type': 'loss', 'content': 0.08938571065664291, 'timestamp': '2025-10-02 01:01:07.177115', 'step': 27857, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:07.259868', 'step': 27857, 'epoch': 3}
{'type': 'loss', 'content': 0.0022971106227487326, 'timestamp': '2025-10-02 01:01:07.301090', 'step': 27858, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:01:07.397706', 'step': 27858, 'epoch': 3}
{'type': 'loss', 'content': 0.03431055322289467, 'timestamp': '2025-10-02 01:01:07.409569', 'step': 27859, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:07.504833', 'step': 27859, 'epoch': 3}
{'type': 'loss', 'content': 0.06364471465349197, 'timestamp': '2025-10-02 01:01:07.532408', 'step': 27860, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:07.615874', 'step': 27860, 'epoch': 3}
{'type': 'loss', 'content': 0.006552957464009523, 'timestamp': '2025-10-02 01:01:07.626642', 'step': 27861, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:07.701656', 'step': 27861, 'epoch': 3}
{'type': 'loss', 'content': 0.07704798132181168, 'timestamp': '2025-10-02 01:01:07.712653', 'step': 27862, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:07.808753', 'step': 27862, 'epoch': 3}
{'type': 'loss', 'content': 0.06343556940555573, 'timestamp': '2025-10-02 01:01:07.822632', 'step': 27863, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:07.916978', 'step': 27863, 'epoch': 3}
{'type': 'loss', 'content': 0.0540228933095932, 'timestamp': '2025-10-02 01:01:07.930049', 'step': 27864, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:08.026448', 'step': 27864, 'epoch': 3}
{'type': 'loss', 'content': 0.04450498893857002, 'timestamp': '2025-10-02 01:01:08.039495', 'step': 27865, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:08.129445', 'step': 27865, 'epoch': 3}
{'type': 'loss', 'content': 0.05533762276172638, 'timestamp': '2025-10-02 01:01:08.152644', 'step': 27866, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:08.225714', 'step': 27866, 'epoch': 3}
{'type': 'loss', 'content': 0.021087665110826492, 'timestamp': '2025-10-02 01:01:08.242301', 'step': 27867, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:08.323175', 'step': 27867, 'epoch': 3}
{'type': 'loss', 'content': 0.035090506076812744, 'timestamp': '2025-10-02 01:01:08.343354', 'step': 27868, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:08.427817', 'step': 27868, 'epoch': 3}
{'type': 'loss', 'content': 0.053738534450531006, 'timestamp': '2025-10-02 01:01:08.461771', 'step': 27869, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:08.540299', 'step': 27869, 'epoch': 3}
{'type': 'loss', 'content': 0.11658705025911331, 'timestamp': '2025-10-02 01:01:08.545450', 'step': 27870, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:08.625321', 'step': 27870, 'epoch': 3}
{'type': 'loss', 'content': 0.05336267128586769, 'timestamp': '2025-10-02 01:01:08.634769', 'step': 27871, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:08.702302', 'step': 27871, 'epoch': 3}
{'type': 'loss', 'content': 0.013704068027436733, 'timestamp': '2025-10-02 01:01:08.708902', 'step': 27872, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:01:08.795228', 'step': 27872, 'epoch': 3}
{'type': 'loss', 'content': 0.03967568278312683, 'timestamp': '2025-10-02 01:01:08.806938', 'step': 27873, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:08.881582', 'step': 27873, 'epoch': 3}
{'type': 'loss', 'content': 0.04019384831190109, 'timestamp': '2025-10-02 01:01:08.888838', 'step': 27874, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:08.951437', 'step': 27874, 'epoch': 3}
{'type': 'loss', 'content': 0.08988087624311447, 'timestamp': '2025-10-02 01:01:08.954305', 'step': 27875, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:09.010920', 'step': 27875, 'epoch': 3}
{'type': 'loss', 'content': 0.04221126437187195, 'timestamp': '2025-10-02 01:01:09.017424', 'step': 27876, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:09.093883', 'step': 27876, 'epoch': 3}
{'type': 'loss', 'content': 0.04435540363192558, 'timestamp': '2025-10-02 01:01:09.101244', 'step': 27877, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:01:09.175895', 'step': 27877, 'epoch': 3}
{'type': 'loss', 'content': 0.051666438579559326, 'timestamp': '2025-10-02 01:01:09.186517', 'step': 27878, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:09.260655', 'step': 27878, 'epoch': 3}
{'type': 'loss', 'content': 0.0576038621366024, 'timestamp': '2025-10-02 01:01:09.268954', 'step': 27879, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:09.343500', 'step': 27879, 'epoch': 3}
{'type': 'loss', 'content': 0.07978779822587967, 'timestamp': '2025-10-02 01:01:09.356703', 'step': 27880, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:09.420354', 'step': 27880, 'epoch': 3}
{'type': 'loss', 'content': 0.037154000252485275, 'timestamp': '2025-10-02 01:01:09.430806', 'step': 27881, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:01:09.500672', 'step': 27881, 'epoch': 3}
{'type': 'loss', 'content': 0.02018146589398384, 'timestamp': '2025-10-02 01:01:09.504540', 'step': 27882, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:09.565789', 'step': 27882, 'epoch': 3}
{'type': 'loss', 'content': 0.08806072175502777, 'timestamp': '2025-10-02 01:01:09.568910', 'step': 27883, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:09.642501', 'step': 27883, 'epoch': 3}
{'type': 'loss', 'content': 0.030834544450044632, 'timestamp': '2025-10-02 01:01:09.650584', 'step': 27884, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:01:09.719279', 'step': 27884, 'epoch': 3}
{'type': 'loss', 'content': 0.10482365638017654, 'timestamp': '2025-10-02 01:01:09.722229', 'step': 27885, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:09.789632', 'step': 27885, 'epoch': 3}
{'type': 'loss', 'content': 0.06690609455108643, 'timestamp': '2025-10-02 01:01:09.795544', 'step': 27886, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:09.852505', 'step': 27886, 'epoch': 3}
{'type': 'loss', 'content': 0.030089134350419044, 'timestamp': '2025-10-02 01:01:09.856393', 'step': 27887, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:09.926268', 'step': 27887, 'epoch': 3}
{'type': 'loss', 'content': 0.04595495015382767, 'timestamp': '2025-10-02 01:01:09.938444', 'step': 27888, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:10.007759', 'step': 27888, 'epoch': 3}
{'type': 'loss', 'content': 0.10465624183416367, 'timestamp': '2025-10-02 01:01:10.011485', 'step': 27889, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:10.076977', 'step': 27889, 'epoch': 3}
{'type': 'loss', 'content': 0.10678736865520477, 'timestamp': '2025-10-02 01:01:10.080547', 'step': 27890, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:10.154666', 'step': 27890, 'epoch': 3}
{'type': 'loss', 'content': 0.1536455601453781, 'timestamp': '2025-10-02 01:01:10.157947', 'step': 27891, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:01:10.236917', 'step': 27891, 'epoch': 3}
{'type': 'loss', 'content': 0.03934773802757263, 'timestamp': '2025-10-02 01:01:10.248301', 'step': 27892, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:10.318163', 'step': 27892, 'epoch': 3}
{'type': 'loss', 'content': 0.029737791046500206, 'timestamp': '2025-10-02 01:01:10.332863', 'step': 27893, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:10.414126', 'step': 27893, 'epoch': 3}
{'type': 'loss', 'content': 0.06301819533109665, 'timestamp': '2025-10-02 01:01:10.421999', 'step': 27894, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:10.479846', 'step': 27894, 'epoch': 3}
{'type': 'loss', 'content': 0.010938180610537529, 'timestamp': '2025-10-02 01:01:10.483399', 'step': 27895, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:10.552015', 'step': 27895, 'epoch': 3}
{'type': 'loss', 'content': 0.007774859666824341, 'timestamp': '2025-10-02 01:01:10.563794', 'step': 27896, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:10.627094', 'step': 27896, 'epoch': 3}
{'type': 'loss', 'content': 0.04068468138575554, 'timestamp': '2025-10-02 01:01:10.636262', 'step': 27897, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:10.712256', 'step': 27897, 'epoch': 3}
{'type': 'loss', 'content': 0.029051383957266808, 'timestamp': '2025-10-02 01:01:10.715661', 'step': 27898, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:10.778672', 'step': 27898, 'epoch': 3}
{'type': 'loss', 'content': 0.022590722888708115, 'timestamp': '2025-10-02 01:01:10.788542', 'step': 27899, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:10.845730', 'step': 27899, 'epoch': 3}
{'type': 'loss', 'content': 0.001353223342448473, 'timestamp': '2025-10-02 01:01:10.853290', 'step': 27900, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:10.914306', 'step': 27900, 'epoch': 3}
{'type': 'loss', 'content': 0.10052742063999176, 'timestamp': '2025-10-02 01:01:10.922577', 'step': 27901, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:10.997948', 'step': 27901, 'epoch': 3}
{'type': 'loss', 'content': 0.07374086230993271, 'timestamp': '2025-10-02 01:01:11.005635', 'step': 27902, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:11.067106', 'step': 27902, 'epoch': 3}
{'type': 'loss', 'content': 0.030777622014284134, 'timestamp': '2025-10-02 01:01:11.078108', 'step': 27903, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:11.146196', 'step': 27903, 'epoch': 3}
{'type': 'loss', 'content': 0.051319416612386703, 'timestamp': '2025-10-02 01:01:11.159729', 'step': 27904, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:11.244292', 'step': 27904, 'epoch': 3}
{'type': 'loss', 'content': 0.06918346136808395, 'timestamp': '2025-10-02 01:01:11.247556', 'step': 27905, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:11.321449', 'step': 27905, 'epoch': 3}
{'type': 'loss', 'content': 0.017265545204281807, 'timestamp': '2025-10-02 01:01:11.334244', 'step': 27906, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:11.417844', 'step': 27906, 'epoch': 3}
{'type': 'loss', 'content': 0.06805719435214996, 'timestamp': '2025-10-02 01:01:11.431910', 'step': 27907, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:11.509115', 'step': 27907, 'epoch': 3}
{'type': 'loss', 'content': 0.019912581890821457, 'timestamp': '2025-10-02 01:01:11.520728', 'step': 27908, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:11.594982', 'step': 27908, 'epoch': 3}
{'type': 'loss', 'content': 0.01681017503142357, 'timestamp': '2025-10-02 01:01:11.605222', 'step': 27909, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:11.681610', 'step': 27909, 'epoch': 3}
{'type': 'loss', 'content': 0.11338848620653152, 'timestamp': '2025-10-02 01:01:11.689962', 'step': 27910, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:11.757331', 'step': 27910, 'epoch': 3}
{'type': 'loss', 'content': 0.07628189027309418, 'timestamp': '2025-10-02 01:01:11.760775', 'step': 27911, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:11.821915', 'step': 27911, 'epoch': 3}
{'type': 'loss', 'content': 0.04192622750997543, 'timestamp': '2025-10-02 01:01:11.832918', 'step': 27912, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:11.910412', 'step': 27912, 'epoch': 3}
{'type': 'loss', 'content': 0.05226368084549904, 'timestamp': '2025-10-02 01:01:11.920017', 'step': 27913, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:12.023216', 'step': 27913, 'epoch': 3}
{'type': 'loss', 'content': 0.07705213129520416, 'timestamp': '2025-10-02 01:01:12.027722', 'step': 27914, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:12.114891', 'step': 27914, 'epoch': 3}
{'type': 'loss', 'content': 0.015050589106976986, 'timestamp': '2025-10-02 01:01:12.126033', 'step': 27915, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:12.201267', 'step': 27915, 'epoch': 3}
{'type': 'loss', 'content': 0.022217167541384697, 'timestamp': '2025-10-02 01:01:12.209191', 'step': 27916, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:12.265656', 'step': 27916, 'epoch': 3}
{'type': 'loss', 'content': 0.011928621679544449, 'timestamp': '2025-10-02 01:01:12.270632', 'step': 27917, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:12.330395', 'step': 27917, 'epoch': 3}
{'type': 'loss', 'content': 0.039421312510967255, 'timestamp': '2025-10-02 01:01:12.333083', 'step': 27918, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:12.408879', 'step': 27918, 'epoch': 3}
{'type': 'loss', 'content': 0.07150501757860184, 'timestamp': '2025-10-02 01:01:12.413874', 'step': 27919, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:12.481684', 'step': 27919, 'epoch': 3}
{'type': 'loss', 'content': 0.03511406108736992, 'timestamp': '2025-10-02 01:01:12.492634', 'step': 27920, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:12.548327', 'step': 27920, 'epoch': 3}
{'type': 'loss', 'content': 0.049522802233695984, 'timestamp': '2025-10-02 01:01:12.555548', 'step': 27921, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:12.615285', 'step': 27921, 'epoch': 3}
{'type': 'loss', 'content': 0.045378465205430984, 'timestamp': '2025-10-02 01:01:12.621458', 'step': 27922, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:12.690511', 'step': 27922, 'epoch': 3}
{'type': 'loss', 'content': 0.001105823670513928, 'timestamp': '2025-10-02 01:01:12.695656', 'step': 27923, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:12.752820', 'step': 27923, 'epoch': 3}
{'type': 'loss', 'content': 0.06537873297929764, 'timestamp': '2025-10-02 01:01:12.759273', 'step': 27924, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:12.819325', 'step': 27924, 'epoch': 3}
{'type': 'loss', 'content': 0.011565463617444038, 'timestamp': '2025-10-02 01:01:12.828883', 'step': 27925, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:12.886613', 'step': 27925, 'epoch': 3}
{'type': 'loss', 'content': 0.02547234669327736, 'timestamp': '2025-10-02 01:01:12.889329', 'step': 27926, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:12.952520', 'step': 27926, 'epoch': 3}
{'type': 'loss', 'content': 0.04416608065366745, 'timestamp': '2025-10-02 01:01:12.955421', 'step': 27927, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:13.016801', 'step': 27927, 'epoch': 3}
{'type': 'loss', 'content': 0.018376709893345833, 'timestamp': '2025-10-02 01:01:13.022629', 'step': 27928, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:13.082001', 'step': 27928, 'epoch': 3}
{'type': 'loss', 'content': 0.027904167771339417, 'timestamp': '2025-10-02 01:01:13.088001', 'step': 27929, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:13.145678', 'step': 27929, 'epoch': 3}
{'type': 'loss', 'content': 0.02673976495862007, 'timestamp': '2025-10-02 01:01:13.148755', 'step': 27930, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:13.208113', 'step': 27930, 'epoch': 3}
{'type': 'loss', 'content': 0.01791512221097946, 'timestamp': '2025-10-02 01:01:13.212183', 'step': 27931, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:13.276223', 'step': 27931, 'epoch': 3}
{'type': 'loss', 'content': 0.04924864321947098, 'timestamp': '2025-10-02 01:01:13.282683', 'step': 27932, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:13.343028', 'step': 27932, 'epoch': 3}
{'type': 'loss', 'content': 0.09660623222589493, 'timestamp': '2025-10-02 01:01:13.346037', 'step': 27933, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:13.415953', 'step': 27933, 'epoch': 3}
{'type': 'loss', 'content': 0.002645449945703149, 'timestamp': '2025-10-02 01:01:13.426444', 'step': 27934, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:13.488986', 'step': 27934, 'epoch': 3}
{'type': 'loss', 'content': 0.01876683533191681, 'timestamp': '2025-10-02 01:01:13.496544', 'step': 27935, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:13.559631', 'step': 27935, 'epoch': 3}
{'type': 'loss', 'content': 0.03979578614234924, 'timestamp': '2025-10-02 01:01:13.570570', 'step': 27936, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:13.639593', 'step': 27936, 'epoch': 3}
{'type': 'loss', 'content': 0.013875090517103672, 'timestamp': '2025-10-02 01:01:13.650931', 'step': 27937, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:13.710104', 'step': 27937, 'epoch': 3}
{'type': 'loss', 'content': 0.07356835156679153, 'timestamp': '2025-10-02 01:01:13.718049', 'step': 27938, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:13.791870', 'step': 27938, 'epoch': 3}
{'type': 'loss', 'content': 0.05375117063522339, 'timestamp': '2025-10-02 01:01:13.795257', 'step': 27939, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:13.852757', 'step': 27939, 'epoch': 3}
{'type': 'loss', 'content': 0.026236003264784813, 'timestamp': '2025-10-02 01:01:13.865052', 'step': 27940, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:13.932798', 'step': 27940, 'epoch': 3}
{'type': 'loss', 'content': 0.03408190980553627, 'timestamp': '2025-10-02 01:01:13.938798', 'step': 27941, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:13.998851', 'step': 27941, 'epoch': 3}
{'type': 'loss', 'content': 0.029069332405924797, 'timestamp': '2025-10-02 01:01:14.002388', 'step': 27942, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:14.065379', 'step': 27942, 'epoch': 3}
{'type': 'loss', 'content': 0.10182704776525497, 'timestamp': '2025-10-02 01:01:14.069061', 'step': 27943, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:14.130317', 'step': 27943, 'epoch': 3}
{'type': 'loss', 'content': 0.12450020760297775, 'timestamp': '2025-10-02 01:01:14.137099', 'step': 27944, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:14.193265', 'step': 27944, 'epoch': 3}
{'type': 'loss', 'content': 0.00466886255890131, 'timestamp': '2025-10-02 01:01:14.203002', 'step': 27945, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:14.259423', 'step': 27945, 'epoch': 3}
{'type': 'loss', 'content': 0.06533947587013245, 'timestamp': '2025-10-02 01:01:14.262236', 'step': 27946, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:14.318131', 'step': 27946, 'epoch': 3}
{'type': 'loss', 'content': 0.022407829761505127, 'timestamp': '2025-10-02 01:01:14.321158', 'step': 27947, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:01:14.377783', 'step': 27947, 'epoch': 3}
{'type': 'loss', 'content': 0.02611882984638214, 'timestamp': '2025-10-02 01:01:14.388081', 'step': 27948, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:14.445270', 'step': 27948, 'epoch': 3}
{'type': 'loss', 'content': 0.09879706054925919, 'timestamp': '2025-10-02 01:01:14.451548', 'step': 27949, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:14.522889', 'step': 27949, 'epoch': 3}
{'type': 'loss', 'content': 0.03726004436612129, 'timestamp': '2025-10-02 01:01:14.533368', 'step': 27950, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:14.594509', 'step': 27950, 'epoch': 3}
{'type': 'loss', 'content': 0.012111762538552284, 'timestamp': '2025-10-02 01:01:14.598588', 'step': 27951, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:14.659761', 'step': 27951, 'epoch': 3}
{'type': 'loss', 'content': 0.06334728747606277, 'timestamp': '2025-10-02 01:01:14.666898', 'step': 27952, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:14.722947', 'step': 27952, 'epoch': 3}
{'type': 'loss', 'content': 0.006295431405305862, 'timestamp': '2025-10-02 01:01:14.725884', 'step': 27953, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:14.787222', 'step': 27953, 'epoch': 3}
{'type': 'loss', 'content': 0.08234372735023499, 'timestamp': '2025-10-02 01:01:14.794644', 'step': 27954, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:14.851738', 'step': 27954, 'epoch': 3}
{'type': 'loss', 'content': 0.03786255791783333, 'timestamp': '2025-10-02 01:01:14.861082', 'step': 27955, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:14.923633', 'step': 27955, 'epoch': 3}
{'type': 'loss', 'content': 0.03381258621811867, 'timestamp': '2025-10-02 01:01:14.934189', 'step': 27956, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:14.995890', 'step': 27956, 'epoch': 3}
{'type': 'loss', 'content': 0.03548508509993553, 'timestamp': '2025-10-02 01:01:14.999433', 'step': 27957, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:15.056686', 'step': 27957, 'epoch': 3}
{'type': 'loss', 'content': 0.005255227908492088, 'timestamp': '2025-10-02 01:01:15.064315', 'step': 27958, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:15.121873', 'step': 27958, 'epoch': 3}
{'type': 'loss', 'content': 0.044896118342876434, 'timestamp': '2025-10-02 01:01:15.131186', 'step': 27959, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:15.193870', 'step': 27959, 'epoch': 3}
{'type': 'loss', 'content': 0.029180852696299553, 'timestamp': '2025-10-02 01:01:15.202352', 'step': 27960, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:15.275994', 'step': 27960, 'epoch': 3}
{'type': 'loss', 'content': 0.017800163477659225, 'timestamp': '2025-10-02 01:01:15.278543', 'step': 27961, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:15.335001', 'step': 27961, 'epoch': 3}
{'type': 'loss', 'content': 0.04400930926203728, 'timestamp': '2025-10-02 01:01:15.338516', 'step': 27962, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:15.399519', 'step': 27962, 'epoch': 3}
{'type': 'loss', 'content': 0.02141260728240013, 'timestamp': '2025-10-02 01:01:15.408862', 'step': 27963, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:15.465264', 'step': 27963, 'epoch': 3}
{'type': 'loss', 'content': 0.06141181290149689, 'timestamp': '2025-10-02 01:01:15.472048', 'step': 27964, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:15.533051', 'step': 27964, 'epoch': 3}
{'type': 'loss', 'content': 0.004916223231703043, 'timestamp': '2025-10-02 01:01:15.540284', 'step': 27965, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:15.603592', 'step': 27965, 'epoch': 3}
{'type': 'loss', 'content': 0.016344239935278893, 'timestamp': '2025-10-02 01:01:15.613139', 'step': 27966, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:15.671729', 'step': 27966, 'epoch': 3}
{'type': 'loss', 'content': 0.06287131458520889, 'timestamp': '2025-10-02 01:01:15.679193', 'step': 27967, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:15.733508', 'step': 27967, 'epoch': 3}
{'type': 'loss', 'content': 0.12694895267486572, 'timestamp': '2025-10-02 01:01:15.740032', 'step': 27968, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:15.800712', 'step': 27968, 'epoch': 3}
{'type': 'loss', 'content': 0.022726964205503464, 'timestamp': '2025-10-02 01:01:15.812003', 'step': 27969, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:01:15.866627', 'step': 27969, 'epoch': 3}
{'type': 'loss', 'content': 0.04298974573612213, 'timestamp': '2025-10-02 01:01:15.869007', 'step': 27970, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:15.925369', 'step': 27970, 'epoch': 3}
{'type': 'loss', 'content': 0.04192543402314186, 'timestamp': '2025-10-02 01:01:15.928689', 'step': 27971, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:15.983186', 'step': 27971, 'epoch': 3}
{'type': 'loss', 'content': 0.04564196243882179, 'timestamp': '2025-10-02 01:01:15.989590', 'step': 27972, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:16.048310', 'step': 27972, 'epoch': 3}
{'type': 'loss', 'content': 0.035873979330062866, 'timestamp': '2025-10-02 01:01:16.059288', 'step': 27973, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:16.115201', 'step': 27973, 'epoch': 3}
{'type': 'loss', 'content': 0.05861542373895645, 'timestamp': '2025-10-02 01:01:16.117899', 'step': 27974, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:01:16.180302', 'step': 27974, 'epoch': 3}
{'type': 'loss', 'content': 0.010736611671745777, 'timestamp': '2025-10-02 01:01:16.190931', 'step': 27975, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:16.245859', 'step': 27975, 'epoch': 3}
{'type': 'loss', 'content': 0.05850832164287567, 'timestamp': '2025-10-02 01:01:16.251705', 'step': 27976, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:16.307523', 'step': 27976, 'epoch': 3}
{'type': 'loss', 'content': 0.05608194321393967, 'timestamp': '2025-10-02 01:01:16.310064', 'step': 27977, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:16.364277', 'step': 27977, 'epoch': 3}
{'type': 'loss', 'content': 0.08621969819068909, 'timestamp': '2025-10-02 01:01:16.366739', 'step': 27978, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:16.423611', 'step': 27978, 'epoch': 3}
{'type': 'loss', 'content': 0.06715348362922668, 'timestamp': '2025-10-02 01:01:16.426235', 'step': 27979, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:16.480500', 'step': 27979, 'epoch': 3}
{'type': 'loss', 'content': 0.03959629312157631, 'timestamp': '2025-10-02 01:01:16.486471', 'step': 27980, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:01:16.547886', 'step': 27980, 'epoch': 3}
{'type': 'loss', 'content': 0.015501347370445728, 'timestamp': '2025-10-02 01:01:16.559423', 'step': 27981, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:16.622109', 'step': 27981, 'epoch': 3}
{'type': 'loss', 'content': 0.015776438638567924, 'timestamp': '2025-10-02 01:01:16.632536', 'step': 27982, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:01:16.695510', 'step': 27982, 'epoch': 3}
{'type': 'loss', 'content': 0.06040794402360916, 'timestamp': '2025-10-02 01:01:16.706156', 'step': 27983, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:16.761139', 'step': 27983, 'epoch': 3}
{'type': 'loss', 'content': 0.06657762825489044, 'timestamp': '2025-10-02 01:01:16.773747', 'step': 27984, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:16.827917', 'step': 27984, 'epoch': 3}
{'type': 'loss', 'content': 0.1399398297071457, 'timestamp': '2025-10-02 01:01:16.830511', 'step': 27985, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:16.884785', 'step': 27985, 'epoch': 3}
{'type': 'loss', 'content': 0.012307485565543175, 'timestamp': '2025-10-02 01:01:16.887258', 'step': 27986, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:16.942703', 'step': 27986, 'epoch': 3}
{'type': 'loss', 'content': 0.057533636689186096, 'timestamp': '2025-10-02 01:01:16.945248', 'step': 27987, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:16.999674', 'step': 27987, 'epoch': 3}
{'type': 'loss', 'content': 0.0655108317732811, 'timestamp': '2025-10-02 01:01:17.006205', 'step': 27988, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:17.061706', 'step': 27988, 'epoch': 3}
{'type': 'loss', 'content': 0.0002554288075771183, 'timestamp': '2025-10-02 01:01:17.069307', 'step': 27989, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:17.128023', 'step': 27989, 'epoch': 3}
{'type': 'loss', 'content': 0.01998787932097912, 'timestamp': '2025-10-02 01:01:17.135390', 'step': 27990, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:17.191524', 'step': 27990, 'epoch': 3}
{'type': 'loss', 'content': 0.059459444135427475, 'timestamp': '2025-10-02 01:01:17.194045', 'step': 27991, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:17.248602', 'step': 27991, 'epoch': 3}
{'type': 'loss', 'content': 0.02241184562444687, 'timestamp': '2025-10-02 01:01:17.254866', 'step': 27992, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:17.308945', 'step': 27992, 'epoch': 3}
{'type': 'loss', 'content': 0.0034215219784528017, 'timestamp': '2025-10-02 01:01:17.316676', 'step': 27993, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:17.372799', 'step': 27993, 'epoch': 3}
{'type': 'loss', 'content': 0.02596253529191017, 'timestamp': '2025-10-02 01:01:17.382303', 'step': 27994, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:17.436842', 'step': 27994, 'epoch': 3}
{'type': 'loss', 'content': 0.04839600622653961, 'timestamp': '2025-10-02 01:01:17.439353', 'step': 27995, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:17.495302', 'step': 27995, 'epoch': 3}
{'type': 'loss', 'content': 0.03034231998026371, 'timestamp': '2025-10-02 01:01:17.501344', 'step': 27996, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:01:17.561844', 'step': 27996, 'epoch': 3}
{'type': 'loss', 'content': 0.03193000704050064, 'timestamp': '2025-10-02 01:01:17.573599', 'step': 27997, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:17.629826', 'step': 27997, 'epoch': 3}
{'type': 'loss', 'content': 0.02263863943517208, 'timestamp': '2025-10-02 01:01:17.637674', 'step': 27998, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:17.693795', 'step': 27998, 'epoch': 3}
{'type': 'loss', 'content': 0.0194085780531168, 'timestamp': '2025-10-02 01:01:17.699420', 'step': 27999, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:17.754051', 'step': 27999, 'epoch': 3}
{'type': 'loss', 'content': 0.025722188875079155, 'timestamp': '2025-10-02 01:01:17.760070', 'step': 28000, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 28000', 'timestamp': '2025-10-02 01:01:18.157423', 'step': 28000, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:01:18.223428', 'step': 28000, 'epoch': 3}
{'type': 'loss', 'content': 0.009269447065889835, 'timestamp': '2025-10-02 01:01:18.236808', 'step': 28001, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:18.300358', 'step': 28001, 'epoch': 3}
{'type': 'loss', 'content': 0.005078916437923908, 'timestamp': '2025-10-02 01:01:18.310873', 'step': 28002, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:18.367081', 'step': 28002, 'epoch': 3}
{'type': 'loss', 'content': 0.08637583255767822, 'timestamp': '2025-10-02 01:01:18.369483', 'step': 28003, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:18.424165', 'step': 28003, 'epoch': 3}
{'type': 'loss', 'content': 0.01889570616185665, 'timestamp': '2025-10-02 01:01:18.430420', 'step': 28004, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:18.484907', 'step': 28004, 'epoch': 3}
{'type': 'loss', 'content': 0.03261943906545639, 'timestamp': '2025-10-02 01:01:18.490579', 'step': 28005, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:18.546146', 'step': 28005, 'epoch': 3}
{'type': 'loss', 'content': 0.041782405227422714, 'timestamp': '2025-10-02 01:01:18.555558', 'step': 28006, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:18.611006', 'step': 28006, 'epoch': 3}
{'type': 'loss', 'content': 0.09389130771160126, 'timestamp': '2025-10-02 01:01:18.613093', 'step': 28007, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:18.668750', 'step': 28007, 'epoch': 3}
{'type': 'loss', 'content': 0.07727470248937607, 'timestamp': '2025-10-02 01:01:18.674911', 'step': 28008, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:18.728295', 'step': 28008, 'epoch': 3}
{'type': 'loss', 'content': 0.02624306082725525, 'timestamp': '2025-10-02 01:01:18.735720', 'step': 28009, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:18.790716', 'step': 28009, 'epoch': 3}
{'type': 'loss', 'content': 0.10316372662782669, 'timestamp': '2025-10-02 01:01:18.799746', 'step': 28010, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:18.854446', 'step': 28010, 'epoch': 3}
{'type': 'loss', 'content': 0.0173073410987854, 'timestamp': '2025-10-02 01:01:18.863780', 'step': 28011, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:18.919893', 'step': 28011, 'epoch': 3}
{'type': 'loss', 'content': 0.013417646288871765, 'timestamp': '2025-10-02 01:01:18.930305', 'step': 28012, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:18.984040', 'step': 28012, 'epoch': 3}
{'type': 'loss', 'content': 0.08452367037534714, 'timestamp': '2025-10-02 01:01:18.990903', 'step': 28013, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:19.049450', 'step': 28013, 'epoch': 3}
{'type': 'loss', 'content': 0.11654049158096313, 'timestamp': '2025-10-02 01:01:19.051820', 'step': 28014, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:19.108158', 'step': 28014, 'epoch': 3}
{'type': 'loss', 'content': 0.060081176459789276, 'timestamp': '2025-10-02 01:01:19.110639', 'step': 28015, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:19.167406', 'step': 28015, 'epoch': 3}
{'type': 'loss', 'content': 0.048028137534856796, 'timestamp': '2025-10-02 01:01:19.175836', 'step': 28016, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:19.232297', 'step': 28016, 'epoch': 3}
{'type': 'loss', 'content': 0.0662081390619278, 'timestamp': '2025-10-02 01:01:19.235555', 'step': 28017, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:19.292360', 'step': 28017, 'epoch': 3}
{'type': 'loss', 'content': 0.03158089891076088, 'timestamp': '2025-10-02 01:01:19.295479', 'step': 28018, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:19.351618', 'step': 28018, 'epoch': 3}
{'type': 'loss', 'content': 0.0540652871131897, 'timestamp': '2025-10-02 01:01:19.354107', 'step': 28019, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:19.409175', 'step': 28019, 'epoch': 3}
{'type': 'loss', 'content': 0.01359986886382103, 'timestamp': '2025-10-02 01:01:19.415539', 'step': 28020, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:19.470033', 'step': 28020, 'epoch': 3}
{'type': 'loss', 'content': 0.10113946348428726, 'timestamp': '2025-10-02 01:01:19.472579', 'step': 28021, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 01:01:19.543765', 'step': 28021, 'epoch': 3}
{'type': 'loss', 'content': 0.0219778660684824, 'timestamp': '2025-10-02 01:01:19.556162', 'step': 28022, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:19.615242', 'step': 28022, 'epoch': 3}
{'type': 'loss', 'content': 0.050498105585575104, 'timestamp': '2025-10-02 01:01:19.617698', 'step': 28023, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:01:19.682384', 'step': 28023, 'epoch': 3}
{'type': 'loss', 'content': 0.04257851094007492, 'timestamp': '2025-10-02 01:01:19.694084', 'step': 28024, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:19.752298', 'step': 28024, 'epoch': 3}
{'type': 'loss', 'content': 0.018529027700424194, 'timestamp': '2025-10-02 01:01:19.758479', 'step': 28025, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:19.814319', 'step': 28025, 'epoch': 3}
{'type': 'loss', 'content': 0.07797833532094955, 'timestamp': '2025-10-02 01:01:19.817218', 'step': 28026, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:19.873428', 'step': 28026, 'epoch': 3}
{'type': 'loss', 'content': 0.010010524652898312, 'timestamp': '2025-10-02 01:01:19.876589', 'step': 28027, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:01:19.931687', 'step': 28027, 'epoch': 3}
{'type': 'loss', 'content': 0.027123406529426575, 'timestamp': '2025-10-02 01:01:19.938079', 'step': 28028, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:19.992959', 'step': 28028, 'epoch': 3}
{'type': 'loss', 'content': 0.1379055231809616, 'timestamp': '2025-10-02 01:01:19.995758', 'step': 28029, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:20.055572', 'step': 28029, 'epoch': 3}
{'type': 'loss', 'content': 0.022425444796681404, 'timestamp': '2025-10-02 01:01:20.065080', 'step': 28030, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:20.121977', 'step': 28030, 'epoch': 3}
{'type': 'loss', 'content': 0.06248103082180023, 'timestamp': '2025-10-02 01:01:20.125244', 'step': 28031, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:20.181113', 'step': 28031, 'epoch': 3}
{'type': 'loss', 'content': 0.04366040974855423, 'timestamp': '2025-10-02 01:01:20.187253', 'step': 28032, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:20.242416', 'step': 28032, 'epoch': 3}
{'type': 'loss', 'content': 0.0033790888264775276, 'timestamp': '2025-10-02 01:01:20.249950', 'step': 28033, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:20.313355', 'step': 28033, 'epoch': 3}
{'type': 'loss', 'content': 0.013749866746366024, 'timestamp': '2025-10-02 01:01:20.323868', 'step': 28034, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:20.383623', 'step': 28034, 'epoch': 3}
{'type': 'loss', 'content': 0.09444105625152588, 'timestamp': '2025-10-02 01:01:20.387572', 'step': 28035, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:20.459775', 'step': 28035, 'epoch': 3}
{'type': 'loss', 'content': 0.034320153295993805, 'timestamp': '2025-10-02 01:01:20.467265', 'step': 28036, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:20.536030', 'step': 28036, 'epoch': 3}
{'type': 'loss', 'content': 0.1299183964729309, 'timestamp': '2025-10-02 01:01:20.540004', 'step': 28037, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:20.599702', 'step': 28037, 'epoch': 3}
{'type': 'loss', 'content': 0.025118257850408554, 'timestamp': '2025-10-02 01:01:20.604000', 'step': 28038, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:20.669976', 'step': 28038, 'epoch': 3}
{'type': 'loss', 'content': 0.06856319308280945, 'timestamp': '2025-10-02 01:01:20.676022', 'step': 28039, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:20.744125', 'step': 28039, 'epoch': 3}
{'type': 'loss', 'content': 0.02160990983247757, 'timestamp': '2025-10-02 01:01:20.755080', 'step': 28040, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:20.811870', 'step': 28040, 'epoch': 3}
{'type': 'loss', 'content': 0.0394960418343544, 'timestamp': '2025-10-02 01:01:20.815859', 'step': 28041, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:20.874667', 'step': 28041, 'epoch': 3}
{'type': 'loss', 'content': 0.05549196898937225, 'timestamp': '2025-10-02 01:01:20.878782', 'step': 28042, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:20.947659', 'step': 28042, 'epoch': 3}
{'type': 'loss', 'content': 0.09443001449108124, 'timestamp': '2025-10-02 01:01:20.951451', 'step': 28043, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:21.016063', 'step': 28043, 'epoch': 3}
{'type': 'loss', 'content': 0.07048823684453964, 'timestamp': '2025-10-02 01:01:21.024113', 'step': 28044, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:21.082490', 'step': 28044, 'epoch': 3}
{'type': 'loss', 'content': 0.0519724115729332, 'timestamp': '2025-10-02 01:01:21.088258', 'step': 28045, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:21.154029', 'step': 28045, 'epoch': 3}
{'type': 'loss', 'content': 0.02715509943664074, 'timestamp': '2025-10-02 01:01:21.156503', 'step': 28046, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:21.227198', 'step': 28046, 'epoch': 3}
{'type': 'loss', 'content': 0.018203506246209145, 'timestamp': '2025-10-02 01:01:21.230164', 'step': 28047, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:21.313379', 'step': 28047, 'epoch': 3}
{'type': 'loss', 'content': 0.10157248377799988, 'timestamp': '2025-10-02 01:01:21.328378', 'step': 28048, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:21.385816', 'step': 28048, 'epoch': 3}
{'type': 'loss', 'content': 0.06781161576509476, 'timestamp': '2025-10-02 01:01:21.389685', 'step': 28049, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:01:21.447086', 'step': 28049, 'epoch': 3}
{'type': 'loss', 'content': 0.08768163621425629, 'timestamp': '2025-10-02 01:01:21.450702', 'step': 28050, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:21.510284', 'step': 28050, 'epoch': 3}
{'type': 'loss', 'content': 0.005079708993434906, 'timestamp': '2025-10-02 01:01:21.513234', 'step': 28051, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:21.572454', 'step': 28051, 'epoch': 3}
{'type': 'loss', 'content': 0.034218400716781616, 'timestamp': '2025-10-02 01:01:21.578958', 'step': 28052, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:21.654811', 'step': 28052, 'epoch': 3}
{'type': 'loss', 'content': 0.02986323833465576, 'timestamp': '2025-10-02 01:01:21.658874', 'step': 28053, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:21.717287', 'step': 28053, 'epoch': 3}
{'type': 'loss', 'content': 0.0463530495762825, 'timestamp': '2025-10-02 01:01:21.719880', 'step': 28054, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:21.786248', 'step': 28054, 'epoch': 3}
{'type': 'loss', 'content': 0.01653422601521015, 'timestamp': '2025-10-02 01:01:21.788922', 'step': 28055, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:21.856491', 'step': 28055, 'epoch': 3}
{'type': 'loss', 'content': 0.043542005121707916, 'timestamp': '2025-10-02 01:01:21.862306', 'step': 28056, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:21.918831', 'step': 28056, 'epoch': 3}
{'type': 'loss', 'content': 0.11619648337364197, 'timestamp': '2025-10-02 01:01:21.929119', 'step': 28057, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:21.996509', 'step': 28057, 'epoch': 3}
{'type': 'loss', 'content': 0.03325827047228813, 'timestamp': '2025-10-02 01:01:22.006636', 'step': 28058, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:22.070591', 'step': 28058, 'epoch': 3}
{'type': 'loss', 'content': 0.00011114242079202086, 'timestamp': '2025-10-02 01:01:22.078290', 'step': 28059, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:01:22.148932', 'step': 28059, 'epoch': 3}
{'type': 'loss', 'content': 0.019210273399949074, 'timestamp': '2025-10-02 01:01:22.160584', 'step': 28060, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:22.223646', 'step': 28060, 'epoch': 3}
{'type': 'loss', 'content': 0.045002568513154984, 'timestamp': '2025-10-02 01:01:22.233887', 'step': 28061, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:22.295988', 'step': 28061, 'epoch': 3}
{'type': 'loss', 'content': 0.04761948809027672, 'timestamp': '2025-10-02 01:01:22.298877', 'step': 28062, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:22.356472', 'step': 28062, 'epoch': 3}
{'type': 'loss', 'content': 0.028533753007650375, 'timestamp': '2025-10-02 01:01:22.363950', 'step': 28063, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:22.422556', 'step': 28063, 'epoch': 3}
{'type': 'loss', 'content': 0.031106533482670784, 'timestamp': '2025-10-02 01:01:22.430123', 'step': 28064, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:22.491447', 'step': 28064, 'epoch': 3}
{'type': 'loss', 'content': 0.04701792821288109, 'timestamp': '2025-10-02 01:01:22.502347', 'step': 28065, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:01:22.567326', 'step': 28065, 'epoch': 3}
{'type': 'loss', 'content': 0.013747499324381351, 'timestamp': '2025-10-02 01:01:22.578198', 'step': 28066, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:22.638562', 'step': 28066, 'epoch': 3}
{'type': 'loss', 'content': 0.02854941412806511, 'timestamp': '2025-10-02 01:01:22.642005', 'step': 28067, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:22.705099', 'step': 28067, 'epoch': 3}
{'type': 'loss', 'content': 0.019648034125566483, 'timestamp': '2025-10-02 01:01:22.713013', 'step': 28068, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:22.783572', 'step': 28068, 'epoch': 3}
{'type': 'loss', 'content': 0.15211229026317596, 'timestamp': '2025-10-02 01:01:22.788057', 'step': 28069, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:22.853683', 'step': 28069, 'epoch': 3}
{'type': 'loss', 'content': 0.02613171376287937, 'timestamp': '2025-10-02 01:01:22.857382', 'step': 28070, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:22.921022', 'step': 28070, 'epoch': 3}
{'type': 'loss', 'content': 0.04199546203017235, 'timestamp': '2025-10-02 01:01:22.923661', 'step': 28071, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:22.980992', 'step': 28071, 'epoch': 3}
{'type': 'loss', 'content': 0.013991444371640682, 'timestamp': '2025-10-02 01:01:22.988457', 'step': 28072, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:23.045604', 'step': 28072, 'epoch': 3}
{'type': 'loss', 'content': 0.04141707718372345, 'timestamp': '2025-10-02 01:01:23.051340', 'step': 28073, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:23.113726', 'step': 28073, 'epoch': 3}
{'type': 'loss', 'content': 0.034462425857782364, 'timestamp': '2025-10-02 01:01:23.121062', 'step': 28074, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:23.180460', 'step': 28074, 'epoch': 3}
{'type': 'loss', 'content': 0.05846592038869858, 'timestamp': '2025-10-02 01:01:23.186225', 'step': 28075, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:23.243768', 'step': 28075, 'epoch': 3}
{'type': 'loss', 'content': 0.018323233351111412, 'timestamp': '2025-10-02 01:01:23.254975', 'step': 28076, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:23.316185', 'step': 28076, 'epoch': 3}
{'type': 'loss', 'content': 0.04156568646430969, 'timestamp': '2025-10-02 01:01:23.318732', 'step': 28077, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:23.380939', 'step': 28077, 'epoch': 3}
{'type': 'loss', 'content': 0.09598593413829803, 'timestamp': '2025-10-02 01:01:23.384169', 'step': 28078, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:23.445265', 'step': 28078, 'epoch': 3}
{'type': 'loss', 'content': 0.03874204680323601, 'timestamp': '2025-10-02 01:01:23.448299', 'step': 28079, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:23.519388', 'step': 28079, 'epoch': 3}
{'type': 'loss', 'content': 0.14130799472332, 'timestamp': '2025-10-02 01:01:23.527136', 'step': 28080, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:23.589891', 'step': 28080, 'epoch': 3}
{'type': 'loss', 'content': 0.06015939638018608, 'timestamp': '2025-10-02 01:01:23.595720', 'step': 28081, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:23.653855', 'step': 28081, 'epoch': 3}
{'type': 'loss', 'content': 0.06421852111816406, 'timestamp': '2025-10-02 01:01:23.656985', 'step': 28082, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 01:01:23.735277', 'step': 28082, 'epoch': 3}
{'type': 'loss', 'content': 0.01942935213446617, 'timestamp': '2025-10-02 01:01:23.749088', 'step': 28083, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:23.807686', 'step': 28083, 'epoch': 3}
{'type': 'loss', 'content': 0.022809620946645737, 'timestamp': '2025-10-02 01:01:23.814198', 'step': 28084, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:23.870016', 'step': 28084, 'epoch': 3}
{'type': 'loss', 'content': 0.03650972619652748, 'timestamp': '2025-10-02 01:01:23.873229', 'step': 28085, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:23.949928', 'step': 28085, 'epoch': 3}
{'type': 'loss', 'content': 0.03750276193022728, 'timestamp': '2025-10-02 01:01:23.952481', 'step': 28086, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:24.019740', 'step': 28086, 'epoch': 3}
{'type': 'loss', 'content': 0.039191361516714096, 'timestamp': '2025-10-02 01:01:24.027285', 'step': 28087, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:24.088413', 'step': 28087, 'epoch': 3}
{'type': 'loss', 'content': 0.028994152322411537, 'timestamp': '2025-10-02 01:01:24.095480', 'step': 28088, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:24.157273', 'step': 28088, 'epoch': 3}
{'type': 'loss', 'content': 0.03463957458734512, 'timestamp': '2025-10-02 01:01:24.160627', 'step': 28089, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:24.219053', 'step': 28089, 'epoch': 3}
{'type': 'loss', 'content': 0.07048167288303375, 'timestamp': '2025-10-02 01:01:24.222119', 'step': 28090, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:24.284517', 'step': 28090, 'epoch': 3}
{'type': 'loss', 'content': 0.12813790142536163, 'timestamp': '2025-10-02 01:01:24.288542', 'step': 28091, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:24.344964', 'step': 28091, 'epoch': 3}
{'type': 'loss', 'content': 0.023282241076231003, 'timestamp': '2025-10-02 01:01:24.354822', 'step': 28092, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:24.416239', 'step': 28092, 'epoch': 3}
{'type': 'loss', 'content': 0.05691995099186897, 'timestamp': '2025-10-02 01:01:24.427189', 'step': 28093, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:24.484308', 'step': 28093, 'epoch': 3}
{'type': 'loss', 'content': 0.09391072392463684, 'timestamp': '2025-10-02 01:01:24.487806', 'step': 28094, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 01:01:24.563523', 'step': 28094, 'epoch': 3}
{'type': 'loss', 'content': 0.005746964830905199, 'timestamp': '2025-10-02 01:01:24.576694', 'step': 28095, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:24.636214', 'step': 28095, 'epoch': 3}
{'type': 'loss', 'content': 0.029290955513715744, 'timestamp': '2025-10-02 01:01:24.646506', 'step': 28096, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:24.706087', 'step': 28096, 'epoch': 3}
{'type': 'loss', 'content': 0.0925762951374054, 'timestamp': '2025-10-02 01:01:24.709938', 'step': 28097, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:24.766387', 'step': 28097, 'epoch': 3}
{'type': 'loss', 'content': 0.030151143670082092, 'timestamp': '2025-10-02 01:01:24.771909', 'step': 28098, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:24.833878', 'step': 28098, 'epoch': 3}
{'type': 'loss', 'content': 0.0410965196788311, 'timestamp': '2025-10-02 01:01:24.844118', 'step': 28099, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:24.924419', 'step': 28099, 'epoch': 3}
{'type': 'loss', 'content': 0.03519736975431442, 'timestamp': '2025-10-02 01:01:24.935661', 'step': 28100, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:24.992266', 'step': 28100, 'epoch': 3}
{'type': 'loss', 'content': 0.006447099149227142, 'timestamp': '2025-10-02 01:01:24.994786', 'step': 28101, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:25.056729', 'step': 28101, 'epoch': 3}
{'type': 'loss', 'content': 0.02640421874821186, 'timestamp': '2025-10-02 01:01:25.066265', 'step': 28102, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:25.123440', 'step': 28102, 'epoch': 3}
{'type': 'loss', 'content': 0.06514463573694229, 'timestamp': '2025-10-02 01:01:25.126240', 'step': 28103, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:25.183125', 'step': 28103, 'epoch': 3}
{'type': 'loss', 'content': 0.008498935960233212, 'timestamp': '2025-10-02 01:01:25.193463', 'step': 28104, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:25.250858', 'step': 28104, 'epoch': 3}
{'type': 'loss', 'content': 0.0208565816283226, 'timestamp': '2025-10-02 01:01:25.254436', 'step': 28105, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:25.312124', 'step': 28105, 'epoch': 3}
{'type': 'loss', 'content': 0.02614734135568142, 'timestamp': '2025-10-02 01:01:25.319792', 'step': 28106, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:25.380183', 'step': 28106, 'epoch': 3}
{'type': 'loss', 'content': 0.04511498659849167, 'timestamp': '2025-10-02 01:01:25.387571', 'step': 28107, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:25.444426', 'step': 28107, 'epoch': 3}
{'type': 'loss', 'content': 0.10320046544075012, 'timestamp': '2025-10-02 01:01:25.452196', 'step': 28108, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:25.510685', 'step': 28108, 'epoch': 3}
{'type': 'loss', 'content': 0.01904349774122238, 'timestamp': '2025-10-02 01:01:25.520943', 'step': 28109, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:25.582343', 'step': 28109, 'epoch': 3}
{'type': 'loss', 'content': 0.06627404689788818, 'timestamp': '2025-10-02 01:01:25.591918', 'step': 28110, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:25.653415', 'step': 28110, 'epoch': 3}
{'type': 'loss', 'content': 0.09824854135513306, 'timestamp': '2025-10-02 01:01:25.660805', 'step': 28111, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:25.719806', 'step': 28111, 'epoch': 3}
{'type': 'loss', 'content': 0.03906934708356857, 'timestamp': '2025-10-02 01:01:25.725566', 'step': 28112, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:25.784265', 'step': 28112, 'epoch': 3}
{'type': 'loss', 'content': 0.016419610008597374, 'timestamp': '2025-10-02 01:01:25.793801', 'step': 28113, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:01:25.858990', 'step': 28113, 'epoch': 3}
{'type': 'loss', 'content': 0.05016801878809929, 'timestamp': '2025-10-02 01:01:25.869624', 'step': 28114, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:25.934001', 'step': 28114, 'epoch': 3}
{'type': 'loss', 'content': 0.06920578330755234, 'timestamp': '2025-10-02 01:01:25.943531', 'step': 28115, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:26.011846', 'step': 28115, 'epoch': 3}
{'type': 'loss', 'content': 0.12944357097148895, 'timestamp': '2025-10-02 01:01:26.020288', 'step': 28116, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:26.079032', 'step': 28116, 'epoch': 3}
{'type': 'loss', 'content': 0.016048645600676537, 'timestamp': '2025-10-02 01:01:26.082610', 'step': 28117, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:26.139421', 'step': 28117, 'epoch': 3}
{'type': 'loss', 'content': 0.020836105570197105, 'timestamp': '2025-10-02 01:01:26.148807', 'step': 28118, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:26.209501', 'step': 28118, 'epoch': 3}
{'type': 'loss', 'content': 0.02032466046512127, 'timestamp': '2025-10-02 01:01:26.218764', 'step': 28119, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:26.281300', 'step': 28119, 'epoch': 3}
{'type': 'loss', 'content': 0.009352060034871101, 'timestamp': '2025-10-02 01:01:26.288415', 'step': 28120, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:01:26.358620', 'step': 28120, 'epoch': 3}
{'type': 'loss', 'content': 0.05048049986362457, 'timestamp': '2025-10-02 01:01:26.363452', 'step': 28121, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:26.427962', 'step': 28121, 'epoch': 3}
{'type': 'loss', 'content': 0.0752837061882019, 'timestamp': '2025-10-02 01:01:26.433930', 'step': 28122, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:26.513841', 'step': 28122, 'epoch': 3}
{'type': 'loss', 'content': 0.10975667834281921, 'timestamp': '2025-10-02 01:01:26.518706', 'step': 28123, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:26.584267', 'step': 28123, 'epoch': 3}
{'type': 'loss', 'content': 0.04704461246728897, 'timestamp': '2025-10-02 01:01:26.591921', 'step': 28124, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:26.654652', 'step': 28124, 'epoch': 3}
{'type': 'loss', 'content': 0.0461636483669281, 'timestamp': '2025-10-02 01:01:26.659516', 'step': 28125, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:26.721660', 'step': 28125, 'epoch': 3}
{'type': 'loss', 'content': 0.05489840358495712, 'timestamp': '2025-10-02 01:01:26.729296', 'step': 28126, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:26.791066', 'step': 28126, 'epoch': 3}
{'type': 'loss', 'content': 0.0020835718605667353, 'timestamp': '2025-10-02 01:01:26.794642', 'step': 28127, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:01:26.860559', 'step': 28127, 'epoch': 3}
{'type': 'loss', 'content': 0.01263859961181879, 'timestamp': '2025-10-02 01:01:26.871970', 'step': 28128, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:26.928251', 'step': 28128, 'epoch': 3}
{'type': 'loss', 'content': 0.04215230420231819, 'timestamp': '2025-10-02 01:01:26.934252', 'step': 28129, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:27.005856', 'step': 28129, 'epoch': 3}
{'type': 'loss', 'content': 0.1462525874376297, 'timestamp': '2025-10-02 01:01:27.009807', 'step': 28130, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:27.067616', 'step': 28130, 'epoch': 3}
{'type': 'loss', 'content': 0.046199340373277664, 'timestamp': '2025-10-02 01:01:27.077089', 'step': 28131, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:27.131733', 'step': 28131, 'epoch': 3}
{'type': 'loss', 'content': 0.049435313791036606, 'timestamp': '2025-10-02 01:01:27.139159', 'step': 28132, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:27.194268', 'step': 28132, 'epoch': 3}
{'type': 'loss', 'content': 0.03142844885587692, 'timestamp': '2025-10-02 01:01:27.196904', 'step': 28133, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:27.252252', 'step': 28133, 'epoch': 3}
{'type': 'loss', 'content': 0.0628395527601242, 'timestamp': '2025-10-02 01:01:27.256143', 'step': 28134, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:27.314455', 'step': 28134, 'epoch': 3}
{'type': 'loss', 'content': 0.022432679310441017, 'timestamp': '2025-10-02 01:01:27.321945', 'step': 28135, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:27.383829', 'step': 28135, 'epoch': 3}
{'type': 'loss', 'content': 0.07744665443897247, 'timestamp': '2025-10-02 01:01:27.390929', 'step': 28136, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:27.447449', 'step': 28136, 'epoch': 3}
{'type': 'loss', 'content': 0.07091677188873291, 'timestamp': '2025-10-02 01:01:27.450647', 'step': 28137, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:27.508900', 'step': 28137, 'epoch': 3}
{'type': 'loss', 'content': 0.05313364788889885, 'timestamp': '2025-10-02 01:01:27.514913', 'step': 28138, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:27.571964', 'step': 28138, 'epoch': 3}
{'type': 'loss', 'content': 0.06411062180995941, 'timestamp': '2025-10-02 01:01:27.574887', 'step': 28139, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:27.661033', 'step': 28139, 'epoch': 3}
{'type': 'loss', 'content': 0.04699353501200676, 'timestamp': '2025-10-02 01:01:27.670777', 'step': 28140, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:27.747491', 'step': 28140, 'epoch': 3}
{'type': 'loss', 'content': 0.1163702979683876, 'timestamp': '2025-10-02 01:01:27.751455', 'step': 28141, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:27.811597', 'step': 28141, 'epoch': 3}
{'type': 'loss', 'content': 0.013222411274909973, 'timestamp': '2025-10-02 01:01:27.819175', 'step': 28142, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:27.909885', 'step': 28142, 'epoch': 3}
{'type': 'loss', 'content': 0.015685291960835457, 'timestamp': '2025-10-02 01:01:27.913599', 'step': 28143, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:01:27.982363', 'step': 28143, 'epoch': 3}
{'type': 'loss', 'content': 0.09874077886343002, 'timestamp': '2025-10-02 01:01:27.993808', 'step': 28144, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:28.050518', 'step': 28144, 'epoch': 3}
{'type': 'loss', 'content': 0.0783434510231018, 'timestamp': '2025-10-02 01:01:28.055275', 'step': 28145, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:28.123815', 'step': 28145, 'epoch': 3}
{'type': 'loss', 'content': 0.03654053434729576, 'timestamp': '2025-10-02 01:01:28.127738', 'step': 28146, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:28.186348', 'step': 28146, 'epoch': 3}
{'type': 'loss', 'content': 0.015477750450372696, 'timestamp': '2025-10-02 01:01:28.195875', 'step': 28147, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:28.270271', 'step': 28147, 'epoch': 3}
{'type': 'loss', 'content': 0.0070279366336762905, 'timestamp': '2025-10-02 01:01:28.278524', 'step': 28148, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:28.335598', 'step': 28148, 'epoch': 3}
{'type': 'loss', 'content': 0.13373655080795288, 'timestamp': '2025-10-02 01:01:28.347116', 'step': 28149, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:28.430782', 'step': 28149, 'epoch': 3}
{'type': 'loss', 'content': 0.10085436701774597, 'timestamp': '2025-10-02 01:01:28.442816', 'step': 28150, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:01:28.534736', 'step': 28150, 'epoch': 3}
{'type': 'loss', 'content': 0.03679906949400902, 'timestamp': '2025-10-02 01:01:28.545942', 'step': 28151, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:28.635456', 'step': 28151, 'epoch': 3}
{'type': 'loss', 'content': 0.024232102558016777, 'timestamp': '2025-10-02 01:01:28.650750', 'step': 28152, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:28.729777', 'step': 28152, 'epoch': 3}
{'type': 'loss', 'content': 0.04709850251674652, 'timestamp': '2025-10-02 01:01:28.733886', 'step': 28153, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:28.798634', 'step': 28153, 'epoch': 3}
{'type': 'loss', 'content': 0.028761830180883408, 'timestamp': '2025-10-02 01:01:28.813048', 'step': 28154, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:28.892192', 'step': 28154, 'epoch': 3}
{'type': 'loss', 'content': 0.0032345440704375505, 'timestamp': '2025-10-02 01:01:28.903255', 'step': 28155, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:28.982508', 'step': 28155, 'epoch': 3}
{'type': 'loss', 'content': 0.042449552565813065, 'timestamp': '2025-10-02 01:01:28.989323', 'step': 28156, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:29.046934', 'step': 28156, 'epoch': 3}
{'type': 'loss', 'content': 0.06761214137077332, 'timestamp': '2025-10-02 01:01:29.052664', 'step': 28157, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:29.127653', 'step': 28157, 'epoch': 3}
{'type': 'loss', 'content': 0.06532109528779984, 'timestamp': '2025-10-02 01:01:29.131200', 'step': 28158, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:29.206846', 'step': 28158, 'epoch': 3}
{'type': 'loss', 'content': 0.04165346175432205, 'timestamp': '2025-10-02 01:01:29.219629', 'step': 28159, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:01:29.294275', 'step': 28159, 'epoch': 3}
{'type': 'loss', 'content': 0.006320565473288298, 'timestamp': '2025-10-02 01:01:29.310292', 'step': 28160, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:29.382978', 'step': 28160, 'epoch': 3}
{'type': 'loss', 'content': 0.060660261660814285, 'timestamp': '2025-10-02 01:01:29.386861', 'step': 28161, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:29.451691', 'step': 28161, 'epoch': 3}
{'type': 'loss', 'content': 0.03785165399312973, 'timestamp': '2025-10-02 01:01:29.462476', 'step': 28162, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:29.545840', 'step': 28162, 'epoch': 3}
{'type': 'loss', 'content': 0.06964778900146484, 'timestamp': '2025-10-02 01:01:29.558079', 'step': 28163, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:29.647924', 'step': 28163, 'epoch': 3}
{'type': 'loss', 'content': 0.01815500482916832, 'timestamp': '2025-10-02 01:01:29.664251', 'step': 28164, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:29.745940', 'step': 28164, 'epoch': 3}
{'type': 'loss', 'content': 0.02963334508240223, 'timestamp': '2025-10-02 01:01:29.755515', 'step': 28165, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:29.819144', 'step': 28165, 'epoch': 3}
{'type': 'loss', 'content': 0.06983077526092529, 'timestamp': '2025-10-02 01:01:29.823628', 'step': 28166, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:29.883519', 'step': 28166, 'epoch': 3}
{'type': 'loss', 'content': 0.0263208020478487, 'timestamp': '2025-10-02 01:01:29.895459', 'step': 28167, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:29.961719', 'step': 28167, 'epoch': 3}
{'type': 'loss', 'content': 0.06970835477113724, 'timestamp': '2025-10-02 01:01:29.977418', 'step': 28168, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:30.044486', 'step': 28168, 'epoch': 3}
{'type': 'loss', 'content': 0.03833373636007309, 'timestamp': '2025-10-02 01:01:30.047306', 'step': 28169, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:30.135194', 'step': 28169, 'epoch': 3}
{'type': 'loss', 'content': 0.008593964390456676, 'timestamp': '2025-10-02 01:01:30.146988', 'step': 28170, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:30.234440', 'step': 28170, 'epoch': 3}
{'type': 'loss', 'content': 0.048797670751810074, 'timestamp': '2025-10-02 01:01:30.246684', 'step': 28171, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:30.342920', 'step': 28171, 'epoch': 3}
{'type': 'loss', 'content': 0.024562520906329155, 'timestamp': '2025-10-02 01:01:30.367194', 'step': 28172, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:30.465085', 'step': 28172, 'epoch': 3}
{'type': 'loss', 'content': 0.028068868443369865, 'timestamp': '2025-10-02 01:01:30.475298', 'step': 28173, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:30.548198', 'step': 28173, 'epoch': 3}
{'type': 'loss', 'content': 0.030472705140709877, 'timestamp': '2025-10-02 01:01:30.552719', 'step': 28174, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:30.635355', 'step': 28174, 'epoch': 3}
{'type': 'loss', 'content': 0.05734097212553024, 'timestamp': '2025-10-02 01:01:30.640233', 'step': 28175, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:30.699173', 'step': 28175, 'epoch': 3}
{'type': 'loss', 'content': 0.014276862144470215, 'timestamp': '2025-10-02 01:01:30.706185', 'step': 28176, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:30.767848', 'step': 28176, 'epoch': 3}
{'type': 'loss', 'content': 0.011653964407742023, 'timestamp': '2025-10-02 01:01:30.775369', 'step': 28177, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:30.847003', 'step': 28177, 'epoch': 3}
{'type': 'loss', 'content': 0.11875075846910477, 'timestamp': '2025-10-02 01:01:30.852306', 'step': 28178, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:01:30.909436', 'step': 28178, 'epoch': 3}
{'type': 'loss', 'content': 0.1443248987197876, 'timestamp': '2025-10-02 01:01:30.912267', 'step': 28179, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:30.968169', 'step': 28179, 'epoch': 3}
{'type': 'loss', 'content': 0.029508745297789574, 'timestamp': '2025-10-02 01:01:30.974613', 'step': 28180, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:31.033334', 'step': 28180, 'epoch': 3}
{'type': 'loss', 'content': 0.014001026749610901, 'timestamp': '2025-10-02 01:01:31.044321', 'step': 28181, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:31.099099', 'step': 28181, 'epoch': 3}
{'type': 'loss', 'content': 0.06451165676116943, 'timestamp': '2025-10-02 01:01:31.106101', 'step': 28182, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:31.168581', 'step': 28182, 'epoch': 3}
{'type': 'loss', 'content': 0.0272877998650074, 'timestamp': '2025-10-02 01:01:31.171133', 'step': 28183, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:31.232225', 'step': 28183, 'epoch': 3}
{'type': 'loss', 'content': 0.08333791047334671, 'timestamp': '2025-10-02 01:01:31.248383', 'step': 28184, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:31.303415', 'step': 28184, 'epoch': 3}
{'type': 'loss', 'content': 0.01012851856648922, 'timestamp': '2025-10-02 01:01:31.310232', 'step': 28185, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:31.375733', 'step': 28185, 'epoch': 3}
{'type': 'loss', 'content': 0.07740071415901184, 'timestamp': '2025-10-02 01:01:31.382909', 'step': 28186, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:31.443037', 'step': 28186, 'epoch': 3}
{'type': 'loss', 'content': 0.04939611628651619, 'timestamp': '2025-10-02 01:01:31.450773', 'step': 28187, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:31.524671', 'step': 28187, 'epoch': 3}
{'type': 'loss', 'content': 0.015687154605984688, 'timestamp': '2025-10-02 01:01:31.532234', 'step': 28188, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:31.591862', 'step': 28188, 'epoch': 3}
{'type': 'loss', 'content': 0.03055422008037567, 'timestamp': '2025-10-02 01:01:31.602852', 'step': 28189, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:31.669950', 'step': 28189, 'epoch': 3}
{'type': 'loss', 'content': 0.0011669127270579338, 'timestamp': '2025-10-02 01:01:31.675817', 'step': 28190, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:31.739612', 'step': 28190, 'epoch': 3}
{'type': 'loss', 'content': 0.011092670261859894, 'timestamp': '2025-10-02 01:01:31.744287', 'step': 28191, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:31.810426', 'step': 28191, 'epoch': 3}
{'type': 'loss', 'content': 0.023045500740408897, 'timestamp': '2025-10-02 01:01:31.820434', 'step': 28192, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:31.880609', 'step': 28192, 'epoch': 3}
{'type': 'loss', 'content': 0.023619147017598152, 'timestamp': '2025-10-02 01:01:31.890322', 'step': 28193, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:31.952988', 'step': 28193, 'epoch': 3}
{'type': 'loss', 'content': 0.039366550743579865, 'timestamp': '2025-10-02 01:01:31.960028', 'step': 28194, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:32.032243', 'step': 28194, 'epoch': 3}
{'type': 'loss', 'content': 0.03396730124950409, 'timestamp': '2025-10-02 01:01:32.037706', 'step': 28195, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:32.103914', 'step': 28195, 'epoch': 3}
{'type': 'loss', 'content': 0.02413257025182247, 'timestamp': '2025-10-02 01:01:32.116558', 'step': 28196, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:32.185274', 'step': 28196, 'epoch': 3}
{'type': 'loss', 'content': 0.05951639637351036, 'timestamp': '2025-10-02 01:01:32.194795', 'step': 28197, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:32.262607', 'step': 28197, 'epoch': 3}
{'type': 'loss', 'content': 0.06462398916482925, 'timestamp': '2025-10-02 01:01:32.266529', 'step': 28198, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:32.334927', 'step': 28198, 'epoch': 3}
{'type': 'loss', 'content': 0.03427338972687721, 'timestamp': '2025-10-02 01:01:32.342596', 'step': 28199, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:32.404363', 'step': 28199, 'epoch': 3}
{'type': 'loss', 'content': 0.06079309806227684, 'timestamp': '2025-10-02 01:01:32.412653', 'step': 28200, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:32.469291', 'step': 28200, 'epoch': 3}
{'type': 'loss', 'content': 0.01785234361886978, 'timestamp': '2025-10-02 01:01:32.471934', 'step': 28201, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:32.533783', 'step': 28201, 'epoch': 3}
{'type': 'loss', 'content': 0.003724842332303524, 'timestamp': '2025-10-02 01:01:32.536585', 'step': 28202, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:32.595383', 'step': 28202, 'epoch': 3}
{'type': 'loss', 'content': 0.0011054144706577063, 'timestamp': '2025-10-02 01:01:32.602865', 'step': 28203, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:32.669455', 'step': 28203, 'epoch': 3}
{'type': 'loss', 'content': 0.042375437915325165, 'timestamp': '2025-10-02 01:01:32.676507', 'step': 28204, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:32.734649', 'step': 28204, 'epoch': 3}
{'type': 'loss', 'content': 0.05395857244729996, 'timestamp': '2025-10-02 01:01:32.744905', 'step': 28205, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:32.817309', 'step': 28205, 'epoch': 3}
{'type': 'loss', 'content': 0.054976873099803925, 'timestamp': '2025-10-02 01:01:32.830519', 'step': 28206, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:32.896237', 'step': 28206, 'epoch': 3}
{'type': 'loss', 'content': 0.011165381409227848, 'timestamp': '2025-10-02 01:01:32.903353', 'step': 28207, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:32.962260', 'step': 28207, 'epoch': 3}
{'type': 'loss', 'content': 0.05310571566224098, 'timestamp': '2025-10-02 01:01:32.971003', 'step': 28208, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:33.031093', 'step': 28208, 'epoch': 3}
{'type': 'loss', 'content': 0.11495865881443024, 'timestamp': '2025-10-02 01:01:33.037560', 'step': 28209, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:33.099233', 'step': 28209, 'epoch': 3}
{'type': 'loss', 'content': 0.04000967741012573, 'timestamp': '2025-10-02 01:01:33.108610', 'step': 28210, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:33.174045', 'step': 28210, 'epoch': 3}
{'type': 'loss', 'content': 0.030396418645977974, 'timestamp': '2025-10-02 01:01:33.179961', 'step': 28211, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:33.246796', 'step': 28211, 'epoch': 3}
{'type': 'loss', 'content': 0.0693352073431015, 'timestamp': '2025-10-02 01:01:33.258089', 'step': 28212, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:33.318042', 'step': 28212, 'epoch': 3}
{'type': 'loss', 'content': 0.028945112600922585, 'timestamp': '2025-10-02 01:01:33.320979', 'step': 28213, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:33.382820', 'step': 28213, 'epoch': 3}
{'type': 'loss', 'content': 0.09770624339580536, 'timestamp': '2025-10-02 01:01:33.387956', 'step': 28214, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:33.455837', 'step': 28214, 'epoch': 3}
{'type': 'loss', 'content': 0.0113676143810153, 'timestamp': '2025-10-02 01:01:33.468245', 'step': 28215, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:33.551015', 'step': 28215, 'epoch': 3}
{'type': 'loss', 'content': 0.05112910270690918, 'timestamp': '2025-10-02 01:01:33.566989', 'step': 28216, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 01:01:33.650290', 'step': 28216, 'epoch': 3}
{'type': 'loss', 'content': 0.02800176665186882, 'timestamp': '2025-10-02 01:01:33.674247', 'step': 28217, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:33.757567', 'step': 28217, 'epoch': 3}
{'type': 'loss', 'content': 0.007235516794025898, 'timestamp': '2025-10-02 01:01:33.760846', 'step': 28218, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:33.841184', 'step': 28218, 'epoch': 3}
{'type': 'loss', 'content': 0.1336212456226349, 'timestamp': '2025-10-02 01:01:33.845850', 'step': 28219, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:33.902245', 'step': 28219, 'epoch': 3}
{'type': 'loss', 'content': 0.05448712408542633, 'timestamp': '2025-10-02 01:01:33.909286', 'step': 28220, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:33.979127', 'step': 28220, 'epoch': 3}
{'type': 'loss', 'content': 0.06502286344766617, 'timestamp': '2025-10-02 01:01:33.985164', 'step': 28221, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:34.047755', 'step': 28221, 'epoch': 3}
{'type': 'loss', 'content': 0.05138202756643295, 'timestamp': '2025-10-02 01:01:34.052529', 'step': 28222, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:34.117989', 'step': 28222, 'epoch': 3}
{'type': 'loss', 'content': 0.01327027939260006, 'timestamp': '2025-10-02 01:01:34.120734', 'step': 28223, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:34.188025', 'step': 28223, 'epoch': 3}
{'type': 'loss', 'content': 0.010645943693816662, 'timestamp': '2025-10-02 01:01:34.199300', 'step': 28224, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:34.262749', 'step': 28224, 'epoch': 3}
{'type': 'loss', 'content': 0.02446635253727436, 'timestamp': '2025-10-02 01:01:34.272607', 'step': 28225, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:34.328746', 'step': 28225, 'epoch': 3}
{'type': 'loss', 'content': 0.11987795680761337, 'timestamp': '2025-10-02 01:01:34.331430', 'step': 28226, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:34.391980', 'step': 28226, 'epoch': 3}
{'type': 'loss', 'content': 0.07347135990858078, 'timestamp': '2025-10-02 01:01:34.396190', 'step': 28227, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:34.456658', 'step': 28227, 'epoch': 3}
{'type': 'loss', 'content': 0.04391256347298622, 'timestamp': '2025-10-02 01:01:34.464459', 'step': 28228, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:34.528586', 'step': 28228, 'epoch': 3}
{'type': 'loss', 'content': 0.02817627042531967, 'timestamp': '2025-10-02 01:01:34.538062', 'step': 28229, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:34.607410', 'step': 28229, 'epoch': 3}
{'type': 'loss', 'content': 0.1367788165807724, 'timestamp': '2025-10-02 01:01:34.612288', 'step': 28230, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:34.679239', 'step': 28230, 'epoch': 3}
{'type': 'loss', 'content': 0.012595958076417446, 'timestamp': '2025-10-02 01:01:34.689398', 'step': 28231, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:34.752759', 'step': 28231, 'epoch': 3}
{'type': 'loss', 'content': 0.07693330198526382, 'timestamp': '2025-10-02 01:01:34.760648', 'step': 28232, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:34.821248', 'step': 28232, 'epoch': 3}
{'type': 'loss', 'content': 0.04098083823919296, 'timestamp': '2025-10-02 01:01:34.825610', 'step': 28233, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:34.885980', 'step': 28233, 'epoch': 3}
{'type': 'loss', 'content': 0.02653341367840767, 'timestamp': '2025-10-02 01:01:34.888600', 'step': 28234, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:34.948424', 'step': 28234, 'epoch': 3}
{'type': 'loss', 'content': 0.058562345802783966, 'timestamp': '2025-10-02 01:01:34.960621', 'step': 28235, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:35.036349', 'step': 28235, 'epoch': 3}
{'type': 'loss', 'content': 0.061099614948034286, 'timestamp': '2025-10-02 01:01:35.042936', 'step': 28236, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:35.140360', 'step': 28236, 'epoch': 3}
{'type': 'loss', 'content': 0.03155384585261345, 'timestamp': '2025-10-02 01:01:35.146994', 'step': 28237, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:35.239324', 'step': 28237, 'epoch': 3}
{'type': 'loss', 'content': 0.047181785106658936, 'timestamp': '2025-10-02 01:01:35.253750', 'step': 28238, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:35.344182', 'step': 28238, 'epoch': 3}
{'type': 'loss', 'content': 0.06371872127056122, 'timestamp': '2025-10-02 01:01:35.353718', 'step': 28239, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:35.421173', 'step': 28239, 'epoch': 3}
{'type': 'loss', 'content': 0.035283658653497696, 'timestamp': '2025-10-02 01:01:35.432457', 'step': 28240, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:35.517489', 'step': 28240, 'epoch': 3}
{'type': 'loss', 'content': 0.0938376784324646, 'timestamp': '2025-10-02 01:01:35.526912', 'step': 28241, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:35.597991', 'step': 28241, 'epoch': 3}
{'type': 'loss', 'content': 0.015342683531343937, 'timestamp': '2025-10-02 01:01:35.603846', 'step': 28242, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:35.678242', 'step': 28242, 'epoch': 3}
{'type': 'loss', 'content': 0.03901241719722748, 'timestamp': '2025-10-02 01:01:35.690383', 'step': 28243, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:35.779896', 'step': 28243, 'epoch': 3}
{'type': 'loss', 'content': 0.053603168576955795, 'timestamp': '2025-10-02 01:01:35.801071', 'step': 28244, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:35.890820', 'step': 28244, 'epoch': 3}
{'type': 'loss', 'content': 0.025301793590188026, 'timestamp': '2025-10-02 01:01:35.900848', 'step': 28245, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:35.968156', 'step': 28245, 'epoch': 3}
{'type': 'loss', 'content': 0.00794853176921606, 'timestamp': '2025-10-02 01:01:35.985743', 'step': 28246, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:36.066764', 'step': 28246, 'epoch': 3}
{'type': 'loss', 'content': 0.05334854871034622, 'timestamp': '2025-10-02 01:01:36.080568', 'step': 28247, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:36.164672', 'step': 28247, 'epoch': 3}
{'type': 'loss', 'content': 0.021928610280156136, 'timestamp': '2025-10-02 01:01:36.178990', 'step': 28248, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:36.257665', 'step': 28248, 'epoch': 3}
{'type': 'loss', 'content': 0.06291481852531433, 'timestamp': '2025-10-02 01:01:36.266574', 'step': 28249, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:01:36.345165', 'step': 28249, 'epoch': 3}
{'type': 'loss', 'content': 0.12033635377883911, 'timestamp': '2025-10-02 01:01:36.352448', 'step': 28250, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:36.410502', 'step': 28250, 'epoch': 3}
{'type': 'loss', 'content': 0.016412856057286263, 'timestamp': '2025-10-02 01:01:36.417654', 'step': 28251, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:01:36.478015', 'step': 28251, 'epoch': 3}
{'type': 'loss', 'content': 0.038812048733234406, 'timestamp': '2025-10-02 01:01:36.485003', 'step': 28252, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:36.543454', 'step': 28252, 'epoch': 3}
{'type': 'loss', 'content': 0.05463366582989693, 'timestamp': '2025-10-02 01:01:36.554483', 'step': 28253, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:36.625178', 'step': 28253, 'epoch': 3}
{'type': 'loss', 'content': 0.07341841608285904, 'timestamp': '2025-10-02 01:01:36.632471', 'step': 28254, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:36.689888', 'step': 28254, 'epoch': 3}
{'type': 'loss', 'content': 0.08341314643621445, 'timestamp': '2025-10-02 01:01:36.693055', 'step': 28255, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:36.758745', 'step': 28255, 'epoch': 3}
{'type': 'loss', 'content': 0.03160689398646355, 'timestamp': '2025-10-02 01:01:36.768825', 'step': 28256, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:36.840294', 'step': 28256, 'epoch': 3}
{'type': 'loss', 'content': 0.12056098133325577, 'timestamp': '2025-10-02 01:01:36.843534', 'step': 28257, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:36.923120', 'step': 28257, 'epoch': 3}
{'type': 'loss', 'content': 0.03440827876329422, 'timestamp': '2025-10-02 01:01:36.930389', 'step': 28258, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:37.003831', 'step': 28258, 'epoch': 3}
{'type': 'loss', 'content': 0.059919171035289764, 'timestamp': '2025-10-02 01:01:37.009476', 'step': 28259, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:37.069990', 'step': 28259, 'epoch': 3}
{'type': 'loss', 'content': 0.030988644808530807, 'timestamp': '2025-10-02 01:01:37.078005', 'step': 28260, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:37.132997', 'step': 28260, 'epoch': 3}
{'type': 'loss', 'content': 0.07319451868534088, 'timestamp': '2025-10-02 01:01:37.135455', 'step': 28261, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:37.196749', 'step': 28261, 'epoch': 3}
{'type': 'loss', 'content': 0.04061857983469963, 'timestamp': '2025-10-02 01:01:37.201817', 'step': 28262, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:37.262517', 'step': 28262, 'epoch': 3}
{'type': 'loss', 'content': 0.05094737187027931, 'timestamp': '2025-10-02 01:01:37.265607', 'step': 28263, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:37.325854', 'step': 28263, 'epoch': 3}
{'type': 'loss', 'content': 0.021089518442749977, 'timestamp': '2025-10-02 01:01:37.334409', 'step': 28264, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:37.400874', 'step': 28264, 'epoch': 3}
{'type': 'loss', 'content': 0.014706834219396114, 'timestamp': '2025-10-02 01:01:37.412223', 'step': 28265, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:37.474498', 'step': 28265, 'epoch': 3}
{'type': 'loss', 'content': 0.032092660665512085, 'timestamp': '2025-10-02 01:01:37.482027', 'step': 28266, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:37.546784', 'step': 28266, 'epoch': 3}
{'type': 'loss', 'content': 0.037113022059202194, 'timestamp': '2025-10-02 01:01:37.556343', 'step': 28267, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:37.622413', 'step': 28267, 'epoch': 3}
{'type': 'loss', 'content': 0.04006432369351387, 'timestamp': '2025-10-02 01:01:37.632578', 'step': 28268, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:37.690247', 'step': 28268, 'epoch': 3}
{'type': 'loss', 'content': 0.09123121947050095, 'timestamp': '2025-10-02 01:01:37.702909', 'step': 28269, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:37.759840', 'step': 28269, 'epoch': 3}
{'type': 'loss', 'content': 0.022862281650304794, 'timestamp': '2025-10-02 01:01:37.762694', 'step': 28270, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:37.821464', 'step': 28270, 'epoch': 3}
{'type': 'loss', 'content': 0.029502753168344498, 'timestamp': '2025-10-02 01:01:37.828864', 'step': 28271, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:37.887089', 'step': 28271, 'epoch': 3}
{'type': 'loss', 'content': 0.018730659037828445, 'timestamp': '2025-10-02 01:01:37.894043', 'step': 28272, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:37.951110', 'step': 28272, 'epoch': 3}
{'type': 'loss', 'content': 0.022370442748069763, 'timestamp': '2025-10-02 01:01:37.958775', 'step': 28273, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:38.018291', 'step': 28273, 'epoch': 3}
{'type': 'loss', 'content': 0.03816154971718788, 'timestamp': '2025-10-02 01:01:38.021356', 'step': 28274, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:38.084696', 'step': 28274, 'epoch': 3}
{'type': 'loss', 'content': 0.029603315517306328, 'timestamp': '2025-10-02 01:01:38.095120', 'step': 28275, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:38.167245', 'step': 28275, 'epoch': 3}
{'type': 'loss', 'content': 0.04242073744535446, 'timestamp': '2025-10-02 01:01:38.178220', 'step': 28276, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:38.235557', 'step': 28276, 'epoch': 3}
{'type': 'loss', 'content': 0.014966791495680809, 'timestamp': '2025-10-02 01:01:38.241587', 'step': 28277, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:38.297222', 'step': 28277, 'epoch': 3}
{'type': 'loss', 'content': 0.005005904473364353, 'timestamp': '2025-10-02 01:01:38.301818', 'step': 28278, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:01:38.360098', 'step': 28278, 'epoch': 3}
{'type': 'loss', 'content': 0.06584851443767548, 'timestamp': '2025-10-02 01:01:38.362708', 'step': 28279, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:01:38.423432', 'step': 28279, 'epoch': 3}
{'type': 'loss', 'content': 0.02539021335542202, 'timestamp': '2025-10-02 01:01:38.429962', 'step': 28280, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:38.493881', 'step': 28280, 'epoch': 3}
{'type': 'loss', 'content': 0.07848890125751495, 'timestamp': '2025-10-02 01:01:38.496707', 'step': 28281, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:38.552501', 'step': 28281, 'epoch': 3}
{'type': 'loss', 'content': 0.0190863236784935, 'timestamp': '2025-10-02 01:01:38.558511', 'step': 28282, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:38.618319', 'step': 28282, 'epoch': 3}
{'type': 'loss', 'content': 0.0714479312300682, 'timestamp': '2025-10-02 01:01:38.621015', 'step': 28283, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:38.694330', 'step': 28283, 'epoch': 3}
{'type': 'loss', 'content': 0.005261372774839401, 'timestamp': '2025-10-02 01:01:38.702760', 'step': 28284, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:38.769815', 'step': 28284, 'epoch': 3}
{'type': 'loss', 'content': 0.045268964022397995, 'timestamp': '2025-10-02 01:01:38.779474', 'step': 28285, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:38.845262', 'step': 28285, 'epoch': 3}
{'type': 'loss', 'content': 0.00089037767611444, 'timestamp': '2025-10-02 01:01:38.863805', 'step': 28286, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 01:01:38.965421', 'step': 28286, 'epoch': 3}
{'type': 'loss', 'content': 0.0035743401385843754, 'timestamp': '2025-10-02 01:01:38.978655', 'step': 28287, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:39.049244', 'step': 28287, 'epoch': 3}
{'type': 'loss', 'content': 0.03353691101074219, 'timestamp': '2025-10-02 01:01:39.056118', 'step': 28288, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:39.137803', 'step': 28288, 'epoch': 3}
{'type': 'loss', 'content': 0.04793567210435867, 'timestamp': '2025-10-02 01:01:39.144527', 'step': 28289, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:39.209602', 'step': 28289, 'epoch': 3}
{'type': 'loss', 'content': 0.09916912019252777, 'timestamp': '2025-10-02 01:01:39.215629', 'step': 28290, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:39.275747', 'step': 28290, 'epoch': 3}
{'type': 'loss', 'content': 0.06565482914447784, 'timestamp': '2025-10-02 01:01:39.280049', 'step': 28291, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:39.334083', 'step': 28291, 'epoch': 3}
{'type': 'loss', 'content': 0.045010630041360855, 'timestamp': '2025-10-02 01:01:39.340275', 'step': 28292, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:39.397172', 'step': 28292, 'epoch': 3}
{'type': 'loss', 'content': 0.035139624029397964, 'timestamp': '2025-10-02 01:01:39.399802', 'step': 28293, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:39.455729', 'step': 28293, 'epoch': 3}
{'type': 'loss', 'content': 0.011440573260188103, 'timestamp': '2025-10-02 01:01:39.460060', 'step': 28294, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:39.520159', 'step': 28294, 'epoch': 3}
{'type': 'loss', 'content': 0.005942576099187136, 'timestamp': '2025-10-02 01:01:39.522963', 'step': 28295, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:39.583218', 'step': 28295, 'epoch': 3}
{'type': 'loss', 'content': 0.02670789323747158, 'timestamp': '2025-10-02 01:01:39.589785', 'step': 28296, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:39.644707', 'step': 28296, 'epoch': 3}
{'type': 'loss', 'content': 0.05121907219290733, 'timestamp': '2025-10-02 01:01:39.647485', 'step': 28297, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:39.714104', 'step': 28297, 'epoch': 3}
{'type': 'loss', 'content': 5.976906686555594e-05, 'timestamp': '2025-10-02 01:01:39.724506', 'step': 28298, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:01:39.787190', 'step': 28298, 'epoch': 3}
{'type': 'loss', 'content': 0.00801599957048893, 'timestamp': '2025-10-02 01:01:39.797832', 'step': 28299, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:39.855675', 'step': 28299, 'epoch': 3}
{'type': 'loss', 'content': 0.036774758249521255, 'timestamp': '2025-10-02 01:01:39.863333', 'step': 28300, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:39.919278', 'step': 28300, 'epoch': 3}
{'type': 'loss', 'content': 0.02624260075390339, 'timestamp': '2025-10-02 01:01:39.921709', 'step': 28301, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:39.980048', 'step': 28301, 'epoch': 3}
{'type': 'loss', 'content': 0.007443245034664869, 'timestamp': '2025-10-02 01:01:39.983069', 'step': 28302, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:40.043629', 'step': 28302, 'epoch': 3}
{'type': 'loss', 'content': 0.00909158680588007, 'timestamp': '2025-10-02 01:01:40.051101', 'step': 28303, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:40.114861', 'step': 28303, 'epoch': 3}
{'type': 'loss', 'content': 0.051115404814481735, 'timestamp': '2025-10-02 01:01:40.126979', 'step': 28304, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:40.189824', 'step': 28304, 'epoch': 3}
{'type': 'loss', 'content': 0.03363978490233421, 'timestamp': '2025-10-02 01:01:40.199120', 'step': 28305, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:01:40.270685', 'step': 28305, 'epoch': 3}
{'type': 'loss', 'content': 0.024684490635991096, 'timestamp': '2025-10-02 01:01:40.281344', 'step': 28306, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:40.348949', 'step': 28306, 'epoch': 3}
{'type': 'loss', 'content': 0.05782824009656906, 'timestamp': '2025-10-02 01:01:40.358549', 'step': 28307, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:01:40.414750', 'step': 28307, 'epoch': 3}
{'type': 'loss', 'content': 0.05759664997458458, 'timestamp': '2025-10-02 01:01:40.421055', 'step': 28308, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:40.483662', 'step': 28308, 'epoch': 3}
{'type': 'loss', 'content': 0.019775686785578728, 'timestamp': '2025-10-02 01:01:40.493915', 'step': 28309, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:40.553622', 'step': 28309, 'epoch': 3}
{'type': 'loss', 'content': 0.0470757782459259, 'timestamp': '2025-10-02 01:01:40.556291', 'step': 28310, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:40.620030', 'step': 28310, 'epoch': 3}
{'type': 'loss', 'content': 0.17584149539470673, 'timestamp': '2025-10-02 01:01:40.622548', 'step': 28311, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:40.678430', 'step': 28311, 'epoch': 3}
{'type': 'loss', 'content': 0.034906212240457535, 'timestamp': '2025-10-02 01:01:40.685136', 'step': 28312, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:40.743482', 'step': 28312, 'epoch': 3}
{'type': 'loss', 'content': 0.07165215909481049, 'timestamp': '2025-10-02 01:01:40.749492', 'step': 28313, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:40.817326', 'step': 28313, 'epoch': 3}
{'type': 'loss', 'content': 0.01663518324494362, 'timestamp': '2025-10-02 01:01:40.826990', 'step': 28314, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:40.886774', 'step': 28314, 'epoch': 3}
{'type': 'loss', 'content': 0.00326976808719337, 'timestamp': '2025-10-02 01:01:40.892858', 'step': 28315, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:40.955979', 'step': 28315, 'epoch': 3}
{'type': 'loss', 'content': 0.05794098973274231, 'timestamp': '2025-10-02 01:01:40.968672', 'step': 28316, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:41.036596', 'step': 28316, 'epoch': 3}
{'type': 'loss', 'content': 0.029030941426753998, 'timestamp': '2025-10-02 01:01:41.041647', 'step': 28317, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:41.153841', 'step': 28317, 'epoch': 3}
{'type': 'loss', 'content': 0.042011942714452744, 'timestamp': '2025-10-02 01:01:41.164282', 'step': 28318, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:41.234136', 'step': 28318, 'epoch': 3}
{'type': 'loss', 'content': 0.08173269033432007, 'timestamp': '2025-10-02 01:01:41.249446', 'step': 28319, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:41.325941', 'step': 28319, 'epoch': 3}
{'type': 'loss', 'content': 0.039938777685165405, 'timestamp': '2025-10-02 01:01:41.344217', 'step': 28320, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:41.414720', 'step': 28320, 'epoch': 3}
{'type': 'loss', 'content': 0.0953589528799057, 'timestamp': '2025-10-02 01:01:41.418770', 'step': 28321, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:41.491673', 'step': 28321, 'epoch': 3}
{'type': 'loss', 'content': 0.051354825496673584, 'timestamp': '2025-10-02 01:01:41.496534', 'step': 28322, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:41.555018', 'step': 28322, 'epoch': 3}
{'type': 'loss', 'content': 0.028113508597016335, 'timestamp': '2025-10-02 01:01:41.564561', 'step': 28323, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:41.656197', 'step': 28323, 'epoch': 3}
{'type': 'loss', 'content': 0.026730388402938843, 'timestamp': '2025-10-02 01:01:41.666297', 'step': 28324, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:41.767424', 'step': 28324, 'epoch': 3}
{'type': 'loss', 'content': 0.05008083954453468, 'timestamp': '2025-10-02 01:01:41.770965', 'step': 28325, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:41.839856', 'step': 28325, 'epoch': 3}
{'type': 'loss', 'content': 0.016004841774702072, 'timestamp': '2025-10-02 01:01:41.850145', 'step': 28326, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:41.931226', 'step': 28326, 'epoch': 3}
{'type': 'loss', 'content': 0.06025288254022598, 'timestamp': '2025-10-02 01:01:41.933639', 'step': 28327, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:41.996744', 'step': 28327, 'epoch': 3}
{'type': 'loss', 'content': 0.022116247564554214, 'timestamp': '2025-10-02 01:01:42.008102', 'step': 28328, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:42.072882', 'step': 28328, 'epoch': 3}
{'type': 'loss', 'content': 0.01274513453245163, 'timestamp': '2025-10-02 01:01:42.075754', 'step': 28329, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:01:42.142984', 'step': 28329, 'epoch': 3}
{'type': 'loss', 'content': 0.03902175650000572, 'timestamp': '2025-10-02 01:01:42.153648', 'step': 28330, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:42.218844', 'step': 28330, 'epoch': 3}
{'type': 'loss', 'content': 0.09631689637899399, 'timestamp': '2025-10-02 01:01:42.224421', 'step': 28331, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:42.302370', 'step': 28331, 'epoch': 3}
{'type': 'loss', 'content': 0.12015848606824875, 'timestamp': '2025-10-02 01:01:42.309375', 'step': 28332, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:42.365498', 'step': 28332, 'epoch': 3}
{'type': 'loss', 'content': 0.027210470288991928, 'timestamp': '2025-10-02 01:01:42.368652', 'step': 28333, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:42.431437', 'step': 28333, 'epoch': 3}
{'type': 'loss', 'content': 0.09983769804239273, 'timestamp': '2025-10-02 01:01:42.440460', 'step': 28334, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:42.516121', 'step': 28334, 'epoch': 3}
{'type': 'loss', 'content': 0.020715145394206047, 'timestamp': '2025-10-02 01:01:42.526633', 'step': 28335, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:42.585955', 'step': 28335, 'epoch': 3}
{'type': 'loss', 'content': 0.09116405248641968, 'timestamp': '2025-10-02 01:01:42.593014', 'step': 28336, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:42.664491', 'step': 28336, 'epoch': 3}
{'type': 'loss', 'content': 0.08385736495256424, 'timestamp': '2025-10-02 01:01:42.673140', 'step': 28337, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:42.730174', 'step': 28337, 'epoch': 3}
{'type': 'loss', 'content': 0.06267642229795456, 'timestamp': '2025-10-02 01:01:42.733555', 'step': 28338, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:42.789324', 'step': 28338, 'epoch': 3}
{'type': 'loss', 'content': 0.042790547013282776, 'timestamp': '2025-10-02 01:01:42.791943', 'step': 28339, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:42.864286', 'step': 28339, 'epoch': 3}
{'type': 'loss', 'content': 0.012902859598398209, 'timestamp': '2025-10-02 01:01:42.875234', 'step': 28340, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:42.955952', 'step': 28340, 'epoch': 3}
{'type': 'loss', 'content': 0.026786239817738533, 'timestamp': '2025-10-02 01:01:42.963716', 'step': 28341, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:01:43.040018', 'step': 28341, 'epoch': 3}
{'type': 'loss', 'content': 0.10141374170780182, 'timestamp': '2025-10-02 01:01:43.043800', 'step': 28342, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:43.109943', 'step': 28342, 'epoch': 3}
{'type': 'loss', 'content': 0.15256422758102417, 'timestamp': '2025-10-02 01:01:43.114766', 'step': 28343, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:43.182710', 'step': 28343, 'epoch': 3}
{'type': 'loss', 'content': 0.04196813702583313, 'timestamp': '2025-10-02 01:01:43.201152', 'step': 28344, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:43.260794', 'step': 28344, 'epoch': 3}
{'type': 'loss', 'content': 0.016855565831065178, 'timestamp': '2025-10-02 01:01:43.271126', 'step': 28345, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:43.347241', 'step': 28345, 'epoch': 3}
{'type': 'loss', 'content': 0.0022036260925233364, 'timestamp': '2025-10-02 01:01:43.353595', 'step': 28346, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:43.450713', 'step': 28346, 'epoch': 3}
{'type': 'loss', 'content': 0.019572582095861435, 'timestamp': '2025-10-02 01:01:43.453468', 'step': 28347, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:43.508735', 'step': 28347, 'epoch': 3}
{'type': 'loss', 'content': 0.020754745230078697, 'timestamp': '2025-10-02 01:01:43.515604', 'step': 28348, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:01:43.579269', 'step': 28348, 'epoch': 3}
{'type': 'loss', 'content': 0.000494357431307435, 'timestamp': '2025-10-02 01:01:43.591087', 'step': 28349, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:43.651998', 'step': 28349, 'epoch': 3}
{'type': 'loss', 'content': 0.044550493359565735, 'timestamp': '2025-10-02 01:01:43.657392', 'step': 28350, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:43.716160', 'step': 28350, 'epoch': 3}
{'type': 'loss', 'content': 0.030308976769447327, 'timestamp': '2025-10-02 01:01:43.718681', 'step': 28351, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:43.773748', 'step': 28351, 'epoch': 3}
{'type': 'loss', 'content': 0.044945210218429565, 'timestamp': '2025-10-02 01:01:43.781425', 'step': 28352, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:43.844793', 'step': 28352, 'epoch': 3}
{'type': 'loss', 'content': 0.022811241447925568, 'timestamp': '2025-10-02 01:01:43.850741', 'step': 28353, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:43.908085', 'step': 28353, 'epoch': 3}
{'type': 'loss', 'content': 0.06381979584693909, 'timestamp': '2025-10-02 01:01:43.912741', 'step': 28354, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:43.967732', 'step': 28354, 'epoch': 3}
{'type': 'loss', 'content': 0.045378535985946655, 'timestamp': '2025-10-02 01:01:43.970427', 'step': 28355, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:44.026680', 'step': 28355, 'epoch': 3}
{'type': 'loss', 'content': 0.045617055147886276, 'timestamp': '2025-10-02 01:01:44.038074', 'step': 28356, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:01:44.101602', 'step': 28356, 'epoch': 3}
{'type': 'loss', 'content': 0.042984336614608765, 'timestamp': '2025-10-02 01:01:44.113321', 'step': 28357, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:44.171979', 'step': 28357, 'epoch': 3}
{'type': 'loss', 'content': 0.03888745233416557, 'timestamp': '2025-10-02 01:01:44.181547', 'step': 28358, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:44.256336', 'step': 28358, 'epoch': 3}
{'type': 'loss', 'content': 0.14423172175884247, 'timestamp': '2025-10-02 01:01:44.258656', 'step': 28359, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:44.317889', 'step': 28359, 'epoch': 3}
{'type': 'loss', 'content': 0.002102880971506238, 'timestamp': '2025-10-02 01:01:44.328120', 'step': 28360, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:44.384289', 'step': 28360, 'epoch': 3}
{'type': 'loss', 'content': 0.0888356864452362, 'timestamp': '2025-10-02 01:01:44.387209', 'step': 28361, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:44.444481', 'step': 28361, 'epoch': 3}
{'type': 'loss', 'content': 0.08435236662626266, 'timestamp': '2025-10-02 01:01:44.446889', 'step': 28362, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:44.510658', 'step': 28362, 'epoch': 3}
{'type': 'loss', 'content': 0.04857770726084709, 'timestamp': '2025-10-02 01:01:44.520171', 'step': 28363, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:44.578348', 'step': 28363, 'epoch': 3}
{'type': 'loss', 'content': 0.035923369228839874, 'timestamp': '2025-10-02 01:01:44.584509', 'step': 28364, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:44.638885', 'step': 28364, 'epoch': 3}
{'type': 'loss', 'content': 0.08306913077831268, 'timestamp': '2025-10-02 01:01:44.641306', 'step': 28365, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:44.708670', 'step': 28365, 'epoch': 3}
{'type': 'loss', 'content': 0.03472549468278885, 'timestamp': '2025-10-02 01:01:44.719021', 'step': 28366, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:44.774430', 'step': 28366, 'epoch': 3}
{'type': 'loss', 'content': 0.038287386298179626, 'timestamp': '2025-10-02 01:01:44.781803', 'step': 28367, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:44.845277', 'step': 28367, 'epoch': 3}
{'type': 'loss', 'content': 0.08862397819757462, 'timestamp': '2025-10-02 01:01:44.851141', 'step': 28368, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:01:44.925141', 'step': 28368, 'epoch': 3}
{'type': 'loss', 'content': 0.052766699343919754, 'timestamp': '2025-10-02 01:01:44.936653', 'step': 28369, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:44.993942', 'step': 28369, 'epoch': 3}
{'type': 'loss', 'content': 0.058999765664339066, 'timestamp': '2025-10-02 01:01:44.996397', 'step': 28370, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 01:01:45.074119', 'step': 28370, 'epoch': 3}
{'type': 'loss', 'content': 0.0001240434794453904, 'timestamp': '2025-10-02 01:01:45.087387', 'step': 28371, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:45.150743', 'step': 28371, 'epoch': 3}
{'type': 'loss', 'content': 0.005959193222224712, 'timestamp': '2025-10-02 01:01:45.161020', 'step': 28372, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:45.217249', 'step': 28372, 'epoch': 3}
{'type': 'loss', 'content': 0.10116394609212875, 'timestamp': '2025-10-02 01:01:45.221934', 'step': 28373, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:01:45.292447', 'step': 28373, 'epoch': 3}
{'type': 'loss', 'content': 0.007367095444351435, 'timestamp': '2025-10-02 01:01:45.303122', 'step': 28374, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:45.361544', 'step': 28374, 'epoch': 3}
{'type': 'loss', 'content': 0.05277818441390991, 'timestamp': '2025-10-02 01:01:45.367467', 'step': 28375, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:45.426004', 'step': 28375, 'epoch': 3}
{'type': 'loss', 'content': 0.036124564707279205, 'timestamp': '2025-10-02 01:01:45.433145', 'step': 28376, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:45.489981', 'step': 28376, 'epoch': 3}
{'type': 'loss', 'content': 0.04036843404173851, 'timestamp': '2025-10-02 01:01:45.495074', 'step': 28377, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:45.553816', 'step': 28377, 'epoch': 3}
{'type': 'loss', 'content': 0.002359786070883274, 'timestamp': '2025-10-02 01:01:45.563313', 'step': 28378, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:45.620144', 'step': 28378, 'epoch': 3}
{'type': 'loss', 'content': 0.07072755694389343, 'timestamp': '2025-10-02 01:01:45.625862', 'step': 28379, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:45.682261', 'step': 28379, 'epoch': 3}
{'type': 'loss', 'content': 0.028069881722331047, 'timestamp': '2025-10-02 01:01:45.688324', 'step': 28380, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:45.748861', 'step': 28380, 'epoch': 3}
{'type': 'loss', 'content': 0.014239807613193989, 'timestamp': '2025-10-02 01:01:45.760186', 'step': 28381, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:45.822784', 'step': 28381, 'epoch': 3}
{'type': 'loss', 'content': 0.02730315364897251, 'timestamp': '2025-10-02 01:01:45.833221', 'step': 28382, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:01:45.890296', 'step': 28382, 'epoch': 3}
{'type': 'loss', 'content': 0.07622799277305603, 'timestamp': '2025-10-02 01:01:45.893415', 'step': 28383, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:45.948255', 'step': 28383, 'epoch': 3}
{'type': 'loss', 'content': 0.0332287922501564, 'timestamp': '2025-10-02 01:01:45.954910', 'step': 28384, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:01:46.009305', 'step': 28384, 'epoch': 3}
{'type': 'loss', 'content': 0.12954705953598022, 'timestamp': '2025-10-02 01:01:46.011657', 'step': 28385, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:46.068808', 'step': 28385, 'epoch': 3}
{'type': 'loss', 'content': 0.023759156465530396, 'timestamp': '2025-10-02 01:01:46.072049', 'step': 28386, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:01:46.141877', 'step': 28386, 'epoch': 3}
{'type': 'loss', 'content': 0.02405421808362007, 'timestamp': '2025-10-02 01:01:46.154179', 'step': 28387, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:46.213189', 'step': 28387, 'epoch': 3}
{'type': 'loss', 'content': 0.055355146527290344, 'timestamp': '2025-10-02 01:01:46.219365', 'step': 28388, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:46.273818', 'step': 28388, 'epoch': 3}
{'type': 'loss', 'content': 0.05611244589090347, 'timestamp': '2025-10-02 01:01:46.277090', 'step': 28389, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:46.332852', 'step': 28389, 'epoch': 3}
{'type': 'loss', 'content': 0.03891238942742348, 'timestamp': '2025-10-02 01:01:46.335319', 'step': 28390, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:46.396820', 'step': 28390, 'epoch': 3}
{'type': 'loss', 'content': 0.08956928551197052, 'timestamp': '2025-10-02 01:01:46.407237', 'step': 28391, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:46.462269', 'step': 28391, 'epoch': 3}
{'type': 'loss', 'content': 0.027755025774240494, 'timestamp': '2025-10-02 01:01:46.468063', 'step': 28392, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:46.522132', 'step': 28392, 'epoch': 3}
{'type': 'loss', 'content': 0.06337901949882507, 'timestamp': '2025-10-02 01:01:46.525397', 'step': 28393, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:46.579752', 'step': 28393, 'epoch': 3}
{'type': 'loss', 'content': 0.026873283088207245, 'timestamp': '2025-10-02 01:01:46.582596', 'step': 28394, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:46.638159', 'step': 28394, 'epoch': 3}
{'type': 'loss', 'content': 0.04612806811928749, 'timestamp': '2025-10-02 01:01:46.640788', 'step': 28395, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:46.698258', 'step': 28395, 'epoch': 3}
{'type': 'loss', 'content': 0.007107362616807222, 'timestamp': '2025-10-02 01:01:46.712642', 'step': 28396, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:46.784149', 'step': 28396, 'epoch': 3}
{'type': 'loss', 'content': 0.025053098797798157, 'timestamp': '2025-10-02 01:01:46.790748', 'step': 28397, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:46.864431', 'step': 28397, 'epoch': 3}
{'type': 'loss', 'content': 0.039771631360054016, 'timestamp': '2025-10-02 01:01:46.867291', 'step': 28398, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:46.921747', 'step': 28398, 'epoch': 3}
{'type': 'loss', 'content': 0.0573902502655983, 'timestamp': '2025-10-02 01:01:46.924263', 'step': 28399, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:46.978778', 'step': 28399, 'epoch': 3}
{'type': 'loss', 'content': 0.06969738751649857, 'timestamp': '2025-10-02 01:01:46.985778', 'step': 28400, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:47.039608', 'step': 28400, 'epoch': 3}
{'type': 'loss', 'content': 0.11285441368818283, 'timestamp': '2025-10-02 01:01:47.041758', 'step': 28401, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:47.097335', 'step': 28401, 'epoch': 3}
{'type': 'loss', 'content': 0.014987677335739136, 'timestamp': '2025-10-02 01:01:47.106833', 'step': 28402, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:01:47.161188', 'step': 28402, 'epoch': 3}
{'type': 'loss', 'content': 0.0628945380449295, 'timestamp': '2025-10-02 01:01:47.163775', 'step': 28403, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:47.218405', 'step': 28403, 'epoch': 3}
{'type': 'loss', 'content': 0.04883236810564995, 'timestamp': '2025-10-02 01:01:47.224923', 'step': 28404, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:47.282337', 'step': 28404, 'epoch': 3}
{'type': 'loss', 'content': 0.005246600601822138, 'timestamp': '2025-10-02 01:01:47.293305', 'step': 28405, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:47.348736', 'step': 28405, 'epoch': 3}
{'type': 'loss', 'content': 0.009709924459457397, 'timestamp': '2025-10-02 01:01:47.354802', 'step': 28406, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:47.414517', 'step': 28406, 'epoch': 3}
{'type': 'loss', 'content': 0.01809309609234333, 'timestamp': '2025-10-02 01:01:47.424708', 'step': 28407, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:47.479500', 'step': 28407, 'epoch': 3}
{'type': 'loss', 'content': 0.07139427214860916, 'timestamp': '2025-10-02 01:01:47.485563', 'step': 28408, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:47.539558', 'step': 28408, 'epoch': 3}
{'type': 'loss', 'content': 0.03446004539728165, 'timestamp': '2025-10-02 01:01:47.545614', 'step': 28409, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:47.605505', 'step': 28409, 'epoch': 3}
{'type': 'loss', 'content': 0.0032948872540146112, 'timestamp': '2025-10-02 01:01:47.615670', 'step': 28410, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:47.678341', 'step': 28410, 'epoch': 3}
{'type': 'loss', 'content': 0.05459881201386452, 'timestamp': '2025-10-02 01:01:47.681287', 'step': 28411, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:47.736076', 'step': 28411, 'epoch': 3}
{'type': 'loss', 'content': 0.03548651933670044, 'timestamp': '2025-10-02 01:01:47.744388', 'step': 28412, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:47.798689', 'step': 28412, 'epoch': 3}
{'type': 'loss', 'content': 0.0323416031897068, 'timestamp': '2025-10-02 01:01:47.801291', 'step': 28413, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:47.860871', 'step': 28413, 'epoch': 3}
{'type': 'loss', 'content': 0.018742822110652924, 'timestamp': '2025-10-02 01:01:47.871067', 'step': 28414, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:47.929344', 'step': 28414, 'epoch': 3}
{'type': 'loss', 'content': 0.011726729571819305, 'timestamp': '2025-10-02 01:01:47.935510', 'step': 28415, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:47.998714', 'step': 28415, 'epoch': 3}
{'type': 'loss', 'content': 0.08859539031982422, 'timestamp': '2025-10-02 01:01:48.014033', 'step': 28416, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:48.073150', 'step': 28416, 'epoch': 3}
{'type': 'loss', 'content': 0.0633724257349968, 'timestamp': '2025-10-02 01:01:48.076051', 'step': 28417, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:48.130590', 'step': 28417, 'epoch': 3}
{'type': 'loss', 'content': 0.09960876405239105, 'timestamp': '2025-10-02 01:01:48.133475', 'step': 28418, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:48.194305', 'step': 28418, 'epoch': 3}
{'type': 'loss', 'content': 0.021318169310688972, 'timestamp': '2025-10-02 01:01:48.204494', 'step': 28419, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:48.259311', 'step': 28419, 'epoch': 3}
{'type': 'loss', 'content': 0.0359337255358696, 'timestamp': '2025-10-02 01:01:48.265063', 'step': 28420, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:48.319592', 'step': 28420, 'epoch': 3}
{'type': 'loss', 'content': 0.03860178589820862, 'timestamp': '2025-10-02 01:01:48.322346', 'step': 28421, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:48.378941', 'step': 28421, 'epoch': 3}
{'type': 'loss', 'content': 0.0692271962761879, 'timestamp': '2025-10-02 01:01:48.381745', 'step': 28422, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:48.437376', 'step': 28422, 'epoch': 3}
{'type': 'loss', 'content': 0.03801785781979561, 'timestamp': '2025-10-02 01:01:48.444929', 'step': 28423, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:48.500409', 'step': 28423, 'epoch': 3}
{'type': 'loss', 'content': 0.006630308926105499, 'timestamp': '2025-10-02 01:01:48.506876', 'step': 28424, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:48.560978', 'step': 28424, 'epoch': 3}
{'type': 'loss', 'content': 0.08128375560045242, 'timestamp': '2025-10-02 01:01:48.563554', 'step': 28425, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:48.619165', 'step': 28425, 'epoch': 3}
{'type': 'loss', 'content': 0.002904328750446439, 'timestamp': '2025-10-02 01:01:48.628490', 'step': 28426, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:48.683195', 'step': 28426, 'epoch': 3}
{'type': 'loss', 'content': 0.08442989736795425, 'timestamp': '2025-10-02 01:01:48.685775', 'step': 28427, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:48.740692', 'step': 28427, 'epoch': 3}
{'type': 'loss', 'content': 0.05321661755442619, 'timestamp': '2025-10-02 01:01:48.746839', 'step': 28428, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:48.801336', 'step': 28428, 'epoch': 3}
{'type': 'loss', 'content': 0.03681487590074539, 'timestamp': '2025-10-02 01:01:48.805545', 'step': 28429, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:48.860216', 'step': 28429, 'epoch': 3}
{'type': 'loss', 'content': 0.1000358983874321, 'timestamp': '2025-10-02 01:01:48.862686', 'step': 28430, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:48.917506', 'step': 28430, 'epoch': 3}
{'type': 'loss', 'content': 0.04317181929945946, 'timestamp': '2025-10-02 01:01:48.920365', 'step': 28431, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:48.975602', 'step': 28431, 'epoch': 3}
{'type': 'loss', 'content': 0.060446400195360184, 'timestamp': '2025-10-02 01:01:48.985698', 'step': 28432, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:49.039408', 'step': 28432, 'epoch': 3}
{'type': 'loss', 'content': 0.06837315112352371, 'timestamp': '2025-10-02 01:01:49.042060', 'step': 28433, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:49.100163', 'step': 28433, 'epoch': 3}
{'type': 'loss', 'content': 0.028700806200504303, 'timestamp': '2025-10-02 01:01:49.103638', 'step': 28434, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:49.176436', 'step': 28434, 'epoch': 3}
{'type': 'loss', 'content': 0.1638297438621521, 'timestamp': '2025-10-02 01:01:49.182036', 'step': 28435, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:49.246155', 'step': 28435, 'epoch': 3}
{'type': 'loss', 'content': 0.044416576623916626, 'timestamp': '2025-10-02 01:01:49.252842', 'step': 28436, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:49.312715', 'step': 28436, 'epoch': 3}
{'type': 'loss', 'content': 0.04838218539953232, 'timestamp': '2025-10-02 01:01:49.324034', 'step': 28437, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:01:49.387394', 'step': 28437, 'epoch': 3}
{'type': 'loss', 'content': 0.013801205903291702, 'timestamp': '2025-10-02 01:01:49.398028', 'step': 28438, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:01:49.461505', 'step': 28438, 'epoch': 3}
{'type': 'loss', 'content': 0.011652130633592606, 'timestamp': '2025-10-02 01:01:49.472180', 'step': 28439, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:49.527043', 'step': 28439, 'epoch': 3}
{'type': 'loss', 'content': 0.02488788589835167, 'timestamp': '2025-10-02 01:01:49.532911', 'step': 28440, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:49.587292', 'step': 28440, 'epoch': 3}
{'type': 'loss', 'content': 0.01921175792813301, 'timestamp': '2025-10-02 01:01:49.589964', 'step': 28441, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:49.644331', 'step': 28441, 'epoch': 3}
{'type': 'loss', 'content': 0.04240445792675018, 'timestamp': '2025-10-02 01:01:49.650440', 'step': 28442, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:49.705503', 'step': 28442, 'epoch': 3}
{'type': 'loss', 'content': 0.034170959144830704, 'timestamp': '2025-10-02 01:01:49.707610', 'step': 28443, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:49.768391', 'step': 28443, 'epoch': 3}
{'type': 'loss', 'content': 0.06881219148635864, 'timestamp': '2025-10-02 01:01:49.779641', 'step': 28444, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:49.835525', 'step': 28444, 'epoch': 3}
{'type': 'loss', 'content': 0.06626074016094208, 'timestamp': '2025-10-02 01:01:49.838110', 'step': 28445, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:49.892866', 'step': 28445, 'epoch': 3}
{'type': 'loss', 'content': 0.017727185040712357, 'timestamp': '2025-10-02 01:01:49.895509', 'step': 28446, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:49.949033', 'step': 28446, 'epoch': 3}
{'type': 'loss', 'content': 0.021733487024903297, 'timestamp': '2025-10-02 01:01:49.951430', 'step': 28447, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:50.006132', 'step': 28447, 'epoch': 3}
{'type': 'loss', 'content': 0.16547535359859467, 'timestamp': '2025-10-02 01:01:50.011988', 'step': 28448, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:50.066808', 'step': 28448, 'epoch': 3}
{'type': 'loss', 'content': 0.018670743331313133, 'timestamp': '2025-10-02 01:01:50.077062', 'step': 28449, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:50.131745', 'step': 28449, 'epoch': 3}
{'type': 'loss', 'content': 0.015733949840068817, 'timestamp': '2025-10-02 01:01:50.134496', 'step': 28450, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:50.191252', 'step': 28450, 'epoch': 3}
{'type': 'loss', 'content': 0.017263827845454216, 'timestamp': '2025-10-02 01:01:50.193504', 'step': 28451, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:50.248901', 'step': 28451, 'epoch': 3}
{'type': 'loss', 'content': 0.1176510602235794, 'timestamp': '2025-10-02 01:01:50.260528', 'step': 28452, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:50.350346', 'step': 28452, 'epoch': 3}
{'type': 'loss', 'content': 0.035741060972213745, 'timestamp': '2025-10-02 01:01:50.360206', 'step': 28453, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:50.435159', 'step': 28453, 'epoch': 3}
{'type': 'loss', 'content': 0.020340483635663986, 'timestamp': '2025-10-02 01:01:50.439558', 'step': 28454, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:50.501667', 'step': 28454, 'epoch': 3}
{'type': 'loss', 'content': 0.06609789282083511, 'timestamp': '2025-10-02 01:01:50.507979', 'step': 28455, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:50.582501', 'step': 28455, 'epoch': 3}
{'type': 'loss', 'content': 0.03659537807106972, 'timestamp': '2025-10-02 01:01:50.603382', 'step': 28456, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:50.661699', 'step': 28456, 'epoch': 3}
{'type': 'loss', 'content': 0.04208322986960411, 'timestamp': '2025-10-02 01:01:50.667776', 'step': 28457, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:50.731381', 'step': 28457, 'epoch': 3}
{'type': 'loss', 'content': 0.02225121669471264, 'timestamp': '2025-10-02 01:01:50.734944', 'step': 28458, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:01:50.814123', 'step': 28458, 'epoch': 3}
{'type': 'loss', 'content': 0.008517326787114143, 'timestamp': '2025-10-02 01:01:50.824343', 'step': 28459, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:50.908818', 'step': 28459, 'epoch': 3}
{'type': 'loss', 'content': 0.0591549277305603, 'timestamp': '2025-10-02 01:01:50.920070', 'step': 28460, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:50.991390', 'step': 28460, 'epoch': 3}
{'type': 'loss', 'content': 0.131272092461586, 'timestamp': '2025-10-02 01:01:50.994570', 'step': 28461, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:51.055916', 'step': 28461, 'epoch': 3}
{'type': 'loss', 'content': 0.03983078896999359, 'timestamp': '2025-10-02 01:01:51.062701', 'step': 28462, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:51.135256', 'step': 28462, 'epoch': 3}
{'type': 'loss', 'content': 0.03534725680947304, 'timestamp': '2025-10-02 01:01:51.140608', 'step': 28463, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:51.210677', 'step': 28463, 'epoch': 3}
{'type': 'loss', 'content': 0.06834030151367188, 'timestamp': '2025-10-02 01:01:51.221289', 'step': 28464, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:51.279649', 'step': 28464, 'epoch': 3}
{'type': 'loss', 'content': 0.0028510664124041796, 'timestamp': '2025-10-02 01:01:51.286439', 'step': 28465, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:51.366020', 'step': 28465, 'epoch': 3}
{'type': 'loss', 'content': 0.024665115401148796, 'timestamp': '2025-10-02 01:01:51.376472', 'step': 28466, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:01:51.456215', 'step': 28466, 'epoch': 3}
{'type': 'loss', 'content': 0.02171502821147442, 'timestamp': '2025-10-02 01:01:51.466856', 'step': 28467, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:51.527977', 'step': 28467, 'epoch': 3}
{'type': 'loss', 'content': 0.032650090754032135, 'timestamp': '2025-10-02 01:01:51.536199', 'step': 28468, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:51.591949', 'step': 28468, 'epoch': 3}
{'type': 'loss', 'content': 0.0072751217521727085, 'timestamp': '2025-10-02 01:01:51.598095', 'step': 28469, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:51.662005', 'step': 28469, 'epoch': 3}
{'type': 'loss', 'content': 0.043745916336774826, 'timestamp': '2025-10-02 01:01:51.670344', 'step': 28470, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:51.729228', 'step': 28470, 'epoch': 3}
{'type': 'loss', 'content': 0.01646825484931469, 'timestamp': '2025-10-02 01:01:51.740026', 'step': 28471, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:01:51.817297', 'step': 28471, 'epoch': 3}
{'type': 'loss', 'content': 0.03187951818108559, 'timestamp': '2025-10-02 01:01:51.828749', 'step': 28472, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:51.890418', 'step': 28472, 'epoch': 3}
{'type': 'loss', 'content': 0.015724752098321915, 'timestamp': '2025-10-02 01:01:51.893656', 'step': 28473, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:01:51.962429', 'step': 28473, 'epoch': 3}
{'type': 'loss', 'content': 0.021630212664604187, 'timestamp': '2025-10-02 01:01:51.967422', 'step': 28474, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:52.025627', 'step': 28474, 'epoch': 3}
{'type': 'loss', 'content': 0.08347335457801819, 'timestamp': '2025-10-02 01:01:52.029646', 'step': 28475, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:01:52.119397', 'step': 28475, 'epoch': 3}
{'type': 'loss', 'content': 0.02648642472922802, 'timestamp': '2025-10-02 01:01:52.132213', 'step': 28476, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:52.192265', 'step': 28476, 'epoch': 3}
{'type': 'loss', 'content': 0.08634842187166214, 'timestamp': '2025-10-02 01:01:52.205536', 'step': 28477, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:52.266733', 'step': 28477, 'epoch': 3}
{'type': 'loss', 'content': 0.04592978581786156, 'timestamp': '2025-10-02 01:01:52.277025', 'step': 28478, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:52.336866', 'step': 28478, 'epoch': 3}
{'type': 'loss', 'content': 0.08607258647680283, 'timestamp': '2025-10-02 01:01:52.346722', 'step': 28479, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:52.422719', 'step': 28479, 'epoch': 3}
{'type': 'loss', 'content': 0.01847539097070694, 'timestamp': '2025-10-02 01:01:52.432923', 'step': 28480, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:52.502534', 'step': 28480, 'epoch': 3}
{'type': 'loss', 'content': 0.09705780446529388, 'timestamp': '2025-10-02 01:01:52.505821', 'step': 28481, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:52.569986', 'step': 28481, 'epoch': 3}
{'type': 'loss', 'content': 0.08358470350503922, 'timestamp': '2025-10-02 01:01:52.578412', 'step': 28482, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:01:52.638868', 'step': 28482, 'epoch': 3}
{'type': 'loss', 'content': 0.03320471569895744, 'timestamp': '2025-10-02 01:01:52.648280', 'step': 28483, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:52.712110', 'step': 28483, 'epoch': 3}
{'type': 'loss', 'content': 0.04408983513712883, 'timestamp': '2025-10-02 01:01:52.721468', 'step': 28484, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:52.794235', 'step': 28484, 'epoch': 3}
{'type': 'loss', 'content': 0.09112931042909622, 'timestamp': '2025-10-02 01:01:52.802232', 'step': 28485, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:52.868829', 'step': 28485, 'epoch': 3}
{'type': 'loss', 'content': 0.030609482899308205, 'timestamp': '2025-10-02 01:01:52.878307', 'step': 28486, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:52.949734', 'step': 28486, 'epoch': 3}
{'type': 'loss', 'content': 0.09256692975759506, 'timestamp': '2025-10-02 01:01:52.957020', 'step': 28487, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:53.027670', 'step': 28487, 'epoch': 3}
{'type': 'loss', 'content': 0.03202484920620918, 'timestamp': '2025-10-02 01:01:53.041334', 'step': 28488, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:01:53.109267', 'step': 28488, 'epoch': 3}
{'type': 'loss', 'content': 0.07681964337825775, 'timestamp': '2025-10-02 01:01:53.116375', 'step': 28489, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:01:53.191412', 'step': 28489, 'epoch': 3}
{'type': 'loss', 'content': 0.025152873247861862, 'timestamp': '2025-10-02 01:01:53.202007', 'step': 28490, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:53.267766', 'step': 28490, 'epoch': 3}
{'type': 'loss', 'content': 0.0327119454741478, 'timestamp': '2025-10-02 01:01:53.278242', 'step': 28491, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:53.335380', 'step': 28491, 'epoch': 3}
{'type': 'loss', 'content': 0.03611728176474571, 'timestamp': '2025-10-02 01:01:53.343011', 'step': 28492, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:53.409692', 'step': 28492, 'epoch': 3}
{'type': 'loss', 'content': 0.12592002749443054, 'timestamp': '2025-10-02 01:01:53.413179', 'step': 28493, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:53.487459', 'step': 28493, 'epoch': 3}
{'type': 'loss', 'content': 0.0619378499686718, 'timestamp': '2025-10-02 01:01:53.491045', 'step': 28494, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:53.556015', 'step': 28494, 'epoch': 3}
{'type': 'loss', 'content': 0.032259196043014526, 'timestamp': '2025-10-02 01:01:53.566487', 'step': 28495, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:53.626612', 'step': 28495, 'epoch': 3}
{'type': 'loss', 'content': 0.09268583357334137, 'timestamp': '2025-10-02 01:01:53.634121', 'step': 28496, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:53.691807', 'step': 28496, 'epoch': 3}
{'type': 'loss', 'content': 0.033983100205659866, 'timestamp': '2025-10-02 01:01:53.706533', 'step': 28497, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:53.767245', 'step': 28497, 'epoch': 3}
{'type': 'loss', 'content': 0.05979372188448906, 'timestamp': '2025-10-02 01:01:53.782888', 'step': 28498, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:53.859005', 'step': 28498, 'epoch': 3}
{'type': 'loss', 'content': 0.06481268256902695, 'timestamp': '2025-10-02 01:01:53.866273', 'step': 28499, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:53.929676', 'step': 28499, 'epoch': 3}
{'type': 'loss', 'content': 0.07476040720939636, 'timestamp': '2025-10-02 01:01:53.936989', 'step': 28500, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 28500', 'timestamp': '2025-10-02 01:01:54.344915', 'step': 28500, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:54.415318', 'step': 28500, 'epoch': 3}
{'type': 'loss', 'content': 0.037981584668159485, 'timestamp': '2025-10-02 01:01:54.420732', 'step': 28501, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:54.491142', 'step': 28501, 'epoch': 3}
{'type': 'loss', 'content': 0.05791759490966797, 'timestamp': '2025-10-02 01:01:54.494517', 'step': 28502, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:54.586628', 'step': 28502, 'epoch': 3}
{'type': 'loss', 'content': 0.03770607337355614, 'timestamp': '2025-10-02 01:01:54.591497', 'step': 28503, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:01:54.649937', 'step': 28503, 'epoch': 3}
{'type': 'loss', 'content': 0.0126741211861372, 'timestamp': '2025-10-02 01:01:54.657203', 'step': 28504, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:01:54.713284', 'step': 28504, 'epoch': 3}
{'type': 'loss', 'content': 0.09827569127082825, 'timestamp': '2025-10-02 01:01:54.716489', 'step': 28505, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:54.773515', 'step': 28505, 'epoch': 3}
{'type': 'loss', 'content': 0.060420554131269455, 'timestamp': '2025-10-02 01:01:54.779128', 'step': 28506, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:54.842641', 'step': 28506, 'epoch': 3}
{'type': 'loss', 'content': 0.10662945359945297, 'timestamp': '2025-10-02 01:01:54.845687', 'step': 28507, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:54.904037', 'step': 28507, 'epoch': 3}
{'type': 'loss', 'content': 0.055688757449388504, 'timestamp': '2025-10-02 01:01:54.919218', 'step': 28508, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:54.991661', 'step': 28508, 'epoch': 3}
{'type': 'loss', 'content': 0.026442265138030052, 'timestamp': '2025-10-02 01:01:55.002980', 'step': 28509, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:55.083486', 'step': 28509, 'epoch': 3}
{'type': 'loss', 'content': 0.0634821429848671, 'timestamp': '2025-10-02 01:01:55.087752', 'step': 28510, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:55.160368', 'step': 28510, 'epoch': 3}
{'type': 'loss', 'content': 0.014242040924727917, 'timestamp': '2025-10-02 01:01:55.164874', 'step': 28511, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:55.224174', 'step': 28511, 'epoch': 3}
{'type': 'loss', 'content': 0.003060444025322795, 'timestamp': '2025-10-02 01:01:55.252480', 'step': 28512, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:55.317558', 'step': 28512, 'epoch': 3}
{'type': 'loss', 'content': 0.045037657022476196, 'timestamp': '2025-10-02 01:01:55.323487', 'step': 28513, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:55.386635', 'step': 28513, 'epoch': 3}
{'type': 'loss', 'content': 0.029320508241653442, 'timestamp': '2025-10-02 01:01:55.392273', 'step': 28514, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:55.465437', 'step': 28514, 'epoch': 3}
{'type': 'loss', 'content': 0.039327606558799744, 'timestamp': '2025-10-02 01:01:55.469851', 'step': 28515, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:01:55.534174', 'step': 28515, 'epoch': 3}
{'type': 'loss', 'content': 0.0556441992521286, 'timestamp': '2025-10-02 01:01:55.542432', 'step': 28516, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:55.609348', 'step': 28516, 'epoch': 3}
{'type': 'loss', 'content': 0.017821403220295906, 'timestamp': '2025-10-02 01:01:55.620712', 'step': 28517, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:55.694264', 'step': 28517, 'epoch': 3}
{'type': 'loss', 'content': 0.05783091112971306, 'timestamp': '2025-10-02 01:01:55.701641', 'step': 28518, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:55.772269', 'step': 28518, 'epoch': 3}
{'type': 'loss', 'content': 0.006123283877968788, 'timestamp': '2025-10-02 01:01:55.781147', 'step': 28519, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:55.845658', 'step': 28519, 'epoch': 3}
{'type': 'loss', 'content': 0.05837995558977127, 'timestamp': '2025-10-02 01:01:55.864132', 'step': 28520, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:55.925567', 'step': 28520, 'epoch': 3}
{'type': 'loss', 'content': 0.027712635695934296, 'timestamp': '2025-10-02 01:01:55.944246', 'step': 28521, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:56.012450', 'step': 28521, 'epoch': 3}
{'type': 'loss', 'content': 0.042394090443849564, 'timestamp': '2025-10-02 01:01:56.015984', 'step': 28522, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:56.078979', 'step': 28522, 'epoch': 3}
{'type': 'loss', 'content': 0.053200628608465195, 'timestamp': '2025-10-02 01:01:56.083550', 'step': 28523, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:01:56.148833', 'step': 28523, 'epoch': 3}
{'type': 'loss', 'content': 0.06923245638608932, 'timestamp': '2025-10-02 01:01:56.159412', 'step': 28524, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:01:56.230180', 'step': 28524, 'epoch': 3}
{'type': 'loss', 'content': 0.04113948717713356, 'timestamp': '2025-10-02 01:01:56.234483', 'step': 28525, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:01:56.300246', 'step': 28525, 'epoch': 3}
{'type': 'loss', 'content': 0.06423453241586685, 'timestamp': '2025-10-02 01:01:56.306139', 'step': 28526, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:56.382403', 'step': 28526, 'epoch': 3}
{'type': 'loss', 'content': 0.08094391971826553, 'timestamp': '2025-10-02 01:01:56.398558', 'step': 28527, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:01:56.473385', 'step': 28527, 'epoch': 3}
{'type': 'loss', 'content': 0.051155269145965576, 'timestamp': '2025-10-02 01:01:56.483675', 'step': 28528, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:01:56.556630', 'step': 28528, 'epoch': 3}
{'type': 'loss', 'content': 0.044837575405836105, 'timestamp': '2025-10-02 01:01:56.559738', 'step': 28529, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:01:56.633574', 'step': 28529, 'epoch': 3}
{'type': 'loss', 'content': 0.02971542812883854, 'timestamp': '2025-10-02 01:01:56.644034', 'step': 28530, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:01:56.713635', 'step': 28530, 'epoch': 3}
{'type': 'loss', 'content': 0.04886443167924881, 'timestamp': '2025-10-02 01:01:56.717543', 'step': 28531, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:01:56.803667', 'step': 28531, 'epoch': 3}
{'type': 'loss', 'content': 0.06159214675426483, 'timestamp': '2025-10-02 01:01:56.816384', 'step': 28532, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:01:56.874126', 'step': 28532, 'epoch': 3}
{'type': 'loss', 'content': 0.03561403974890709, 'timestamp': '2025-10-02 01:01:56.885876', 'step': 28533, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:56.961640', 'step': 28533, 'epoch': 3}
{'type': 'loss', 'content': 0.008027410134673119, 'timestamp': '2025-10-02 01:01:56.968802', 'step': 28534, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:01:57.038667', 'step': 28534, 'epoch': 3}
{'type': 'loss', 'content': 0.03772777318954468, 'timestamp': '2025-10-02 01:01:57.045965', 'step': 28535, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:01:57.110739', 'step': 28535, 'epoch': 3}
{'type': 'loss', 'content': 0.11083684116601944, 'timestamp': '2025-10-02 01:01:57.118079', 'step': 28536, 'epoch': 3}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 01:02:26.761686', 'step': 28536, 'epoch': 3}
{'type': 'pplx', 'content': 86.31762219807952, 'timestamp': '2025-10-02 01:02:26.767540', 'step': 28536, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:26.824292', 'step': 28536, 'epoch': 3}
{'type': 'loss', 'content': 0.11459589004516602, 'timestamp': '2025-10-02 01:02:26.827234', 'step': 28537, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:02:26.923074', 'step': 28537, 'epoch': 3}
{'type': 'loss', 'content': 0.05146018788218498, 'timestamp': '2025-10-02 01:02:26.933478', 'step': 28538, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:26.995329', 'step': 28538, 'epoch': 3}
{'type': 'loss', 'content': 0.027354354038834572, 'timestamp': '2025-10-02 01:02:27.004258', 'step': 28539, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:02:27.085464', 'step': 28539, 'epoch': 3}
{'type': 'loss', 'content': 0.010312100872397423, 'timestamp': '2025-10-02 01:02:27.097061', 'step': 28540, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:27.196179', 'step': 28540, 'epoch': 3}
{'type': 'loss', 'content': 0.0529654435813427, 'timestamp': '2025-10-02 01:02:27.201487', 'step': 28541, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:27.267630', 'step': 28541, 'epoch': 3}
{'type': 'loss', 'content': 0.04953384771943092, 'timestamp': '2025-10-02 01:02:27.270728', 'step': 28542, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:27.338832', 'step': 28542, 'epoch': 3}
{'type': 'loss', 'content': 0.04989450424909592, 'timestamp': '2025-10-02 01:02:27.348335', 'step': 28543, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:27.412501', 'step': 28543, 'epoch': 3}
{'type': 'loss', 'content': 0.04471435397863388, 'timestamp': '2025-10-02 01:02:27.424866', 'step': 28544, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:27.493221', 'step': 28544, 'epoch': 3}
{'type': 'loss', 'content': 0.021323617547750473, 'timestamp': '2025-10-02 01:02:27.501283', 'step': 28545, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:27.561843', 'step': 28545, 'epoch': 3}
{'type': 'loss', 'content': 0.03991442173719406, 'timestamp': '2025-10-02 01:02:27.567513', 'step': 28546, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:02:27.635694', 'step': 28546, 'epoch': 3}
{'type': 'loss', 'content': 0.04174666479229927, 'timestamp': '2025-10-02 01:02:27.645251', 'step': 28547, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:02:27.711586', 'step': 28547, 'epoch': 3}
{'type': 'loss', 'content': 0.09364012628793716, 'timestamp': '2025-10-02 01:02:27.719280', 'step': 28548, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:02:27.788919', 'step': 28548, 'epoch': 3}
{'type': 'loss', 'content': 0.04253451153635979, 'timestamp': '2025-10-02 01:02:27.799905', 'step': 28549, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:27.874924', 'step': 28549, 'epoch': 3}
{'type': 'loss', 'content': 0.045673321932554245, 'timestamp': '2025-10-02 01:02:27.878700', 'step': 28550, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:02:27.948204', 'step': 28550, 'epoch': 3}
{'type': 'loss', 'content': 0.0010270721977576613, 'timestamp': '2025-10-02 01:02:27.958647', 'step': 28551, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:02:28.019794', 'step': 28551, 'epoch': 3}
{'type': 'loss', 'content': 0.022521229460835457, 'timestamp': '2025-10-02 01:02:28.027622', 'step': 28552, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:28.094988', 'step': 28552, 'epoch': 3}
{'type': 'loss', 'content': 0.06627058237791061, 'timestamp': '2025-10-02 01:02:28.102077', 'step': 28553, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:28.172355', 'step': 28553, 'epoch': 3}
{'type': 'loss', 'content': 0.031286004930734634, 'timestamp': '2025-10-02 01:02:28.175114', 'step': 28554, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:28.245659', 'step': 28554, 'epoch': 3}
{'type': 'loss', 'content': 0.043355077505111694, 'timestamp': '2025-10-02 01:02:28.251323', 'step': 28555, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:28.320605', 'step': 28555, 'epoch': 3}
{'type': 'loss', 'content': 0.09016429632902145, 'timestamp': '2025-10-02 01:02:28.331046', 'step': 28556, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:28.396135', 'step': 28556, 'epoch': 3}
{'type': 'loss', 'content': 0.058808255940675735, 'timestamp': '2025-10-02 01:02:28.403382', 'step': 28557, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:28.475409', 'step': 28557, 'epoch': 3}
{'type': 'loss', 'content': 0.04015656188130379, 'timestamp': '2025-10-02 01:02:28.482688', 'step': 28558, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:28.546855', 'step': 28558, 'epoch': 3}
{'type': 'loss', 'content': 0.028472959995269775, 'timestamp': '2025-10-02 01:02:28.551551', 'step': 28559, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:02:28.615343', 'step': 28559, 'epoch': 3}
{'type': 'loss', 'content': 0.016741720959544182, 'timestamp': '2025-10-02 01:02:28.625621', 'step': 28560, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:28.685349', 'step': 28560, 'epoch': 3}
{'type': 'loss', 'content': 0.1503283679485321, 'timestamp': '2025-10-02 01:02:28.687973', 'step': 28561, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:28.744648', 'step': 28561, 'epoch': 3}
{'type': 'loss', 'content': 0.04686327651143074, 'timestamp': '2025-10-02 01:02:28.753847', 'step': 28562, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:28.816646', 'step': 28562, 'epoch': 3}
{'type': 'loss', 'content': 0.03924069553613663, 'timestamp': '2025-10-02 01:02:28.820039', 'step': 28563, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:28.893526', 'step': 28563, 'epoch': 3}
{'type': 'loss', 'content': 0.02031365968286991, 'timestamp': '2025-10-02 01:02:28.900773', 'step': 28564, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:28.958744', 'step': 28564, 'epoch': 3}
{'type': 'loss', 'content': 0.03394487872719765, 'timestamp': '2025-10-02 01:02:28.965483', 'step': 28565, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:29.032621', 'step': 28565, 'epoch': 3}
{'type': 'loss', 'content': 0.03295969218015671, 'timestamp': '2025-10-02 01:02:29.035857', 'step': 28566, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:29.098245', 'step': 28566, 'epoch': 3}
{'type': 'loss', 'content': 0.04847308620810509, 'timestamp': '2025-10-02 01:02:29.100956', 'step': 28567, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:29.158116', 'step': 28567, 'epoch': 3}
{'type': 'loss', 'content': 0.01251525804400444, 'timestamp': '2025-10-02 01:02:29.165024', 'step': 28568, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:29.220438', 'step': 28568, 'epoch': 3}
{'type': 'loss', 'content': 0.04326784238219261, 'timestamp': '2025-10-02 01:02:29.226980', 'step': 28569, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:02:29.287716', 'step': 28569, 'epoch': 3}
{'type': 'loss', 'content': 0.021225551143288612, 'timestamp': '2025-10-02 01:02:29.291056', 'step': 28570, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:29.346929', 'step': 28570, 'epoch': 3}
{'type': 'loss', 'content': 0.07362187653779984, 'timestamp': '2025-10-02 01:02:29.349838', 'step': 28571, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:29.411772', 'step': 28571, 'epoch': 3}
{'type': 'loss', 'content': 0.11860442161560059, 'timestamp': '2025-10-02 01:02:29.418442', 'step': 28572, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:29.480773', 'step': 28572, 'epoch': 3}
{'type': 'loss', 'content': 0.021878061816096306, 'timestamp': '2025-10-02 01:02:29.490254', 'step': 28573, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:02:29.557828', 'step': 28573, 'epoch': 3}
{'type': 'loss', 'content': 0.04501403868198395, 'timestamp': '2025-10-02 01:02:29.567992', 'step': 28574, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:02:29.627837', 'step': 28574, 'epoch': 3}
{'type': 'loss', 'content': 0.055024415254592896, 'timestamp': '2025-10-02 01:02:29.630940', 'step': 28575, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:29.686657', 'step': 28575, 'epoch': 3}
{'type': 'loss', 'content': 0.035465583205223083, 'timestamp': '2025-10-02 01:02:29.694577', 'step': 28576, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:29.754089', 'step': 28576, 'epoch': 3}
{'type': 'loss', 'content': 0.041572410613298416, 'timestamp': '2025-10-02 01:02:29.759714', 'step': 28577, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:29.817530', 'step': 28577, 'epoch': 3}
{'type': 'loss', 'content': 0.022680645808577538, 'timestamp': '2025-10-02 01:02:29.820471', 'step': 28578, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:29.877894', 'step': 28578, 'epoch': 3}
{'type': 'loss', 'content': 0.002642186591401696, 'timestamp': '2025-10-02 01:02:29.881400', 'step': 28579, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:29.945381', 'step': 28579, 'epoch': 3}
{'type': 'loss', 'content': 0.020586548373103142, 'timestamp': '2025-10-02 01:02:29.952633', 'step': 28580, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:30.012315', 'step': 28580, 'epoch': 3}
{'type': 'loss', 'content': 0.0720885768532753, 'timestamp': '2025-10-02 01:02:30.015251', 'step': 28581, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:30.072807', 'step': 28581, 'epoch': 3}
{'type': 'loss', 'content': 0.035244449973106384, 'timestamp': '2025-10-02 01:02:30.076943', 'step': 28582, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:30.136866', 'step': 28582, 'epoch': 3}
{'type': 'loss', 'content': 0.11796993762254715, 'timestamp': '2025-10-02 01:02:30.144551', 'step': 28583, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:02:30.208201', 'step': 28583, 'epoch': 3}
{'type': 'loss', 'content': 0.10142098367214203, 'timestamp': '2025-10-02 01:02:30.214481', 'step': 28584, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:30.281690', 'step': 28584, 'epoch': 3}
{'type': 'loss', 'content': 0.044939517974853516, 'timestamp': '2025-10-02 01:02:30.289697', 'step': 28585, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:02:30.370528', 'step': 28585, 'epoch': 3}
{'type': 'loss', 'content': 0.03359554335474968, 'timestamp': '2025-10-02 01:02:30.381226', 'step': 28586, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:30.456246', 'step': 28586, 'epoch': 3}
{'type': 'loss', 'content': 0.006620569154620171, 'timestamp': '2025-10-02 01:02:30.459602', 'step': 28587, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:30.522879', 'step': 28587, 'epoch': 3}
{'type': 'loss', 'content': 0.0783468708395958, 'timestamp': '2025-10-02 01:02:30.533172', 'step': 28588, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:30.595911', 'step': 28588, 'epoch': 3}
{'type': 'loss', 'content': 0.025810088962316513, 'timestamp': '2025-10-02 01:02:30.605274', 'step': 28589, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:02:30.681933', 'step': 28589, 'epoch': 3}
{'type': 'loss', 'content': 0.06699181348085403, 'timestamp': '2025-10-02 01:02:30.690386', 'step': 28590, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:30.764646', 'step': 28590, 'epoch': 3}
{'type': 'loss', 'content': 0.02596130222082138, 'timestamp': '2025-10-02 01:02:30.773875', 'step': 28591, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:30.836704', 'step': 28591, 'epoch': 3}
{'type': 'loss', 'content': 0.045448221266269684, 'timestamp': '2025-10-02 01:02:30.848780', 'step': 28592, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:30.913024', 'step': 28592, 'epoch': 3}
{'type': 'loss', 'content': 0.0664169117808342, 'timestamp': '2025-10-02 01:02:30.923257', 'step': 28593, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:30.996795', 'step': 28593, 'epoch': 3}
{'type': 'loss', 'content': 0.07413657754659653, 'timestamp': '2025-10-02 01:02:30.999417', 'step': 28594, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:02:31.061313', 'step': 28594, 'epoch': 3}
{'type': 'loss', 'content': 0.07051746547222137, 'timestamp': '2025-10-02 01:02:31.071450', 'step': 28595, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:31.142981', 'step': 28595, 'epoch': 3}
{'type': 'loss', 'content': 0.04672016203403473, 'timestamp': '2025-10-02 01:02:31.150936', 'step': 28596, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:31.221289', 'step': 28596, 'epoch': 3}
{'type': 'loss', 'content': 0.06793118268251419, 'timestamp': '2025-10-02 01:02:31.230618', 'step': 28597, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:31.299630', 'step': 28597, 'epoch': 3}
{'type': 'loss', 'content': 0.09210093319416046, 'timestamp': '2025-10-02 01:02:31.305793', 'step': 28598, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:02:31.390945', 'step': 28598, 'epoch': 3}
{'type': 'loss', 'content': 0.060680802911520004, 'timestamp': '2025-10-02 01:02:31.397471', 'step': 28599, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:02:31.472285', 'step': 28599, 'epoch': 3}
{'type': 'loss', 'content': 0.020114675164222717, 'timestamp': '2025-10-02 01:02:31.483720', 'step': 28600, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:31.555107', 'step': 28600, 'epoch': 3}
{'type': 'loss', 'content': 0.11384914070367813, 'timestamp': '2025-10-02 01:02:31.559744', 'step': 28601, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:02:31.642836', 'step': 28601, 'epoch': 3}
{'type': 'loss', 'content': 0.038986023515462875, 'timestamp': '2025-10-02 01:02:31.646826', 'step': 28602, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:31.704326', 'step': 28602, 'epoch': 3}
{'type': 'loss', 'content': 0.08243878185749054, 'timestamp': '2025-10-02 01:02:31.707957', 'step': 28603, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:31.764759', 'step': 28603, 'epoch': 3}
{'type': 'loss', 'content': 0.013805263675749302, 'timestamp': '2025-10-02 01:02:31.772512', 'step': 28604, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:02:31.832923', 'step': 28604, 'epoch': 3}
{'type': 'loss', 'content': 0.1100492849946022, 'timestamp': '2025-10-02 01:02:31.836129', 'step': 28605, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:31.893990', 'step': 28605, 'epoch': 3}
{'type': 'loss', 'content': 0.056497182697057724, 'timestamp': '2025-10-02 01:02:31.904925', 'step': 28606, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:31.967826', 'step': 28606, 'epoch': 3}
{'type': 'loss', 'content': 0.1096305325627327, 'timestamp': '2025-10-02 01:02:31.971216', 'step': 28607, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:32.042997', 'step': 28607, 'epoch': 3}
{'type': 'loss', 'content': 0.030880263075232506, 'timestamp': '2025-10-02 01:02:32.056001', 'step': 28608, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:02:32.113341', 'step': 28608, 'epoch': 3}
{'type': 'loss', 'content': 0.06970366090536118, 'timestamp': '2025-10-02 01:02:32.116325', 'step': 28609, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:02:32.207055', 'step': 28609, 'epoch': 3}
{'type': 'loss', 'content': 0.029113786295056343, 'timestamp': '2025-10-02 01:02:32.219375', 'step': 28610, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:02:32.281519', 'step': 28610, 'epoch': 3}
{'type': 'loss', 'content': 0.0714118555188179, 'timestamp': '2025-10-02 01:02:32.284758', 'step': 28611, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:32.344326', 'step': 28611, 'epoch': 3}
{'type': 'loss', 'content': 0.13293087482452393, 'timestamp': '2025-10-02 01:02:32.351546', 'step': 28612, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:02:32.409071', 'step': 28612, 'epoch': 3}
{'type': 'loss', 'content': 0.06598278880119324, 'timestamp': '2025-10-02 01:02:32.418532', 'step': 28613, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:32.483742', 'step': 28613, 'epoch': 3}
{'type': 'loss', 'content': 0.060896892100572586, 'timestamp': '2025-10-02 01:02:32.504531', 'step': 28614, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:32.596768', 'step': 28614, 'epoch': 3}
{'type': 'loss', 'content': 0.039283715188503265, 'timestamp': '2025-10-02 01:02:32.616135', 'step': 28615, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:02:32.722299', 'step': 28615, 'epoch': 3}
{'type': 'loss', 'content': 0.1313168853521347, 'timestamp': '2025-10-02 01:02:32.730163', 'step': 28616, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:32.809051', 'step': 28616, 'epoch': 3}
{'type': 'loss', 'content': 0.06700579077005386, 'timestamp': '2025-10-02 01:02:32.811994', 'step': 28617, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:02:32.874459', 'step': 28617, 'epoch': 3}
{'type': 'loss', 'content': 0.0619492344558239, 'timestamp': '2025-10-02 01:02:32.882851', 'step': 28618, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:32.950317', 'step': 28618, 'epoch': 3}
{'type': 'loss', 'content': 0.09656135737895966, 'timestamp': '2025-10-02 01:02:32.954163', 'step': 28619, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:02:33.022738', 'step': 28619, 'epoch': 3}
{'type': 'loss', 'content': 0.03213364630937576, 'timestamp': '2025-10-02 01:02:33.032529', 'step': 28620, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:02:33.094403', 'step': 28620, 'epoch': 3}
{'type': 'loss', 'content': 0.04395134747028351, 'timestamp': '2025-10-02 01:02:33.105370', 'step': 28621, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:33.163000', 'step': 28621, 'epoch': 3}
{'type': 'loss', 'content': 0.013656395487487316, 'timestamp': '2025-10-02 01:02:33.170116', 'step': 28622, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:02:33.236439', 'step': 28622, 'epoch': 3}
{'type': 'loss', 'content': 0.08514401316642761, 'timestamp': '2025-10-02 01:02:33.239458', 'step': 28623, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:33.295979', 'step': 28623, 'epoch': 3}
{'type': 'loss', 'content': 0.03363596647977829, 'timestamp': '2025-10-02 01:02:33.310456', 'step': 28624, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:33.378849', 'step': 28624, 'epoch': 3}
{'type': 'loss', 'content': 0.08166411519050598, 'timestamp': '2025-10-02 01:02:33.383440', 'step': 28625, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:33.441722', 'step': 28625, 'epoch': 3}
{'type': 'loss', 'content': 0.03944850713014603, 'timestamp': '2025-10-02 01:02:33.445256', 'step': 28626, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:33.502257', 'step': 28626, 'epoch': 3}
{'type': 'loss', 'content': 0.0077865892089903355, 'timestamp': '2025-10-02 01:02:33.507884', 'step': 28627, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:33.571320', 'step': 28627, 'epoch': 3}
{'type': 'loss', 'content': 0.04639175534248352, 'timestamp': '2025-10-02 01:02:33.584667', 'step': 28628, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:33.643403', 'step': 28628, 'epoch': 3}
{'type': 'loss', 'content': 0.19490979611873627, 'timestamp': '2025-10-02 01:02:33.653519', 'step': 28629, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:33.728263', 'step': 28629, 'epoch': 3}
{'type': 'loss', 'content': 0.05587977543473244, 'timestamp': '2025-10-02 01:02:33.732927', 'step': 28630, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:33.795689', 'step': 28630, 'epoch': 3}
{'type': 'loss', 'content': 0.05372512713074684, 'timestamp': '2025-10-02 01:02:33.801184', 'step': 28631, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:02:33.876828', 'step': 28631, 'epoch': 3}
{'type': 'loss', 'content': 0.037095747888088226, 'timestamp': '2025-10-02 01:02:33.883961', 'step': 28632, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:02:33.941926', 'step': 28632, 'epoch': 3}
{'type': 'loss', 'content': 0.026616135612130165, 'timestamp': '2025-10-02 01:02:33.944613', 'step': 28633, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:02:34.005226', 'step': 28633, 'epoch': 3}
{'type': 'loss', 'content': 0.12609171867370605, 'timestamp': '2025-10-02 01:02:34.013337', 'step': 28634, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:02:34.072644', 'step': 28634, 'epoch': 3}
{'type': 'loss', 'content': 0.10811834782361984, 'timestamp': '2025-10-02 01:02:34.075229', 'step': 28635, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:34.138168', 'step': 28635, 'epoch': 3}
{'type': 'loss', 'content': 0.08205251395702362, 'timestamp': '2025-10-02 01:02:34.147354', 'step': 28636, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:34.206099', 'step': 28636, 'epoch': 3}
{'type': 'loss', 'content': 0.02509310282766819, 'timestamp': '2025-10-02 01:02:34.212760', 'step': 28637, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:02:34.276245', 'step': 28637, 'epoch': 3}
{'type': 'loss', 'content': 0.019399605691432953, 'timestamp': '2025-10-02 01:02:34.286409', 'step': 28638, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:34.342251', 'step': 28638, 'epoch': 3}
{'type': 'loss', 'content': 0.035295721143484116, 'timestamp': '2025-10-02 01:02:34.345890', 'step': 28639, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:34.403816', 'step': 28639, 'epoch': 3}
{'type': 'loss', 'content': 0.024744788184762, 'timestamp': '2025-10-02 01:02:34.410390', 'step': 28640, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:02:34.468865', 'step': 28640, 'epoch': 3}
{'type': 'loss', 'content': 0.04369368404150009, 'timestamp': '2025-10-02 01:02:34.471767', 'step': 28641, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:34.534575', 'step': 28641, 'epoch': 3}
{'type': 'loss', 'content': 0.11772254854440689, 'timestamp': '2025-10-02 01:02:34.537649', 'step': 28642, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:34.597527', 'step': 28642, 'epoch': 3}
{'type': 'loss', 'content': 0.08562126755714417, 'timestamp': '2025-10-02 01:02:34.601363', 'step': 28643, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:34.666817', 'step': 28643, 'epoch': 3}
{'type': 'loss', 'content': 0.013189162127673626, 'timestamp': '2025-10-02 01:02:34.673819', 'step': 28644, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:34.741064', 'step': 28644, 'epoch': 3}
{'type': 'loss', 'content': 0.03875408694148064, 'timestamp': '2025-10-02 01:02:34.746897', 'step': 28645, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:34.802753', 'step': 28645, 'epoch': 3}
{'type': 'loss', 'content': 0.07313086092472076, 'timestamp': '2025-10-02 01:02:34.805430', 'step': 28646, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:02:34.874963', 'step': 28646, 'epoch': 3}
{'type': 'loss', 'content': 0.015578220598399639, 'timestamp': '2025-10-02 01:02:34.885155', 'step': 28647, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:34.947535', 'step': 28647, 'epoch': 3}
{'type': 'loss', 'content': 0.019775571301579475, 'timestamp': '2025-10-02 01:02:34.954386', 'step': 28648, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:02:35.026371', 'step': 28648, 'epoch': 3}
{'type': 'loss', 'content': 0.025315633043646812, 'timestamp': '2025-10-02 01:02:35.037632', 'step': 28649, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:35.105898', 'step': 28649, 'epoch': 3}
{'type': 'loss', 'content': 0.03461030498147011, 'timestamp': '2025-10-02 01:02:35.111797', 'step': 28650, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:35.180546', 'step': 28650, 'epoch': 3}
{'type': 'loss', 'content': 0.036343999207019806, 'timestamp': '2025-10-02 01:02:35.185149', 'step': 28651, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:35.240830', 'step': 28651, 'epoch': 3}
{'type': 'loss', 'content': 0.047591038048267365, 'timestamp': '2025-10-02 01:02:35.251544', 'step': 28652, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:35.309526', 'step': 28652, 'epoch': 3}
{'type': 'loss', 'content': 0.05634922906756401, 'timestamp': '2025-10-02 01:02:35.316806', 'step': 28653, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:35.386117', 'step': 28653, 'epoch': 3}
{'type': 'loss', 'content': 0.05066099017858505, 'timestamp': '2025-10-02 01:02:35.395401', 'step': 28654, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:35.452264', 'step': 28654, 'epoch': 3}
{'type': 'loss', 'content': 0.019544027745723724, 'timestamp': '2025-10-02 01:02:35.459744', 'step': 28655, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:35.529552', 'step': 28655, 'epoch': 3}
{'type': 'loss', 'content': 0.052673567086458206, 'timestamp': '2025-10-02 01:02:35.539834', 'step': 28656, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:02:35.606199', 'step': 28656, 'epoch': 3}
{'type': 'loss', 'content': 0.05079120397567749, 'timestamp': '2025-10-02 01:02:35.608859', 'step': 28657, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:35.674822', 'step': 28657, 'epoch': 3}
{'type': 'loss', 'content': 0.06502045691013336, 'timestamp': '2025-10-02 01:02:35.682763', 'step': 28658, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:35.748854', 'step': 28658, 'epoch': 3}
{'type': 'loss', 'content': 0.08312296122312546, 'timestamp': '2025-10-02 01:02:35.754581', 'step': 28659, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:35.815979', 'step': 28659, 'epoch': 3}
{'type': 'loss', 'content': 0.0023533639032393694, 'timestamp': '2025-10-02 01:02:35.826784', 'step': 28660, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:35.892163', 'step': 28660, 'epoch': 3}
{'type': 'loss', 'content': 0.07427150011062622, 'timestamp': '2025-10-02 01:02:35.902158', 'step': 28661, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:02:35.973838', 'step': 28661, 'epoch': 3}
{'type': 'loss', 'content': 0.017405621707439423, 'timestamp': '2025-10-02 01:02:35.983347', 'step': 28662, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:36.048023', 'step': 28662, 'epoch': 3}
{'type': 'loss', 'content': 0.09787064790725708, 'timestamp': '2025-10-02 01:02:36.050578', 'step': 28663, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:02:36.105123', 'step': 28663, 'epoch': 3}
{'type': 'loss', 'content': 0.06663595885038376, 'timestamp': '2025-10-02 01:02:36.111676', 'step': 28664, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:36.166048', 'step': 28664, 'epoch': 3}
{'type': 'loss', 'content': 0.03466871753334999, 'timestamp': '2025-10-02 01:02:36.168682', 'step': 28665, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:36.223873', 'step': 28665, 'epoch': 3}
{'type': 'loss', 'content': 0.013221078552305698, 'timestamp': '2025-10-02 01:02:36.226461', 'step': 28666, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:02:36.282541', 'step': 28666, 'epoch': 3}
{'type': 'loss', 'content': 0.037866782397031784, 'timestamp': '2025-10-02 01:02:36.285346', 'step': 28667, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:02:36.339753', 'step': 28667, 'epoch': 3}
{'type': 'loss', 'content': 0.03686120733618736, 'timestamp': '2025-10-02 01:02:36.346586', 'step': 28668, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:36.401787', 'step': 28668, 'epoch': 3}
{'type': 'loss', 'content': 0.05035214498639107, 'timestamp': '2025-10-02 01:02:36.405583', 'step': 28669, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:36.461126', 'step': 28669, 'epoch': 3}
{'type': 'loss', 'content': 0.0069166976027190685, 'timestamp': '2025-10-02 01:02:36.466857', 'step': 28670, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:36.521526', 'step': 28670, 'epoch': 3}
{'type': 'loss', 'content': 0.011536812409758568, 'timestamp': '2025-10-02 01:02:36.528437', 'step': 28671, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:36.583535', 'step': 28671, 'epoch': 3}
{'type': 'loss', 'content': 0.03279600664973259, 'timestamp': '2025-10-02 01:02:36.589989', 'step': 28672, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:36.643928', 'step': 28672, 'epoch': 3}
{'type': 'loss', 'content': 0.07254085689783096, 'timestamp': '2025-10-02 01:02:36.646636', 'step': 28673, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:36.702394', 'step': 28673, 'epoch': 3}
{'type': 'loss', 'content': 0.0412997268140316, 'timestamp': '2025-10-02 01:02:36.705246', 'step': 28674, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:02:36.759450', 'step': 28674, 'epoch': 3}
{'type': 'loss', 'content': 0.08068013936281204, 'timestamp': '2025-10-02 01:02:36.762124', 'step': 28675, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:36.817635', 'step': 28675, 'epoch': 3}
{'type': 'loss', 'content': 0.020236976444721222, 'timestamp': '2025-10-02 01:02:36.825623', 'step': 28676, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:36.880599', 'step': 28676, 'epoch': 3}
{'type': 'loss', 'content': 0.07419329881668091, 'timestamp': '2025-10-02 01:02:36.886421', 'step': 28677, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:36.941315', 'step': 28677, 'epoch': 3}
{'type': 'loss', 'content': 0.007625326979905367, 'timestamp': '2025-10-02 01:02:36.948585', 'step': 28678, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:37.003954', 'step': 28678, 'epoch': 3}
{'type': 'loss', 'content': 0.04493264853954315, 'timestamp': '2025-10-02 01:02:37.006783', 'step': 28679, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:37.061144', 'step': 28679, 'epoch': 3}
{'type': 'loss', 'content': 0.026185106486082077, 'timestamp': '2025-10-02 01:02:37.067997', 'step': 28680, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:37.122068', 'step': 28680, 'epoch': 3}
{'type': 'loss', 'content': 0.034943971782922745, 'timestamp': '2025-10-02 01:02:37.129324', 'step': 28681, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:02:37.193952', 'step': 28681, 'epoch': 3}
{'type': 'loss', 'content': 0.013467223383486271, 'timestamp': '2025-10-02 01:02:37.204816', 'step': 28682, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:37.259787', 'step': 28682, 'epoch': 3}
{'type': 'loss', 'content': 0.007287635467946529, 'timestamp': '2025-10-02 01:02:37.263617', 'step': 28683, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:37.321915', 'step': 28683, 'epoch': 3}
{'type': 'loss', 'content': 0.02184518426656723, 'timestamp': '2025-10-02 01:02:37.328506', 'step': 28684, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:02:37.382920', 'step': 28684, 'epoch': 3}
{'type': 'loss', 'content': 0.05642790347337723, 'timestamp': '2025-10-02 01:02:37.385238', 'step': 28685, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:37.439002', 'step': 28685, 'epoch': 3}
{'type': 'loss', 'content': 0.05183267593383789, 'timestamp': '2025-10-02 01:02:37.442579', 'step': 28686, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:37.497451', 'step': 28686, 'epoch': 3}
{'type': 'loss', 'content': 0.01850755885243416, 'timestamp': '2025-10-02 01:02:37.500345', 'step': 28687, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:37.555385', 'step': 28687, 'epoch': 3}
{'type': 'loss', 'content': 0.03220535069704056, 'timestamp': '2025-10-02 01:02:37.563492', 'step': 28688, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:37.618168', 'step': 28688, 'epoch': 3}
{'type': 'loss', 'content': 0.07889924198389053, 'timestamp': '2025-10-02 01:02:37.620990', 'step': 28689, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:02:37.682215', 'step': 28689, 'epoch': 3}
{'type': 'loss', 'content': 0.029791763052344322, 'timestamp': '2025-10-02 01:02:37.692635', 'step': 28690, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:37.747754', 'step': 28690, 'epoch': 3}
{'type': 'loss', 'content': 0.0934370756149292, 'timestamp': '2025-10-02 01:02:37.750344', 'step': 28691, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:37.805706', 'step': 28691, 'epoch': 3}
{'type': 'loss', 'content': 0.06861388683319092, 'timestamp': '2025-10-02 01:02:37.815761', 'step': 28692, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:02:37.877009', 'step': 28692, 'epoch': 3}
{'type': 'loss', 'content': 0.007962996140122414, 'timestamp': '2025-10-02 01:02:37.888511', 'step': 28693, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:02:37.944299', 'step': 28693, 'epoch': 3}
{'type': 'loss', 'content': 0.03930463641881943, 'timestamp': '2025-10-02 01:02:37.953809', 'step': 28694, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:02:38.022397', 'step': 28694, 'epoch': 3}
{'type': 'loss', 'content': 0.034023601561784744, 'timestamp': '2025-10-02 01:02:38.034328', 'step': 28695, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:02:38.089764', 'step': 28695, 'epoch': 3}
{'type': 'loss', 'content': 0.07990413904190063, 'timestamp': '2025-10-02 01:02:38.100053', 'step': 28696, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:38.153671', 'step': 28696, 'epoch': 3}
{'type': 'loss', 'content': 0.06835769861936569, 'timestamp': '2025-10-02 01:02:38.156339', 'step': 28697, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:02:38.210439', 'step': 28697, 'epoch': 3}
{'type': 'loss', 'content': 0.034220293164253235, 'timestamp': '2025-10-02 01:02:38.213025', 'step': 28698, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:38.269433', 'step': 28698, 'epoch': 3}
{'type': 'loss', 'content': 0.05999641865491867, 'timestamp': '2025-10-02 01:02:38.271980', 'step': 28699, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:38.326688', 'step': 28699, 'epoch': 3}
{'type': 'loss', 'content': 0.024559270590543747, 'timestamp': '2025-10-02 01:02:38.333722', 'step': 28700, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:02:38.387694', 'step': 28700, 'epoch': 3}
{'type': 'loss', 'content': 0.035346247255802155, 'timestamp': '2025-10-02 01:02:38.390170', 'step': 28701, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:38.445036', 'step': 28701, 'epoch': 3}
{'type': 'loss', 'content': 0.02266695350408554, 'timestamp': '2025-10-02 01:02:38.448368', 'step': 28702, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:38.503658', 'step': 28702, 'epoch': 3}
{'type': 'loss', 'content': 0.0404098816215992, 'timestamp': '2025-10-02 01:02:38.506442', 'step': 28703, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:38.561273', 'step': 28703, 'epoch': 3}
{'type': 'loss', 'content': 0.04798201099038124, 'timestamp': '2025-10-02 01:02:38.569204', 'step': 28704, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:38.623688', 'step': 28704, 'epoch': 3}
{'type': 'loss', 'content': 0.09705082327127457, 'timestamp': '2025-10-02 01:02:38.626335', 'step': 28705, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:38.680631', 'step': 28705, 'epoch': 3}
{'type': 'loss', 'content': 0.022459257394075394, 'timestamp': '2025-10-02 01:02:38.683240', 'step': 28706, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:38.737775', 'step': 28706, 'epoch': 3}
{'type': 'loss', 'content': 0.08365540206432343, 'timestamp': '2025-10-02 01:02:38.743539', 'step': 28707, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:38.799839', 'step': 28707, 'epoch': 3}
{'type': 'loss', 'content': 0.06530561298131943, 'timestamp': '2025-10-02 01:02:38.806796', 'step': 28708, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:38.861414', 'step': 28708, 'epoch': 3}
{'type': 'loss', 'content': 0.001837483374401927, 'timestamp': '2025-10-02 01:02:38.866956', 'step': 28709, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:38.921823', 'step': 28709, 'epoch': 3}
{'type': 'loss', 'content': 0.010778598487377167, 'timestamp': '2025-10-02 01:02:38.927288', 'step': 28710, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:38.983457', 'step': 28710, 'epoch': 3}
{'type': 'loss', 'content': 0.0029375110752880573, 'timestamp': '2025-10-02 01:02:38.992456', 'step': 28711, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:39.046524', 'step': 28711, 'epoch': 3}
{'type': 'loss', 'content': 0.06744825094938278, 'timestamp': '2025-10-02 01:02:39.052702', 'step': 28712, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:39.106140', 'step': 28712, 'epoch': 3}
{'type': 'loss', 'content': 0.06511100381612778, 'timestamp': '2025-10-02 01:02:39.111774', 'step': 28713, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:39.166646', 'step': 28713, 'epoch': 3}
{'type': 'loss', 'content': 0.0007573806797154248, 'timestamp': '2025-10-02 01:02:39.173924', 'step': 28714, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:39.228607', 'step': 28714, 'epoch': 3}
{'type': 'loss', 'content': 0.08308552205562592, 'timestamp': '2025-10-02 01:02:39.231409', 'step': 28715, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:39.285543', 'step': 28715, 'epoch': 3}
{'type': 'loss', 'content': 0.03335469961166382, 'timestamp': '2025-10-02 01:02:39.292052', 'step': 28716, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:39.347683', 'step': 28716, 'epoch': 3}
{'type': 'loss', 'content': 0.04826590418815613, 'timestamp': '2025-10-02 01:02:39.350306', 'step': 28717, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:39.404443', 'step': 28717, 'epoch': 3}
{'type': 'loss', 'content': 0.06579771637916565, 'timestamp': '2025-10-02 01:02:39.411654', 'step': 28718, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:02:39.487981', 'step': 28718, 'epoch': 3}
{'type': 'loss', 'content': 0.02181869000196457, 'timestamp': '2025-10-02 01:02:39.498497', 'step': 28719, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:39.595282', 'step': 28719, 'epoch': 3}
{'type': 'loss', 'content': 0.024163058027625084, 'timestamp': '2025-10-02 01:02:39.602906', 'step': 28720, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:02:39.678567', 'step': 28720, 'epoch': 3}
{'type': 'loss', 'content': 0.021560238674283028, 'timestamp': '2025-10-02 01:02:39.689849', 'step': 28721, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:02:39.780036', 'step': 28721, 'epoch': 3}
{'type': 'loss', 'content': 0.0247679203748703, 'timestamp': '2025-10-02 01:02:39.785734', 'step': 28722, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:02:39.861676', 'step': 28722, 'epoch': 3}
{'type': 'loss', 'content': 0.009593264199793339, 'timestamp': '2025-10-02 01:02:39.878736', 'step': 28723, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:02:39.967958', 'step': 28723, 'epoch': 3}
{'type': 'loss', 'content': 0.05378083139657974, 'timestamp': '2025-10-02 01:02:39.974851', 'step': 28724, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:40.044159', 'step': 28724, 'epoch': 3}
{'type': 'loss', 'content': 0.07604897022247314, 'timestamp': '2025-10-02 01:02:40.048873', 'step': 28725, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:40.110822', 'step': 28725, 'epoch': 3}
{'type': 'loss', 'content': 0.08189201354980469, 'timestamp': '2025-10-02 01:02:40.114112', 'step': 28726, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:40.176916', 'step': 28726, 'epoch': 3}
{'type': 'loss', 'content': 0.09779316186904907, 'timestamp': '2025-10-02 01:02:40.179816', 'step': 28727, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:40.250688', 'step': 28727, 'epoch': 3}
{'type': 'loss', 'content': 0.022721048444509506, 'timestamp': '2025-10-02 01:02:40.258042', 'step': 28728, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:40.329932', 'step': 28728, 'epoch': 3}
{'type': 'loss', 'content': 0.09450753033161163, 'timestamp': '2025-10-02 01:02:40.335663', 'step': 28729, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:02:40.417426', 'step': 28729, 'epoch': 3}
{'type': 'loss', 'content': 0.006513815373182297, 'timestamp': '2025-10-02 01:02:40.429419', 'step': 28730, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:02:40.493434', 'step': 28730, 'epoch': 3}
{'type': 'loss', 'content': 0.038485415279865265, 'timestamp': '2025-10-02 01:02:40.496450', 'step': 28731, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:02:40.560999', 'step': 28731, 'epoch': 3}
{'type': 'loss', 'content': 0.09170227497816086, 'timestamp': '2025-10-02 01:02:40.567782', 'step': 28732, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:02:40.625096', 'step': 28732, 'epoch': 3}
{'type': 'loss', 'content': 0.11630089581012726, 'timestamp': '2025-10-02 01:02:40.628226', 'step': 28733, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:02:40.695923', 'step': 28733, 'epoch': 3}
{'type': 'loss', 'content': 0.011763369664549828, 'timestamp': '2025-10-02 01:02:40.706531', 'step': 28734, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:40.767256', 'step': 28734, 'epoch': 3}
{'type': 'loss', 'content': 7.058039773255587e-05, 'timestamp': '2025-10-02 01:02:40.770550', 'step': 28735, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:02:40.831541', 'step': 28735, 'epoch': 3}
{'type': 'loss', 'content': 0.05966297909617424, 'timestamp': '2025-10-02 01:02:40.843992', 'step': 28736, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:02:40.903074', 'step': 28736, 'epoch': 3}
{'type': 'loss', 'content': 0.023062385618686676, 'timestamp': '2025-10-02 01:02:40.914080', 'step': 28737, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:40.979527', 'step': 28737, 'epoch': 3}
{'type': 'loss', 'content': 0.026284178718924522, 'timestamp': '2025-10-02 01:02:40.982516', 'step': 28738, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:41.043380', 'step': 28738, 'epoch': 3}
{'type': 'loss', 'content': 0.0466587208211422, 'timestamp': '2025-10-02 01:02:41.050353', 'step': 28739, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:41.109227', 'step': 28739, 'epoch': 3}
{'type': 'loss', 'content': 0.02175697684288025, 'timestamp': '2025-10-02 01:02:41.117749', 'step': 28740, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:02:41.185005', 'step': 28740, 'epoch': 3}
{'type': 'loss', 'content': 0.040351882576942444, 'timestamp': '2025-10-02 01:02:41.196512', 'step': 28741, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:41.255926', 'step': 28741, 'epoch': 3}
{'type': 'loss', 'content': 0.032245855778455734, 'timestamp': '2025-10-02 01:02:41.259340', 'step': 28742, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:41.320171', 'step': 28742, 'epoch': 3}
{'type': 'loss', 'content': 0.014454590156674385, 'timestamp': '2025-10-02 01:02:41.324111', 'step': 28743, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:02:41.393455', 'step': 28743, 'epoch': 3}
{'type': 'loss', 'content': 0.0006818969850428402, 'timestamp': '2025-10-02 01:02:41.404756', 'step': 28744, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:41.462625', 'step': 28744, 'epoch': 3}
{'type': 'loss', 'content': 0.01634136028587818, 'timestamp': '2025-10-02 01:02:41.468276', 'step': 28745, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:41.528056', 'step': 28745, 'epoch': 3}
{'type': 'loss', 'content': 0.016697518527507782, 'timestamp': '2025-10-02 01:02:41.537268', 'step': 28746, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:02:41.597846', 'step': 28746, 'epoch': 3}
{'type': 'loss', 'content': 0.049193065613508224, 'timestamp': '2025-10-02 01:02:41.600739', 'step': 28747, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:41.661536', 'step': 28747, 'epoch': 3}
{'type': 'loss', 'content': 0.07947554439306259, 'timestamp': '2025-10-02 01:02:41.668002', 'step': 28748, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:41.724676', 'step': 28748, 'epoch': 3}
{'type': 'loss', 'content': 0.023072004318237305, 'timestamp': '2025-10-02 01:02:41.727458', 'step': 28749, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:02:41.791534', 'step': 28749, 'epoch': 3}
{'type': 'loss', 'content': 0.026091227307915688, 'timestamp': '2025-10-02 01:02:41.801021', 'step': 28750, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:02:41.857201', 'step': 28750, 'epoch': 3}
{'type': 'loss', 'content': 0.07809830456972122, 'timestamp': '2025-10-02 01:02:41.866700', 'step': 28751, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:41.923033', 'step': 28751, 'epoch': 3}
{'type': 'loss', 'content': 0.09824840724468231, 'timestamp': '2025-10-02 01:02:41.929232', 'step': 28752, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:41.995624', 'step': 28752, 'epoch': 3}
{'type': 'loss', 'content': 0.0011559956474229693, 'timestamp': '2025-10-02 01:02:42.001312', 'step': 28753, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:42.070688', 'step': 28753, 'epoch': 3}
{'type': 'loss', 'content': 0.05954299494624138, 'timestamp': '2025-10-02 01:02:42.074280', 'step': 28754, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:42.135808', 'step': 28754, 'epoch': 3}
{'type': 'loss', 'content': 0.02236161008477211, 'timestamp': '2025-10-02 01:02:42.139229', 'step': 28755, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:42.204846', 'step': 28755, 'epoch': 3}
{'type': 'loss', 'content': 0.03251444920897484, 'timestamp': '2025-10-02 01:02:42.214575', 'step': 28756, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:42.272685', 'step': 28756, 'epoch': 3}
{'type': 'loss', 'content': 0.002609977498650551, 'timestamp': '2025-10-02 01:02:42.275153', 'step': 28757, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:42.330952', 'step': 28757, 'epoch': 3}
{'type': 'loss', 'content': 0.03611062839627266, 'timestamp': '2025-10-02 01:02:42.336546', 'step': 28758, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:02:42.401818', 'step': 28758, 'epoch': 3}
{'type': 'loss', 'content': 0.039285268634557724, 'timestamp': '2025-10-02 01:02:42.411972', 'step': 28759, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:42.474265', 'step': 28759, 'epoch': 3}
{'type': 'loss', 'content': 0.0181457307189703, 'timestamp': '2025-10-02 01:02:42.480936', 'step': 28760, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:42.537487', 'step': 28760, 'epoch': 3}
{'type': 'loss', 'content': 0.024335045367479324, 'timestamp': '2025-10-02 01:02:42.544857', 'step': 28761, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:02:42.608696', 'step': 28761, 'epoch': 3}
{'type': 'loss', 'content': 0.067499540746212, 'timestamp': '2025-10-02 01:02:42.613299', 'step': 28762, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:02:42.670510', 'step': 28762, 'epoch': 3}
{'type': 'loss', 'content': 0.011729403398931026, 'timestamp': '2025-10-02 01:02:42.680036', 'step': 28763, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:42.744179', 'step': 28763, 'epoch': 3}
{'type': 'loss', 'content': 0.08504107594490051, 'timestamp': '2025-10-02 01:02:42.751227', 'step': 28764, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:42.813526', 'step': 28764, 'epoch': 3}
{'type': 'loss', 'content': 0.01168530248105526, 'timestamp': '2025-10-02 01:02:42.817831', 'step': 28765, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:42.875867', 'step': 28765, 'epoch': 3}
{'type': 'loss', 'content': 0.0956946387887001, 'timestamp': '2025-10-02 01:02:42.885041', 'step': 28766, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:42.944934', 'step': 28766, 'epoch': 3}
{'type': 'loss', 'content': 0.0167054645717144, 'timestamp': '2025-10-02 01:02:42.948895', 'step': 28767, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:43.010633', 'step': 28767, 'epoch': 3}
{'type': 'loss', 'content': 0.029577603563666344, 'timestamp': '2025-10-02 01:02:43.023743', 'step': 28768, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:43.080996', 'step': 28768, 'epoch': 3}
{'type': 'loss', 'content': 0.10399947315454483, 'timestamp': '2025-10-02 01:02:43.084064', 'step': 28769, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:43.158208', 'step': 28769, 'epoch': 3}
{'type': 'loss', 'content': 0.05255228281021118, 'timestamp': '2025-10-02 01:02:43.167469', 'step': 28770, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:43.235175', 'step': 28770, 'epoch': 3}
{'type': 'loss', 'content': 0.05423673987388611, 'timestamp': '2025-10-02 01:02:43.238910', 'step': 28771, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:02:43.312625', 'step': 28771, 'epoch': 3}
{'type': 'loss', 'content': 0.02604288049042225, 'timestamp': '2025-10-02 01:02:43.324850', 'step': 28772, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:43.394573', 'step': 28772, 'epoch': 3}
{'type': 'loss', 'content': 0.05343249440193176, 'timestamp': '2025-10-02 01:02:43.399753', 'step': 28773, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:43.462350', 'step': 28773, 'epoch': 3}
{'type': 'loss', 'content': 0.05837852880358696, 'timestamp': '2025-10-02 01:02:43.466235', 'step': 28774, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:43.540268', 'step': 28774, 'epoch': 3}
{'type': 'loss', 'content': 0.008551050908863544, 'timestamp': '2025-10-02 01:02:43.550728', 'step': 28775, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:43.608071', 'step': 28775, 'epoch': 3}
{'type': 'loss', 'content': 0.05636747181415558, 'timestamp': '2025-10-02 01:02:43.617973', 'step': 28776, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:43.684015', 'step': 28776, 'epoch': 3}
{'type': 'loss', 'content': 0.036902789026498795, 'timestamp': '2025-10-02 01:02:43.694146', 'step': 28777, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:02:43.768037', 'step': 28777, 'epoch': 3}
{'type': 'loss', 'content': 0.0005742455250583589, 'timestamp': '2025-10-02 01:02:43.778292', 'step': 28778, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:43.842338', 'step': 28778, 'epoch': 3}
{'type': 'loss', 'content': 0.027895772829651833, 'timestamp': '2025-10-02 01:02:43.851240', 'step': 28779, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:02:43.906436', 'step': 28779, 'epoch': 3}
{'type': 'loss', 'content': 0.1098286584019661, 'timestamp': '2025-10-02 01:02:43.917792', 'step': 28780, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:43.987157', 'step': 28780, 'epoch': 3}
{'type': 'loss', 'content': 0.024747807532548904, 'timestamp': '2025-10-02 01:02:44.003470', 'step': 28781, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:02:44.074157', 'step': 28781, 'epoch': 3}
{'type': 'loss', 'content': 0.05041956156492233, 'timestamp': '2025-10-02 01:02:44.078382', 'step': 28782, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:02:44.147999', 'step': 28782, 'epoch': 3}
{'type': 'loss', 'content': 0.09170269966125488, 'timestamp': '2025-10-02 01:02:44.150788', 'step': 28783, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:44.219362', 'step': 28783, 'epoch': 3}
{'type': 'loss', 'content': 0.019876182079315186, 'timestamp': '2025-10-02 01:02:44.233404', 'step': 28784, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:44.301499', 'step': 28784, 'epoch': 3}
{'type': 'loss', 'content': 0.045670442283153534, 'timestamp': '2025-10-02 01:02:44.310254', 'step': 28785, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:02:44.385848', 'step': 28785, 'epoch': 3}
{'type': 'loss', 'content': 0.054743584245443344, 'timestamp': '2025-10-02 01:02:44.394850', 'step': 28786, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:44.468020', 'step': 28786, 'epoch': 3}
{'type': 'loss', 'content': 0.02502473257482052, 'timestamp': '2025-10-02 01:02:44.477541', 'step': 28787, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:44.545144', 'step': 28787, 'epoch': 3}
{'type': 'loss', 'content': 0.05315268039703369, 'timestamp': '2025-10-02 01:02:44.558330', 'step': 28788, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:02:44.630690', 'step': 28788, 'epoch': 3}
{'type': 'loss', 'content': 0.011005268432199955, 'timestamp': '2025-10-02 01:02:44.641615', 'step': 28789, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:44.707463', 'step': 28789, 'epoch': 3}
{'type': 'loss', 'content': 0.02205648273229599, 'timestamp': '2025-10-02 01:02:44.716755', 'step': 28790, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:02:44.788195', 'step': 28790, 'epoch': 3}
{'type': 'loss', 'content': 0.03849326819181442, 'timestamp': '2025-10-02 01:02:44.795720', 'step': 28791, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:44.865224', 'step': 28791, 'epoch': 3}
{'type': 'loss', 'content': 0.03657620772719383, 'timestamp': '2025-10-02 01:02:44.872965', 'step': 28792, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:44.935937', 'step': 28792, 'epoch': 3}
{'type': 'loss', 'content': 0.042375244200229645, 'timestamp': '2025-10-02 01:02:44.945320', 'step': 28793, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:02:45.015226', 'step': 28793, 'epoch': 3}
{'type': 'loss', 'content': 0.07641299813985825, 'timestamp': '2025-10-02 01:02:45.024100', 'step': 28794, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:45.086550', 'step': 28794, 'epoch': 3}
{'type': 'loss', 'content': 0.02811017818748951, 'timestamp': '2025-10-02 01:02:45.094961', 'step': 28795, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:02:45.158449', 'step': 28795, 'epoch': 3}
{'type': 'loss', 'content': 0.030706994235515594, 'timestamp': '2025-10-02 01:02:45.165241', 'step': 28796, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:45.221761', 'step': 28796, 'epoch': 3}
{'type': 'loss', 'content': 0.0607374832034111, 'timestamp': '2025-10-02 01:02:45.224960', 'step': 28797, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:02:45.290750', 'step': 28797, 'epoch': 3}
{'type': 'loss', 'content': 0.0055130706168711185, 'timestamp': '2025-10-02 01:02:45.301220', 'step': 28798, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:02:45.371572', 'step': 28798, 'epoch': 3}
{'type': 'loss', 'content': 0.008556746877729893, 'timestamp': '2025-10-02 01:02:45.380386', 'step': 28799, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:45.443898', 'step': 28799, 'epoch': 3}
{'type': 'loss', 'content': 0.04581942781805992, 'timestamp': '2025-10-02 01:02:45.450724', 'step': 28800, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:45.519203', 'step': 28800, 'epoch': 3}
{'type': 'loss', 'content': 0.02820573002099991, 'timestamp': '2025-10-02 01:02:45.524878', 'step': 28801, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:02:45.595533', 'step': 28801, 'epoch': 3}
{'type': 'loss', 'content': 0.06600538641214371, 'timestamp': '2025-10-02 01:02:45.605018', 'step': 28802, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:45.673410', 'step': 28802, 'epoch': 3}
{'type': 'loss', 'content': 0.05432210490107536, 'timestamp': '2025-10-02 01:02:45.676646', 'step': 28803, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:02:45.746467', 'step': 28803, 'epoch': 3}
{'type': 'loss', 'content': 0.011254395358264446, 'timestamp': '2025-10-02 01:02:45.760295', 'step': 28804, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:02:45.818749', 'step': 28804, 'epoch': 3}
{'type': 'loss', 'content': 0.04509753733873367, 'timestamp': '2025-10-02 01:02:45.828705', 'step': 28805, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:45.893440', 'step': 28805, 'epoch': 3}
{'type': 'loss', 'content': 0.027859369292855263, 'timestamp': '2025-10-02 01:02:45.900640', 'step': 28806, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:02:45.971419', 'step': 28806, 'epoch': 3}
{'type': 'loss', 'content': 0.0672069564461708, 'timestamp': '2025-10-02 01:02:45.981610', 'step': 28807, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:46.048999', 'step': 28807, 'epoch': 3}
{'type': 'loss', 'content': 0.009287195280194283, 'timestamp': '2025-10-02 01:02:46.060499', 'step': 28808, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:46.116400', 'step': 28808, 'epoch': 3}
{'type': 'loss', 'content': 0.025800567120313644, 'timestamp': '2025-10-02 01:02:46.119589', 'step': 28809, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:46.177557', 'step': 28809, 'epoch': 3}
{'type': 'loss', 'content': 0.04103656858205795, 'timestamp': '2025-10-02 01:02:46.185240', 'step': 28810, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:02:46.255210', 'step': 28810, 'epoch': 3}
{'type': 'loss', 'content': 0.07766854763031006, 'timestamp': '2025-10-02 01:02:46.265775', 'step': 28811, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:46.326781', 'step': 28811, 'epoch': 3}
{'type': 'loss', 'content': 0.10551906377077103, 'timestamp': '2025-10-02 01:02:46.333540', 'step': 28812, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:46.393832', 'step': 28812, 'epoch': 3}
{'type': 'loss', 'content': 0.04798443615436554, 'timestamp': '2025-10-02 01:02:46.402945', 'step': 28813, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:46.466807', 'step': 28813, 'epoch': 3}
{'type': 'loss', 'content': 0.04241979867219925, 'timestamp': '2025-10-02 01:02:46.475227', 'step': 28814, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:46.537681', 'step': 28814, 'epoch': 3}
{'type': 'loss', 'content': 0.025481179356575012, 'timestamp': '2025-10-02 01:02:46.540924', 'step': 28815, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:02:46.598870', 'step': 28815, 'epoch': 3}
{'type': 'loss', 'content': 0.01446449477225542, 'timestamp': '2025-10-02 01:02:46.609042', 'step': 28816, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:46.671829', 'step': 28816, 'epoch': 3}
{'type': 'loss', 'content': 0.007672608830034733, 'timestamp': '2025-10-02 01:02:46.682231', 'step': 28817, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:46.740168', 'step': 28817, 'epoch': 3}
{'type': 'loss', 'content': 0.03225639835000038, 'timestamp': '2025-10-02 01:02:46.743751', 'step': 28818, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:46.800472', 'step': 28818, 'epoch': 3}
{'type': 'loss', 'content': 0.04439767077565193, 'timestamp': '2025-10-02 01:02:46.803131', 'step': 28819, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:02:46.866913', 'step': 28819, 'epoch': 3}
{'type': 'loss', 'content': 0.04630548134446144, 'timestamp': '2025-10-02 01:02:46.873876', 'step': 28820, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:46.940853', 'step': 28820, 'epoch': 3}
{'type': 'loss', 'content': 0.051341574639081955, 'timestamp': '2025-10-02 01:02:46.946581', 'step': 28821, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:02:47.004383', 'step': 28821, 'epoch': 3}
{'type': 'loss', 'content': 0.10221970081329346, 'timestamp': '2025-10-02 01:02:47.015055', 'step': 28822, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 01:02:47.087275', 'step': 28822, 'epoch': 3}
{'type': 'loss', 'content': 0.00341962743550539, 'timestamp': '2025-10-02 01:02:47.099660', 'step': 28823, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:02:47.173174', 'step': 28823, 'epoch': 3}
{'type': 'loss', 'content': 0.004657831508666277, 'timestamp': '2025-10-02 01:02:47.184213', 'step': 28824, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:47.245233', 'step': 28824, 'epoch': 3}
{'type': 'loss', 'content': 0.020172255113720894, 'timestamp': '2025-10-02 01:02:47.257134', 'step': 28825, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:47.315919', 'step': 28825, 'epoch': 3}
{'type': 'loss', 'content': 0.009165721014142036, 'timestamp': '2025-10-02 01:02:47.324352', 'step': 28826, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:47.393886', 'step': 28826, 'epoch': 3}
{'type': 'loss', 'content': 0.01695612445473671, 'timestamp': '2025-10-02 01:02:47.397093', 'step': 28827, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:47.458780', 'step': 28827, 'epoch': 3}
{'type': 'loss', 'content': 0.009398498572409153, 'timestamp': '2025-10-02 01:02:47.465570', 'step': 28828, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:47.540135', 'step': 28828, 'epoch': 3}
{'type': 'loss', 'content': 0.08803359419107437, 'timestamp': '2025-10-02 01:02:47.547087', 'step': 28829, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:47.606433', 'step': 28829, 'epoch': 3}
{'type': 'loss', 'content': 0.13547073304653168, 'timestamp': '2025-10-02 01:02:47.609470', 'step': 28830, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:47.673599', 'step': 28830, 'epoch': 3}
{'type': 'loss', 'content': 0.05218752101063728, 'timestamp': '2025-10-02 01:02:47.676625', 'step': 28831, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:02:47.749720', 'step': 28831, 'epoch': 3}
{'type': 'loss', 'content': 0.042587414383888245, 'timestamp': '2025-10-02 01:02:47.759876', 'step': 28832, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:47.820617', 'step': 28832, 'epoch': 3}
{'type': 'loss', 'content': 0.026607075706124306, 'timestamp': '2025-10-02 01:02:47.827472', 'step': 28833, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:47.890825', 'step': 28833, 'epoch': 3}
{'type': 'loss', 'content': 0.011046363972127438, 'timestamp': '2025-10-02 01:02:47.896325', 'step': 28834, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:02:47.956384', 'step': 28834, 'epoch': 3}
{'type': 'loss', 'content': 0.025251764804124832, 'timestamp': '2025-10-02 01:02:47.965921', 'step': 28835, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:48.026129', 'step': 28835, 'epoch': 3}
{'type': 'loss', 'content': 0.0018266926053911448, 'timestamp': '2025-10-02 01:02:48.035750', 'step': 28836, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:48.109109', 'step': 28836, 'epoch': 3}
{'type': 'loss', 'content': 0.04527725651860237, 'timestamp': '2025-10-02 01:02:48.114668', 'step': 28837, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:48.178239', 'step': 28837, 'epoch': 3}
{'type': 'loss', 'content': 0.03671010956168175, 'timestamp': '2025-10-02 01:02:48.188813', 'step': 28838, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:02:48.263813', 'step': 28838, 'epoch': 3}
{'type': 'loss', 'content': 0.01711251586675644, 'timestamp': '2025-10-02 01:02:48.274396', 'step': 28839, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 01:02:48.367486', 'step': 28839, 'epoch': 3}
{'type': 'loss', 'content': 0.036136429756879807, 'timestamp': '2025-10-02 01:02:48.381862', 'step': 28840, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:02:48.451359', 'step': 28840, 'epoch': 3}
{'type': 'loss', 'content': 0.028224879875779152, 'timestamp': '2025-10-02 01:02:48.461552', 'step': 28841, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:48.524533', 'step': 28841, 'epoch': 3}
{'type': 'loss', 'content': 0.020942378789186478, 'timestamp': '2025-10-02 01:02:48.527544', 'step': 28842, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:48.595083', 'step': 28842, 'epoch': 3}
{'type': 'loss', 'content': 0.05168291926383972, 'timestamp': '2025-10-02 01:02:48.603979', 'step': 28843, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:48.662909', 'step': 28843, 'epoch': 3}
{'type': 'loss', 'content': 0.031020138412714005, 'timestamp': '2025-10-02 01:02:48.670562', 'step': 28844, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:48.728804', 'step': 28844, 'epoch': 3}
{'type': 'loss', 'content': 0.06571254879236221, 'timestamp': '2025-10-02 01:02:48.738903', 'step': 28845, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:02:48.804524', 'step': 28845, 'epoch': 3}
{'type': 'loss', 'content': 0.004345969296991825, 'timestamp': '2025-10-02 01:02:48.814078', 'step': 28846, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:48.876739', 'step': 28846, 'epoch': 3}
{'type': 'loss', 'content': 0.05372888967394829, 'timestamp': '2025-10-02 01:02:48.886044', 'step': 28847, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:02:48.960010', 'step': 28847, 'epoch': 3}
{'type': 'loss', 'content': 0.03796693682670593, 'timestamp': '2025-10-02 01:02:48.966484', 'step': 28848, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:49.029012', 'step': 28848, 'epoch': 3}
{'type': 'loss', 'content': 0.025505080819129944, 'timestamp': '2025-10-02 01:02:49.036275', 'step': 28849, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:49.098720', 'step': 28849, 'epoch': 3}
{'type': 'loss', 'content': 0.02547585219144821, 'timestamp': '2025-10-02 01:02:49.105698', 'step': 28850, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:02:49.176060', 'step': 28850, 'epoch': 3}
{'type': 'loss', 'content': 0.06839949637651443, 'timestamp': '2025-10-02 01:02:49.186681', 'step': 28851, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:49.254856', 'step': 28851, 'epoch': 3}
{'type': 'loss', 'content': 0.04077570512890816, 'timestamp': '2025-10-02 01:02:49.261524', 'step': 28852, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:49.317608', 'step': 28852, 'epoch': 3}
{'type': 'loss', 'content': 0.03115926682949066, 'timestamp': '2025-10-02 01:02:49.325672', 'step': 28853, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:49.382627', 'step': 28853, 'epoch': 3}
{'type': 'loss', 'content': 0.03838077932596207, 'timestamp': '2025-10-02 01:02:49.390336', 'step': 28854, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:49.447449', 'step': 28854, 'epoch': 3}
{'type': 'loss', 'content': 0.11216919124126434, 'timestamp': '2025-10-02 01:02:49.454531', 'step': 28855, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:49.521698', 'step': 28855, 'epoch': 3}
{'type': 'loss', 'content': 0.01114183384925127, 'timestamp': '2025-10-02 01:02:49.536085', 'step': 28856, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:02:49.593831', 'step': 28856, 'epoch': 3}
{'type': 'loss', 'content': 0.016883566975593567, 'timestamp': '2025-10-02 01:02:49.597183', 'step': 28857, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:49.660314', 'step': 28857, 'epoch': 3}
{'type': 'loss', 'content': 0.058531504124403, 'timestamp': '2025-10-02 01:02:49.663608', 'step': 28858, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:02:49.723222', 'step': 28858, 'epoch': 3}
{'type': 'loss', 'content': 0.06110607460141182, 'timestamp': '2025-10-02 01:02:49.727818', 'step': 28859, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:49.788440', 'step': 28859, 'epoch': 3}
{'type': 'loss', 'content': 0.05642535537481308, 'timestamp': '2025-10-02 01:02:49.796642', 'step': 28860, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:49.854449', 'step': 28860, 'epoch': 3}
{'type': 'loss', 'content': 0.09312517940998077, 'timestamp': '2025-10-02 01:02:49.861735', 'step': 28861, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:02:49.918028', 'step': 28861, 'epoch': 3}
{'type': 'loss', 'content': 0.06987045705318451, 'timestamp': '2025-10-02 01:02:49.931079', 'step': 28862, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:02:50.015355', 'step': 28862, 'epoch': 3}
{'type': 'loss', 'content': 0.01634359359741211, 'timestamp': '2025-10-02 01:02:50.020517', 'step': 28863, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:50.081455', 'step': 28863, 'epoch': 3}
{'type': 'loss', 'content': 0.05041905865073204, 'timestamp': '2025-10-02 01:02:50.088164', 'step': 28864, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:50.144951', 'step': 28864, 'epoch': 3}
{'type': 'loss', 'content': 0.02265818603336811, 'timestamp': '2025-10-02 01:02:50.150601', 'step': 28865, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:02:50.206851', 'step': 28865, 'epoch': 3}
{'type': 'loss', 'content': 0.037281863391399384, 'timestamp': '2025-10-02 01:02:50.209528', 'step': 28866, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:02:50.268188', 'step': 28866, 'epoch': 3}
{'type': 'loss', 'content': 0.01763395592570305, 'timestamp': '2025-10-02 01:02:50.270942', 'step': 28867, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:02:50.327272', 'step': 28867, 'epoch': 3}
{'type': 'loss', 'content': 0.06414438039064407, 'timestamp': '2025-10-02 01:02:50.333394', 'step': 28868, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:50.393337', 'step': 28868, 'epoch': 3}
{'type': 'loss', 'content': 0.017993124201893806, 'timestamp': '2025-10-02 01:02:50.398496', 'step': 28869, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:50.458183', 'step': 28869, 'epoch': 3}
{'type': 'loss', 'content': 0.033638957887887955, 'timestamp': '2025-10-02 01:02:50.460878', 'step': 28870, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:50.517705', 'step': 28870, 'epoch': 3}
{'type': 'loss', 'content': 0.029720894992351532, 'timestamp': '2025-10-02 01:02:50.523213', 'step': 28871, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:50.578900', 'step': 28871, 'epoch': 3}
{'type': 'loss', 'content': 0.0002985060855280608, 'timestamp': '2025-10-02 01:02:50.585511', 'step': 28872, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:02:50.641032', 'step': 28872, 'epoch': 3}
{'type': 'loss', 'content': 0.031358808279037476, 'timestamp': '2025-10-02 01:02:50.643654', 'step': 28873, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:02:50.700850', 'step': 28873, 'epoch': 3}
{'type': 'loss', 'content': 0.0186921376734972, 'timestamp': '2025-10-02 01:02:50.710359', 'step': 28874, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:02:50.779443', 'step': 28874, 'epoch': 3}
{'type': 'loss', 'content': 0.03143971040844917, 'timestamp': '2025-10-02 01:02:50.789890', 'step': 28875, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 01:02:50.862597', 'step': 28875, 'epoch': 3}
{'type': 'loss', 'content': 0.04002108797430992, 'timestamp': '2025-10-02 01:02:50.875917', 'step': 28876, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:50.931651', 'step': 28876, 'epoch': 3}
{'type': 'loss', 'content': 0.07372701168060303, 'timestamp': '2025-10-02 01:02:50.934829', 'step': 28877, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:50.997038', 'step': 28877, 'epoch': 3}
{'type': 'loss', 'content': 0.03556142374873161, 'timestamp': '2025-10-02 01:02:51.000418', 'step': 28878, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:02:51.061565', 'step': 28878, 'epoch': 3}
{'type': 'loss', 'content': 0.08840543031692505, 'timestamp': '2025-10-02 01:02:51.065022', 'step': 28879, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:51.121476', 'step': 28879, 'epoch': 3}
{'type': 'loss', 'content': 0.0016493370058014989, 'timestamp': '2025-10-02 01:02:51.128177', 'step': 28880, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:51.190021', 'step': 28880, 'epoch': 3}
{'type': 'loss', 'content': 0.03195379301905632, 'timestamp': '2025-10-02 01:02:51.193018', 'step': 28881, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:51.248991', 'step': 28881, 'epoch': 3}
{'type': 'loss', 'content': 0.0371481254696846, 'timestamp': '2025-10-02 01:02:51.257262', 'step': 28882, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:51.327360', 'step': 28882, 'epoch': 3}
{'type': 'loss', 'content': 0.029029782861471176, 'timestamp': '2025-10-02 01:02:51.336362', 'step': 28883, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:02:51.393421', 'step': 28883, 'epoch': 3}
{'type': 'loss', 'content': 0.07870291918516159, 'timestamp': '2025-10-02 01:02:51.401236', 'step': 28884, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:51.468693', 'step': 28884, 'epoch': 3}
{'type': 'loss', 'content': 0.03293326124548912, 'timestamp': '2025-10-02 01:02:51.472883', 'step': 28885, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:51.535730', 'step': 28885, 'epoch': 3}
{'type': 'loss', 'content': 0.04817729815840721, 'timestamp': '2025-10-02 01:02:51.542693', 'step': 28886, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:51.613157', 'step': 28886, 'epoch': 3}
{'type': 'loss', 'content': 0.003093467326834798, 'timestamp': '2025-10-02 01:02:51.621939', 'step': 28887, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:51.684430', 'step': 28887, 'epoch': 3}
{'type': 'loss', 'content': 0.04567195102572441, 'timestamp': '2025-10-02 01:02:51.695577', 'step': 28888, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:02:51.755904', 'step': 28888, 'epoch': 3}
{'type': 'loss', 'content': 0.12015869468450546, 'timestamp': '2025-10-02 01:02:51.759600', 'step': 28889, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:51.820800', 'step': 28889, 'epoch': 3}
{'type': 'loss', 'content': 0.04999646916985512, 'timestamp': '2025-10-02 01:02:51.824168', 'step': 28890, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:51.894950', 'step': 28890, 'epoch': 3}
{'type': 'loss', 'content': 0.008967620320618153, 'timestamp': '2025-10-02 01:02:51.904102', 'step': 28891, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:51.975543', 'step': 28891, 'epoch': 3}
{'type': 'loss', 'content': 0.03154454752802849, 'timestamp': '2025-10-02 01:02:51.982595', 'step': 28892, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:02:52.052151', 'step': 28892, 'epoch': 3}
{'type': 'loss', 'content': 0.0004857383028138429, 'timestamp': '2025-10-02 01:02:52.062404', 'step': 28893, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:52.120387', 'step': 28893, 'epoch': 3}
{'type': 'loss', 'content': 0.016834814101457596, 'timestamp': '2025-10-02 01:02:52.123622', 'step': 28894, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:52.187788', 'step': 28894, 'epoch': 3}
{'type': 'loss', 'content': 0.044968053698539734, 'timestamp': '2025-10-02 01:02:52.196423', 'step': 28895, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:52.266098', 'step': 28895, 'epoch': 3}
{'type': 'loss', 'content': 0.001117577077820897, 'timestamp': '2025-10-02 01:02:52.272875', 'step': 28896, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:52.333179', 'step': 28896, 'epoch': 3}
{'type': 'loss', 'content': 0.02784113399684429, 'timestamp': '2025-10-02 01:02:52.336724', 'step': 28897, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:02:52.412074', 'step': 28897, 'epoch': 3}
{'type': 'loss', 'content': 0.018793907016515732, 'timestamp': '2025-10-02 01:02:52.422854', 'step': 28898, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:52.486716', 'step': 28898, 'epoch': 3}
{'type': 'loss', 'content': 0.09177585691213608, 'timestamp': '2025-10-02 01:02:52.491364', 'step': 28899, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:52.557280', 'step': 28899, 'epoch': 3}
{'type': 'loss', 'content': 0.0534675158560276, 'timestamp': '2025-10-02 01:02:52.564321', 'step': 28900, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:52.626366', 'step': 28900, 'epoch': 3}
{'type': 'loss', 'content': 0.0404047966003418, 'timestamp': '2025-10-02 01:02:52.632882', 'step': 28901, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:52.692518', 'step': 28901, 'epoch': 3}
{'type': 'loss', 'content': 0.01614922098815441, 'timestamp': '2025-10-02 01:02:52.701816', 'step': 28902, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:02:52.762843', 'step': 28902, 'epoch': 3}
{'type': 'loss', 'content': 0.07699795067310333, 'timestamp': '2025-10-02 01:02:52.766273', 'step': 28903, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:52.827830', 'step': 28903, 'epoch': 3}
{'type': 'loss', 'content': 0.05024348571896553, 'timestamp': '2025-10-02 01:02:52.835321', 'step': 28904, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:02:52.897604', 'step': 28904, 'epoch': 3}
{'type': 'loss', 'content': 0.04361903294920921, 'timestamp': '2025-10-02 01:02:52.908873', 'step': 28905, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:52.964189', 'step': 28905, 'epoch': 3}
{'type': 'loss', 'content': 0.0008697722223587334, 'timestamp': '2025-10-02 01:02:52.966740', 'step': 28906, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:53.022164', 'step': 28906, 'epoch': 3}
{'type': 'loss', 'content': 0.010569159872829914, 'timestamp': '2025-10-02 01:02:53.027877', 'step': 28907, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:53.083784', 'step': 28907, 'epoch': 3}
{'type': 'loss', 'content': 0.0831155776977539, 'timestamp': '2025-10-02 01:02:53.090127', 'step': 28908, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:02:53.144622', 'step': 28908, 'epoch': 3}
{'type': 'loss', 'content': 0.06489250808954239, 'timestamp': '2025-10-02 01:02:53.146915', 'step': 28909, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:53.201303', 'step': 28909, 'epoch': 3}
{'type': 'loss', 'content': 0.01906711794435978, 'timestamp': '2025-10-02 01:02:53.208645', 'step': 28910, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:53.263546', 'step': 28910, 'epoch': 3}
{'type': 'loss', 'content': 0.043146561831235886, 'timestamp': '2025-10-02 01:02:53.265926', 'step': 28911, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:53.322881', 'step': 28911, 'epoch': 3}
{'type': 'loss', 'content': 0.03995559737086296, 'timestamp': '2025-10-02 01:02:53.329210', 'step': 28912, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:02:53.383347', 'step': 28912, 'epoch': 3}
{'type': 'loss', 'content': 0.07679467648267746, 'timestamp': '2025-10-02 01:02:53.385833', 'step': 28913, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:53.440138', 'step': 28913, 'epoch': 3}
{'type': 'loss', 'content': 0.028392579406499863, 'timestamp': '2025-10-02 01:02:53.442568', 'step': 28914, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:53.497126', 'step': 28914, 'epoch': 3}
{'type': 'loss', 'content': 0.029648441821336746, 'timestamp': '2025-10-02 01:02:53.499930', 'step': 28915, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:53.554959', 'step': 28915, 'epoch': 3}
{'type': 'loss', 'content': 0.04527605324983597, 'timestamp': '2025-10-02 01:02:53.561174', 'step': 28916, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:53.616571', 'step': 28916, 'epoch': 3}
{'type': 'loss', 'content': 0.01892228052020073, 'timestamp': '2025-10-02 01:02:53.623636', 'step': 28917, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:53.680708', 'step': 28917, 'epoch': 3}
{'type': 'loss', 'content': 0.017669543623924255, 'timestamp': '2025-10-02 01:02:53.683238', 'step': 28918, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:53.739134', 'step': 28918, 'epoch': 3}
{'type': 'loss', 'content': 0.07696946710348129, 'timestamp': '2025-10-02 01:02:53.741757', 'step': 28919, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:53.795773', 'step': 28919, 'epoch': 3}
{'type': 'loss', 'content': 0.015977244824171066, 'timestamp': '2025-10-02 01:02:53.802469', 'step': 28920, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:02:53.856739', 'step': 28920, 'epoch': 3}
{'type': 'loss', 'content': 0.017617231234908104, 'timestamp': '2025-10-02 01:02:53.866976', 'step': 28921, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:53.922063', 'step': 28921, 'epoch': 3}
{'type': 'loss', 'content': 0.047875020653009415, 'timestamp': '2025-10-02 01:02:53.927460', 'step': 28922, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:53.982247', 'step': 28922, 'epoch': 3}
{'type': 'loss', 'content': 0.025686152279376984, 'timestamp': '2025-10-02 01:02:53.984845', 'step': 28923, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:02:54.039636', 'step': 28923, 'epoch': 3}
{'type': 'loss', 'content': 0.1062493696808815, 'timestamp': '2025-10-02 01:02:54.045704', 'step': 28924, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:02:54.104198', 'step': 28924, 'epoch': 3}
{'type': 'loss', 'content': 0.0015056461561471224, 'timestamp': '2025-10-02 01:02:54.115127', 'step': 28925, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:54.170587', 'step': 28925, 'epoch': 3}
{'type': 'loss', 'content': 0.02114640735089779, 'timestamp': '2025-10-02 01:02:54.172897', 'step': 28926, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:02:54.227259', 'step': 28926, 'epoch': 3}
{'type': 'loss', 'content': 0.06582129746675491, 'timestamp': '2025-10-02 01:02:54.229459', 'step': 28927, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:54.283895', 'step': 28927, 'epoch': 3}
{'type': 'loss', 'content': 0.05799220874905586, 'timestamp': '2025-10-02 01:02:54.290622', 'step': 28928, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:54.344439', 'step': 28928, 'epoch': 3}
{'type': 'loss', 'content': 0.060270197689533234, 'timestamp': '2025-10-02 01:02:54.347399', 'step': 28929, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:54.403518', 'step': 28929, 'epoch': 3}
{'type': 'loss', 'content': 0.06881117075681686, 'timestamp': '2025-10-02 01:02:54.409196', 'step': 28930, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:54.464438', 'step': 28930, 'epoch': 3}
{'type': 'loss', 'content': 0.018032999709248543, 'timestamp': '2025-10-02 01:02:54.470201', 'step': 28931, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:02:54.524763', 'step': 28931, 'epoch': 3}
{'type': 'loss', 'content': 0.02421092800796032, 'timestamp': '2025-10-02 01:02:54.531161', 'step': 28932, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:54.584671', 'step': 28932, 'epoch': 3}
{'type': 'loss', 'content': 0.07831071317195892, 'timestamp': '2025-10-02 01:02:54.589946', 'step': 28933, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:54.651166', 'step': 28933, 'epoch': 3}
{'type': 'loss', 'content': 0.03469008952379227, 'timestamp': '2025-10-02 01:02:54.654733', 'step': 28934, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:54.710325', 'step': 28934, 'epoch': 3}
{'type': 'loss', 'content': 0.039515767246484756, 'timestamp': '2025-10-02 01:02:54.712922', 'step': 28935, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:54.772437', 'step': 28935, 'epoch': 3}
{'type': 'loss', 'content': 0.09262410551309586, 'timestamp': '2025-10-02 01:02:54.778577', 'step': 28936, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:54.832984', 'step': 28936, 'epoch': 3}
{'type': 'loss', 'content': 0.006674816831946373, 'timestamp': '2025-10-02 01:02:54.842351', 'step': 28937, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:02:54.901641', 'step': 28937, 'epoch': 3}
{'type': 'loss', 'content': 0.01981806382536888, 'timestamp': '2025-10-02 01:02:54.911114', 'step': 28938, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:54.966451', 'step': 28938, 'epoch': 3}
{'type': 'loss', 'content': 0.05764051526784897, 'timestamp': '2025-10-02 01:02:54.969074', 'step': 28939, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:02:55.023863', 'step': 28939, 'epoch': 3}
{'type': 'loss', 'content': 0.021475881338119507, 'timestamp': '2025-10-02 01:02:55.031314', 'step': 28940, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:02:55.085677', 'step': 28940, 'epoch': 3}
{'type': 'loss', 'content': 0.013904253020882607, 'timestamp': '2025-10-02 01:02:55.095964', 'step': 28941, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:55.157775', 'step': 28941, 'epoch': 3}
{'type': 'loss', 'content': 0.07970774173736572, 'timestamp': '2025-10-02 01:02:55.160454', 'step': 28942, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:55.214656', 'step': 28942, 'epoch': 3}
{'type': 'loss', 'content': 0.10209387540817261, 'timestamp': '2025-10-02 01:02:55.217474', 'step': 28943, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:55.272474', 'step': 28943, 'epoch': 3}
{'type': 'loss', 'content': 0.053613174706697464, 'timestamp': '2025-10-02 01:02:55.280299', 'step': 28944, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:55.336538', 'step': 28944, 'epoch': 3}
{'type': 'loss', 'content': 0.10548423230648041, 'timestamp': '2025-10-02 01:02:55.343175', 'step': 28945, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:55.401095', 'step': 28945, 'epoch': 3}
{'type': 'loss', 'content': 0.09718635678291321, 'timestamp': '2025-10-02 01:02:55.403754', 'step': 28946, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:02:55.459320', 'step': 28946, 'epoch': 3}
{'type': 'loss', 'content': 0.03561371564865112, 'timestamp': '2025-10-02 01:02:55.461761', 'step': 28947, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:55.516733', 'step': 28947, 'epoch': 3}
{'type': 'loss', 'content': 0.01899063028395176, 'timestamp': '2025-10-02 01:02:55.522873', 'step': 28948, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:55.576924', 'step': 28948, 'epoch': 3}
{'type': 'loss', 'content': 0.011789803393185139, 'timestamp': '2025-10-02 01:02:55.584148', 'step': 28949, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:55.639681', 'step': 28949, 'epoch': 3}
{'type': 'loss', 'content': 0.06570982187986374, 'timestamp': '2025-10-02 01:02:55.641995', 'step': 28950, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:55.696832', 'step': 28950, 'epoch': 3}
{'type': 'loss', 'content': 0.060043711215257645, 'timestamp': '2025-10-02 01:02:55.699695', 'step': 28951, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:55.754359', 'step': 28951, 'epoch': 3}
{'type': 'loss', 'content': 0.011584176681935787, 'timestamp': '2025-10-02 01:02:55.760230', 'step': 28952, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:55.814044', 'step': 28952, 'epoch': 3}
{'type': 'loss', 'content': 0.0015291053568944335, 'timestamp': '2025-10-02 01:02:55.821444', 'step': 28953, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:55.876634', 'step': 28953, 'epoch': 3}
{'type': 'loss', 'content': 0.010525539517402649, 'timestamp': '2025-10-02 01:02:55.885849', 'step': 28954, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:02:55.939761', 'step': 28954, 'epoch': 3}
{'type': 'loss', 'content': 0.12261635065078735, 'timestamp': '2025-10-02 01:02:55.942328', 'step': 28955, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:55.997947', 'step': 28955, 'epoch': 3}
{'type': 'loss', 'content': 0.04095086082816124, 'timestamp': '2025-10-02 01:02:56.004332', 'step': 28956, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:56.057918', 'step': 28956, 'epoch': 3}
{'type': 'loss', 'content': 0.05199217051267624, 'timestamp': '2025-10-02 01:02:56.061363', 'step': 28957, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:56.118929', 'step': 28957, 'epoch': 3}
{'type': 'loss', 'content': 0.020155390724539757, 'timestamp': '2025-10-02 01:02:56.128239', 'step': 28958, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:56.183556', 'step': 28958, 'epoch': 3}
{'type': 'loss', 'content': 0.04014265164732933, 'timestamp': '2025-10-02 01:02:56.186231', 'step': 28959, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:56.242245', 'step': 28959, 'epoch': 3}
{'type': 'loss', 'content': 0.07206552475690842, 'timestamp': '2025-10-02 01:02:56.252196', 'step': 28960, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:56.307007', 'step': 28960, 'epoch': 3}
{'type': 'loss', 'content': 0.054225169122219086, 'timestamp': '2025-10-02 01:02:56.312770', 'step': 28961, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:02:56.373172', 'step': 28961, 'epoch': 3}
{'type': 'loss', 'content': 0.08642350137233734, 'timestamp': '2025-10-02 01:02:56.383316', 'step': 28962, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:02:56.439382', 'step': 28962, 'epoch': 3}
{'type': 'loss', 'content': 0.05450386554002762, 'timestamp': '2025-10-02 01:02:56.442803', 'step': 28963, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:02:56.498794', 'step': 28963, 'epoch': 3}
{'type': 'loss', 'content': 0.06841790676116943, 'timestamp': '2025-10-02 01:02:56.506595', 'step': 28964, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:56.560745', 'step': 28964, 'epoch': 3}
{'type': 'loss', 'content': 0.07860840111970901, 'timestamp': '2025-10-02 01:02:56.563450', 'step': 28965, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:02:56.618155', 'step': 28965, 'epoch': 3}
{'type': 'loss', 'content': 0.05850713700056076, 'timestamp': '2025-10-02 01:02:56.621536', 'step': 28966, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:56.677299', 'step': 28966, 'epoch': 3}
{'type': 'loss', 'content': 0.03695061057806015, 'timestamp': '2025-10-02 01:02:56.686543', 'step': 28967, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:02:56.745848', 'step': 28967, 'epoch': 3}
{'type': 'loss', 'content': 0.08613613992929459, 'timestamp': '2025-10-02 01:02:56.752291', 'step': 28968, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:02:56.809194', 'step': 28968, 'epoch': 3}
{'type': 'loss', 'content': 0.06813026964664459, 'timestamp': '2025-10-02 01:02:56.812404', 'step': 28969, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:02:56.873506', 'step': 28969, 'epoch': 3}
{'type': 'loss', 'content': 0.03758705034852028, 'timestamp': '2025-10-02 01:02:56.883649', 'step': 28970, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:02:56.940958', 'step': 28970, 'epoch': 3}
{'type': 'loss', 'content': 0.0720834955573082, 'timestamp': '2025-10-02 01:02:56.944887', 'step': 28971, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:57.001718', 'step': 28971, 'epoch': 3}
{'type': 'loss', 'content': 0.05040694400668144, 'timestamp': '2025-10-02 01:02:57.009716', 'step': 28972, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:02:57.066757', 'step': 28972, 'epoch': 3}
{'type': 'loss', 'content': 0.03874414041638374, 'timestamp': '2025-10-02 01:02:57.069933', 'step': 28973, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:57.127240', 'step': 28973, 'epoch': 3}
{'type': 'loss', 'content': 0.048051416873931885, 'timestamp': '2025-10-02 01:02:57.131285', 'step': 28974, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:57.188705', 'step': 28974, 'epoch': 3}
{'type': 'loss', 'content': 0.01971346139907837, 'timestamp': '2025-10-02 01:02:57.194148', 'step': 28975, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:57.251795', 'step': 28975, 'epoch': 3}
{'type': 'loss', 'content': 0.019313832744956017, 'timestamp': '2025-10-02 01:02:57.261743', 'step': 28976, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:02:57.316121', 'step': 28976, 'epoch': 3}
{'type': 'loss', 'content': 0.06860914081335068, 'timestamp': '2025-10-02 01:02:57.318992', 'step': 28977, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:02:57.381691', 'step': 28977, 'epoch': 3}
{'type': 'loss', 'content': 0.03933003544807434, 'timestamp': '2025-10-02 01:02:57.391874', 'step': 28978, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:57.446859', 'step': 28978, 'epoch': 3}
{'type': 'loss', 'content': 0.032325293868780136, 'timestamp': '2025-10-02 01:02:57.449493', 'step': 28979, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:57.504870', 'step': 28979, 'epoch': 3}
{'type': 'loss', 'content': 0.06638282537460327, 'timestamp': '2025-10-02 01:02:57.511064', 'step': 28980, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:57.566268', 'step': 28980, 'epoch': 3}
{'type': 'loss', 'content': 0.036146145313978195, 'timestamp': '2025-10-02 01:02:57.569137', 'step': 28981, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:02:57.628266', 'step': 28981, 'epoch': 3}
{'type': 'loss', 'content': 0.018541600555181503, 'timestamp': '2025-10-02 01:02:57.630713', 'step': 28982, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:57.686801', 'step': 28982, 'epoch': 3}
{'type': 'loss', 'content': 0.04782134294509888, 'timestamp': '2025-10-02 01:02:57.689708', 'step': 28983, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:57.745200', 'step': 28983, 'epoch': 3}
{'type': 'loss', 'content': 0.03595161810517311, 'timestamp': '2025-10-02 01:02:57.753263', 'step': 28984, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:02:57.807126', 'step': 28984, 'epoch': 3}
{'type': 'loss', 'content': 0.06048091500997543, 'timestamp': '2025-10-02 01:02:57.816445', 'step': 28985, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:02:57.870695', 'step': 28985, 'epoch': 3}
{'type': 'loss', 'content': 0.018273767083883286, 'timestamp': '2025-10-02 01:02:57.873398', 'step': 28986, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:02:57.935232', 'step': 28986, 'epoch': 3}
{'type': 'loss', 'content': 0.020046990364789963, 'timestamp': '2025-10-02 01:02:57.945712', 'step': 28987, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:02:58.000535', 'step': 28987, 'epoch': 3}
{'type': 'loss', 'content': 0.08182181417942047, 'timestamp': '2025-10-02 01:02:58.007228', 'step': 28988, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:58.061383', 'step': 28988, 'epoch': 3}
{'type': 'loss', 'content': 0.06448198854923248, 'timestamp': '2025-10-02 01:02:58.063887', 'step': 28989, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:58.117944', 'step': 28989, 'epoch': 3}
{'type': 'loss', 'content': 0.03657013177871704, 'timestamp': '2025-10-02 01:02:58.120631', 'step': 28990, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:58.174762', 'step': 28990, 'epoch': 3}
{'type': 'loss', 'content': 0.1061452329158783, 'timestamp': '2025-10-02 01:02:58.177039', 'step': 28991, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:58.231709', 'step': 28991, 'epoch': 3}
{'type': 'loss', 'content': 0.025866840034723282, 'timestamp': '2025-10-02 01:02:58.237799', 'step': 28992, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:58.291295', 'step': 28992, 'epoch': 3}
{'type': 'loss', 'content': 0.05826275050640106, 'timestamp': '2025-10-02 01:02:58.294216', 'step': 28993, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:02:58.349317', 'step': 28993, 'epoch': 3}
{'type': 'loss', 'content': 0.0776389092206955, 'timestamp': '2025-10-02 01:02:58.351802', 'step': 28994, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:58.406085', 'step': 28994, 'epoch': 3}
{'type': 'loss', 'content': 0.04435395821928978, 'timestamp': '2025-10-02 01:02:58.413403', 'step': 28995, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:58.468031', 'step': 28995, 'epoch': 3}
{'type': 'loss', 'content': 0.02445807307958603, 'timestamp': '2025-10-02 01:02:58.474236', 'step': 28996, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:02:58.528286', 'step': 28996, 'epoch': 3}
{'type': 'loss', 'content': 0.06408312916755676, 'timestamp': '2025-10-02 01:02:58.533660', 'step': 28997, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:02:58.589535', 'step': 28997, 'epoch': 3}
{'type': 'loss', 'content': 0.04220565780997276, 'timestamp': '2025-10-02 01:02:58.596675', 'step': 28998, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:02:58.652462', 'step': 28998, 'epoch': 3}
{'type': 'loss', 'content': 0.06132801994681358, 'timestamp': '2025-10-02 01:02:58.654835', 'step': 28999, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:58.709788', 'step': 28999, 'epoch': 3}
{'type': 'loss', 'content': 0.04488486796617508, 'timestamp': '2025-10-02 01:02:58.716225', 'step': 29000, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 29000', 'timestamp': '2025-10-02 01:02:59.133093', 'step': 29000, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:02:59.190818', 'step': 29000, 'epoch': 3}
{'type': 'loss', 'content': 0.09514632821083069, 'timestamp': '2025-10-02 01:02:59.196060', 'step': 29001, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:59.268408', 'step': 29001, 'epoch': 3}
{'type': 'loss', 'content': 0.014762277714908123, 'timestamp': '2025-10-02 01:02:59.273130', 'step': 29002, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:59.333419', 'step': 29002, 'epoch': 3}
{'type': 'loss', 'content': 0.019047649577260017, 'timestamp': '2025-10-02 01:02:59.337711', 'step': 29003, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:02:59.408206', 'step': 29003, 'epoch': 3}
{'type': 'loss', 'content': 0.09330170601606369, 'timestamp': '2025-10-02 01:02:59.415346', 'step': 29004, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:02:59.470367', 'step': 29004, 'epoch': 3}
{'type': 'loss', 'content': 0.06005048006772995, 'timestamp': '2025-10-02 01:02:59.473369', 'step': 29005, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:02:59.538443', 'step': 29005, 'epoch': 3}
{'type': 'loss', 'content': 0.012932954356074333, 'timestamp': '2025-10-02 01:02:59.548638', 'step': 29006, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:02:59.606640', 'step': 29006, 'epoch': 3}
{'type': 'loss', 'content': 0.02564588561654091, 'timestamp': '2025-10-02 01:02:59.609659', 'step': 29007, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:59.666733', 'step': 29007, 'epoch': 3}
{'type': 'loss', 'content': 0.049330249428749084, 'timestamp': '2025-10-02 01:02:59.673812', 'step': 29008, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:02:59.744280', 'step': 29008, 'epoch': 3}
{'type': 'loss', 'content': 0.024739829823374748, 'timestamp': '2025-10-02 01:02:59.756064', 'step': 29009, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:02:59.828115', 'step': 29009, 'epoch': 3}
{'type': 'loss', 'content': 0.05867268517613411, 'timestamp': '2025-10-02 01:02:59.831101', 'step': 29010, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:02:59.895103', 'step': 29010, 'epoch': 3}
{'type': 'loss', 'content': 0.06738284975290298, 'timestamp': '2025-10-02 01:02:59.907791', 'step': 29011, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:02:59.987877', 'step': 29011, 'epoch': 3}
{'type': 'loss', 'content': 0.0673590898513794, 'timestamp': '2025-10-02 01:02:59.997687', 'step': 29012, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:00.056271', 'step': 29012, 'epoch': 3}
{'type': 'loss', 'content': 0.06762057542800903, 'timestamp': '2025-10-02 01:03:00.063481', 'step': 29013, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:00.125994', 'step': 29013, 'epoch': 3}
{'type': 'loss', 'content': 0.019623806700110435, 'timestamp': '2025-10-02 01:03:00.135254', 'step': 29014, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:00.193284', 'step': 29014, 'epoch': 3}
{'type': 'loss', 'content': 0.05330050364136696, 'timestamp': '2025-10-02 01:03:00.196496', 'step': 29015, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:00.255380', 'step': 29015, 'epoch': 3}
{'type': 'loss', 'content': 0.027065476402640343, 'timestamp': '2025-10-02 01:03:00.265344', 'step': 29016, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:00.329743', 'step': 29016, 'epoch': 3}
{'type': 'loss', 'content': 0.01751813106238842, 'timestamp': '2025-10-02 01:03:00.335514', 'step': 29017, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:00.404437', 'step': 29017, 'epoch': 3}
{'type': 'loss', 'content': 0.0655159056186676, 'timestamp': '2025-10-02 01:03:00.411702', 'step': 29018, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:00.479354', 'step': 29018, 'epoch': 3}
{'type': 'loss', 'content': 0.10983193665742874, 'timestamp': '2025-10-02 01:03:00.483214', 'step': 29019, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:03:00.542788', 'step': 29019, 'epoch': 3}
{'type': 'loss', 'content': 0.06681344658136368, 'timestamp': '2025-10-02 01:03:00.554205', 'step': 29020, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:00.622268', 'step': 29020, 'epoch': 3}
{'type': 'loss', 'content': 0.07571788877248764, 'timestamp': '2025-10-02 01:03:00.629202', 'step': 29021, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:00.698441', 'step': 29021, 'epoch': 3}
{'type': 'loss', 'content': 0.046463269740343094, 'timestamp': '2025-10-02 01:03:00.701293', 'step': 29022, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:00.761789', 'step': 29022, 'epoch': 3}
{'type': 'loss', 'content': 0.04733410105109215, 'timestamp': '2025-10-02 01:03:00.764872', 'step': 29023, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:03:00.844302', 'step': 29023, 'epoch': 3}
{'type': 'loss', 'content': 0.01050079520791769, 'timestamp': '2025-10-02 01:03:00.857099', 'step': 29024, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:03:00.927918', 'step': 29024, 'epoch': 3}
{'type': 'loss', 'content': 0.004508606623858213, 'timestamp': '2025-10-02 01:03:00.939684', 'step': 29025, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:03:01.002432', 'step': 29025, 'epoch': 3}
{'type': 'loss', 'content': 0.08359915763139725, 'timestamp': '2025-10-02 01:03:01.005501', 'step': 29026, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:01.072284', 'step': 29026, 'epoch': 3}
{'type': 'loss', 'content': 0.12323912233114243, 'timestamp': '2025-10-02 01:03:01.075895', 'step': 29027, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:03:01.144626', 'step': 29027, 'epoch': 3}
{'type': 'loss', 'content': 0.02376423589885235, 'timestamp': '2025-10-02 01:03:01.155811', 'step': 29028, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:01.216422', 'step': 29028, 'epoch': 3}
{'type': 'loss', 'content': 0.016792526468634605, 'timestamp': '2025-10-02 01:03:01.220053', 'step': 29029, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:01.280989', 'step': 29029, 'epoch': 3}
{'type': 'loss', 'content': 0.04270993918180466, 'timestamp': '2025-10-02 01:03:01.284776', 'step': 29030, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:01.350037', 'step': 29030, 'epoch': 3}
{'type': 'loss', 'content': 0.013541032560169697, 'timestamp': '2025-10-02 01:03:01.359571', 'step': 29031, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:01.417808', 'step': 29031, 'epoch': 3}
{'type': 'loss', 'content': 0.01141657680273056, 'timestamp': '2025-10-02 01:03:01.429862', 'step': 29032, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:01.486742', 'step': 29032, 'epoch': 3}
{'type': 'loss', 'content': 0.04725252091884613, 'timestamp': '2025-10-02 01:03:01.489647', 'step': 29033, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:01.547216', 'step': 29033, 'epoch': 3}
{'type': 'loss', 'content': 0.0880092978477478, 'timestamp': '2025-10-02 01:03:01.552944', 'step': 29034, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:01.623989', 'step': 29034, 'epoch': 3}
{'type': 'loss', 'content': 0.03586795926094055, 'timestamp': '2025-10-02 01:03:01.631481', 'step': 29035, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:01.696086', 'step': 29035, 'epoch': 3}
{'type': 'loss', 'content': 0.09233344346284866, 'timestamp': '2025-10-02 01:03:01.707548', 'step': 29036, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:03:01.772691', 'step': 29036, 'epoch': 3}
{'type': 'loss', 'content': 0.05320568382740021, 'timestamp': '2025-10-02 01:03:01.776740', 'step': 29037, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:01.839544', 'step': 29037, 'epoch': 3}
{'type': 'loss', 'content': 0.051239315420389175, 'timestamp': '2025-10-02 01:03:01.844282', 'step': 29038, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:01.901815', 'step': 29038, 'epoch': 3}
{'type': 'loss', 'content': 0.0385022833943367, 'timestamp': '2025-10-02 01:03:01.904894', 'step': 29039, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:03:01.966421', 'step': 29039, 'epoch': 3}
{'type': 'loss', 'content': 0.003031305503100157, 'timestamp': '2025-10-02 01:03:01.977364', 'step': 29040, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:03:02.032671', 'step': 29040, 'epoch': 3}
{'type': 'loss', 'content': 0.08263678848743439, 'timestamp': '2025-10-02 01:03:02.036830', 'step': 29041, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:02.094095', 'step': 29041, 'epoch': 3}
{'type': 'loss', 'content': 0.05127276852726936, 'timestamp': '2025-10-02 01:03:02.103191', 'step': 29042, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:03:02.163636', 'step': 29042, 'epoch': 3}
{'type': 'loss', 'content': 0.034598276019096375, 'timestamp': '2025-10-02 01:03:02.171038', 'step': 29043, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:02.231338', 'step': 29043, 'epoch': 3}
{'type': 'loss', 'content': 0.017136503010988235, 'timestamp': '2025-10-02 01:03:02.241567', 'step': 29044, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:02.298934', 'step': 29044, 'epoch': 3}
{'type': 'loss', 'content': 0.022971780970692635, 'timestamp': '2025-10-02 01:03:02.308258', 'step': 29045, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:02.374532', 'step': 29045, 'epoch': 3}
{'type': 'loss', 'content': 0.099898561835289, 'timestamp': '2025-10-02 01:03:02.378256', 'step': 29046, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:02.434897', 'step': 29046, 'epoch': 3}
{'type': 'loss', 'content': 0.02138610929250717, 'timestamp': '2025-10-02 01:03:02.439027', 'step': 29047, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:02.496831', 'step': 29047, 'epoch': 3}
{'type': 'loss', 'content': 0.023861592635512352, 'timestamp': '2025-10-02 01:03:02.503617', 'step': 29048, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:03:02.560713', 'step': 29048, 'epoch': 3}
{'type': 'loss', 'content': 0.05608237907290459, 'timestamp': '2025-10-02 01:03:02.563511', 'step': 29049, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:03:02.625832', 'step': 29049, 'epoch': 3}
{'type': 'loss', 'content': 0.025864459574222565, 'timestamp': '2025-10-02 01:03:02.635972', 'step': 29050, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:02.695516', 'step': 29050, 'epoch': 3}
{'type': 'loss', 'content': 0.03198579326272011, 'timestamp': '2025-10-02 01:03:02.705049', 'step': 29051, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:02.762692', 'step': 29051, 'epoch': 3}
{'type': 'loss', 'content': 0.011170032434165478, 'timestamp': '2025-10-02 01:03:02.769942', 'step': 29052, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:02.825528', 'step': 29052, 'epoch': 3}
{'type': 'loss', 'content': 0.06803596764802933, 'timestamp': '2025-10-02 01:03:02.828783', 'step': 29053, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:02.885470', 'step': 29053, 'epoch': 3}
{'type': 'loss', 'content': 0.04257084056735039, 'timestamp': '2025-10-02 01:03:02.888897', 'step': 29054, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:02.946873', 'step': 29054, 'epoch': 3}
{'type': 'loss', 'content': 0.007336612790822983, 'timestamp': '2025-10-02 01:03:02.954044', 'step': 29055, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:03:03.016176', 'step': 29055, 'epoch': 3}
{'type': 'loss', 'content': 0.0366535410284996, 'timestamp': '2025-10-02 01:03:03.022834', 'step': 29056, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:03:03.077877', 'step': 29056, 'epoch': 3}
{'type': 'loss', 'content': 0.04677208513021469, 'timestamp': '2025-10-02 01:03:03.081918', 'step': 29057, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 01:03:03.158196', 'step': 29057, 'epoch': 3}
{'type': 'loss', 'content': 0.02595571056008339, 'timestamp': '2025-10-02 01:03:03.171339', 'step': 29058, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:03.228040', 'step': 29058, 'epoch': 3}
{'type': 'loss', 'content': 0.08652341365814209, 'timestamp': '2025-10-02 01:03:03.231265', 'step': 29059, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:03:03.287829', 'step': 29059, 'epoch': 3}
{'type': 'loss', 'content': 0.07460933923721313, 'timestamp': '2025-10-02 01:03:03.295153', 'step': 29060, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:03:03.355842', 'step': 29060, 'epoch': 3}
{'type': 'loss', 'content': 0.07454144209623337, 'timestamp': '2025-10-02 01:03:03.359124', 'step': 29061, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:03.420021', 'step': 29061, 'epoch': 3}
{'type': 'loss', 'content': 0.008642669767141342, 'timestamp': '2025-10-02 01:03:03.423751', 'step': 29062, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:03.480527', 'step': 29062, 'epoch': 3}
{'type': 'loss', 'content': 0.028640886768698692, 'timestamp': '2025-10-02 01:03:03.484289', 'step': 29063, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:03.545696', 'step': 29063, 'epoch': 3}
{'type': 'loss', 'content': 0.022917283698916435, 'timestamp': '2025-10-02 01:03:03.552570', 'step': 29064, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:03:03.619931', 'step': 29064, 'epoch': 3}
{'type': 'loss', 'content': 0.062096498906612396, 'timestamp': '2025-10-02 01:03:03.630915', 'step': 29065, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:03.694325', 'step': 29065, 'epoch': 3}
{'type': 'loss', 'content': 0.049872759729623795, 'timestamp': '2025-10-02 01:03:03.697616', 'step': 29066, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:03.752430', 'step': 29066, 'epoch': 3}
{'type': 'loss', 'content': 0.036713678389787674, 'timestamp': '2025-10-02 01:03:03.756367', 'step': 29067, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:03.812418', 'step': 29067, 'epoch': 3}
{'type': 'loss', 'content': 0.02452995441854, 'timestamp': '2025-10-02 01:03:03.821331', 'step': 29068, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:03.881805', 'step': 29068, 'epoch': 3}
{'type': 'loss', 'content': 0.013354654423892498, 'timestamp': '2025-10-02 01:03:03.892056', 'step': 29069, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:03:03.968253', 'step': 29069, 'epoch': 3}
{'type': 'loss', 'content': 0.024065537378191948, 'timestamp': '2025-10-02 01:03:03.980603', 'step': 29070, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:04.036628', 'step': 29070, 'epoch': 3}
{'type': 'loss', 'content': 0.030816348269581795, 'timestamp': '2025-10-02 01:03:04.039492', 'step': 29071, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:04.101779', 'step': 29071, 'epoch': 3}
{'type': 'loss', 'content': 0.07007166743278503, 'timestamp': '2025-10-02 01:03:04.108900', 'step': 29072, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:04.170409', 'step': 29072, 'epoch': 3}
{'type': 'loss', 'content': 0.02439991757273674, 'timestamp': '2025-10-02 01:03:04.180639', 'step': 29073, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:03:04.255443', 'step': 29073, 'epoch': 3}
{'type': 'loss', 'content': 0.05917385220527649, 'timestamp': '2025-10-02 01:03:04.265648', 'step': 29074, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:04.328812', 'step': 29074, 'epoch': 3}
{'type': 'loss', 'content': 0.013600612990558147, 'timestamp': '2025-10-02 01:03:04.331838', 'step': 29075, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:04.394581', 'step': 29075, 'epoch': 3}
{'type': 'loss', 'content': 0.058844950050115585, 'timestamp': '2025-10-02 01:03:04.401011', 'step': 29076, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:04.457905', 'step': 29076, 'epoch': 3}
{'type': 'loss', 'content': 0.024136099964380264, 'timestamp': '2025-10-02 01:03:04.461302', 'step': 29077, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:04.538924', 'step': 29077, 'epoch': 3}
{'type': 'loss', 'content': 0.03523780032992363, 'timestamp': '2025-10-02 01:03:04.544259', 'step': 29078, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:03:04.628256', 'step': 29078, 'epoch': 3}
{'type': 'loss', 'content': 0.03831496462225914, 'timestamp': '2025-10-02 01:03:04.646476', 'step': 29079, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:04.738761', 'step': 29079, 'epoch': 3}
{'type': 'loss', 'content': 0.009918003343045712, 'timestamp': '2025-10-02 01:03:04.748075', 'step': 29080, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:04.821651', 'step': 29080, 'epoch': 3}
{'type': 'loss', 'content': 0.05037960410118103, 'timestamp': '2025-10-02 01:03:04.827364', 'step': 29081, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:04.913809', 'step': 29081, 'epoch': 3}
{'type': 'loss', 'content': 0.010319220833480358, 'timestamp': '2025-10-02 01:03:04.921038', 'step': 29082, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:04.992358', 'step': 29082, 'epoch': 3}
{'type': 'loss', 'content': 0.07410278171300888, 'timestamp': '2025-10-02 01:03:04.997857', 'step': 29083, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:03:05.071411', 'step': 29083, 'epoch': 3}
{'type': 'loss', 'content': 0.005421859212219715, 'timestamp': '2025-10-02 01:03:05.084139', 'step': 29084, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:05.140453', 'step': 29084, 'epoch': 3}
{'type': 'loss', 'content': 0.1282511204481125, 'timestamp': '2025-10-02 01:03:05.143278', 'step': 29085, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:03:05.199890', 'step': 29085, 'epoch': 3}
{'type': 'loss', 'content': 0.05649897828698158, 'timestamp': '2025-10-02 01:03:05.202486', 'step': 29086, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:05.259714', 'step': 29086, 'epoch': 3}
{'type': 'loss', 'content': 0.02263757586479187, 'timestamp': '2025-10-02 01:03:05.263957', 'step': 29087, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:05.325285', 'step': 29087, 'epoch': 3}
{'type': 'loss', 'content': 0.05918721482157707, 'timestamp': '2025-10-02 01:03:05.332220', 'step': 29088, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:05.392838', 'step': 29088, 'epoch': 3}
{'type': 'loss', 'content': 0.033663004636764526, 'timestamp': '2025-10-02 01:03:05.397118', 'step': 29089, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:03:05.455271', 'step': 29089, 'epoch': 3}
{'type': 'loss', 'content': 0.05104469507932663, 'timestamp': '2025-10-02 01:03:05.471627', 'step': 29090, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:03:05.541588', 'step': 29090, 'epoch': 3}
{'type': 'loss', 'content': 0.05188608542084694, 'timestamp': '2025-10-02 01:03:05.553104', 'step': 29091, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:05.635858', 'step': 29091, 'epoch': 3}
{'type': 'loss', 'content': 0.0981091782450676, 'timestamp': '2025-10-02 01:03:05.643337', 'step': 29092, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:05.713812', 'step': 29092, 'epoch': 3}
{'type': 'loss', 'content': 0.08525041490793228, 'timestamp': '2025-10-02 01:03:05.729240', 'step': 29093, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:03:05.798776', 'step': 29093, 'epoch': 3}
{'type': 'loss', 'content': 0.14093804359436035, 'timestamp': '2025-10-02 01:03:05.802919', 'step': 29094, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:05.885177', 'step': 29094, 'epoch': 3}
{'type': 'loss', 'content': 0.06125955283641815, 'timestamp': '2025-10-02 01:03:05.901810', 'step': 29095, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:05.972764', 'step': 29095, 'epoch': 3}
{'type': 'loss', 'content': 0.06842166930437088, 'timestamp': '2025-10-02 01:03:05.993009', 'step': 29096, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:06.081289', 'step': 29096, 'epoch': 3}
{'type': 'loss', 'content': 0.025880003347992897, 'timestamp': '2025-10-02 01:03:06.086922', 'step': 29097, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:06.147234', 'step': 29097, 'epoch': 3}
{'type': 'loss', 'content': 0.10599436610937119, 'timestamp': '2025-10-02 01:03:06.165569', 'step': 29098, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:06.239749', 'step': 29098, 'epoch': 3}
{'type': 'loss', 'content': 0.023197857663035393, 'timestamp': '2025-10-02 01:03:06.244867', 'step': 29099, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:03:06.323338', 'step': 29099, 'epoch': 3}
{'type': 'loss', 'content': 0.019142204895615578, 'timestamp': '2025-10-02 01:03:06.344179', 'step': 29100, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:06.430400', 'step': 29100, 'epoch': 3}
{'type': 'loss', 'content': 0.006584827322512865, 'timestamp': '2025-10-02 01:03:06.437834', 'step': 29101, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:06.500531', 'step': 29101, 'epoch': 3}
{'type': 'loss', 'content': 0.014555269852280617, 'timestamp': '2025-10-02 01:03:06.523931', 'step': 29102, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:06.627722', 'step': 29102, 'epoch': 3}
{'type': 'loss', 'content': 0.08345963805913925, 'timestamp': '2025-10-02 01:03:06.634242', 'step': 29103, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:03:06.698422', 'step': 29103, 'epoch': 3}
{'type': 'loss', 'content': 0.057259783148765564, 'timestamp': '2025-10-02 01:03:06.707106', 'step': 29104, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:06.777501', 'step': 29104, 'epoch': 3}
{'type': 'loss', 'content': 0.031530436128377914, 'timestamp': '2025-10-02 01:03:06.783180', 'step': 29105, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:06.848350', 'step': 29105, 'epoch': 3}
{'type': 'loss', 'content': 0.08283334970474243, 'timestamp': '2025-10-02 01:03:06.854388', 'step': 29106, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:06.929303', 'step': 29106, 'epoch': 3}
{'type': 'loss', 'content': 0.13414296507835388, 'timestamp': '2025-10-02 01:03:06.933553', 'step': 29107, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:07.026575', 'step': 29107, 'epoch': 3}
{'type': 'loss', 'content': 0.02340882457792759, 'timestamp': '2025-10-02 01:03:07.045349', 'step': 29108, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:03:07.114842', 'step': 29108, 'epoch': 3}
{'type': 'loss', 'content': 0.15075749158859253, 'timestamp': '2025-10-02 01:03:07.119718', 'step': 29109, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:07.211584', 'step': 29109, 'epoch': 3}
{'type': 'loss', 'content': 0.02642827481031418, 'timestamp': '2025-10-02 01:03:07.228053', 'step': 29110, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:07.300216', 'step': 29110, 'epoch': 3}
{'type': 'loss', 'content': 0.015783892944455147, 'timestamp': '2025-10-02 01:03:07.304552', 'step': 29111, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:07.364229', 'step': 29111, 'epoch': 3}
{'type': 'loss', 'content': 0.007615636568516493, 'timestamp': '2025-10-02 01:03:07.381160', 'step': 29112, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:07.448508', 'step': 29112, 'epoch': 3}
{'type': 'loss', 'content': 0.049114763736724854, 'timestamp': '2025-10-02 01:03:07.454497', 'step': 29113, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:07.516640', 'step': 29113, 'epoch': 3}
{'type': 'loss', 'content': 0.027775505557656288, 'timestamp': '2025-10-02 01:03:07.520963', 'step': 29114, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:07.580042', 'step': 29114, 'epoch': 3}
{'type': 'loss', 'content': 0.06298767030239105, 'timestamp': '2025-10-02 01:03:07.594410', 'step': 29115, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:07.664299', 'step': 29115, 'epoch': 3}
{'type': 'loss', 'content': 0.10039529204368591, 'timestamp': '2025-10-02 01:03:07.676308', 'step': 29116, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:03:07.735760', 'step': 29116, 'epoch': 3}
{'type': 'loss', 'content': 0.07301375269889832, 'timestamp': '2025-10-02 01:03:07.741112', 'step': 29117, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:03:07.798803', 'step': 29117, 'epoch': 3}
{'type': 'loss', 'content': 0.0679231658577919, 'timestamp': '2025-10-02 01:03:07.802882', 'step': 29118, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:03:07.862408', 'step': 29118, 'epoch': 3}
{'type': 'loss', 'content': 0.03948962315917015, 'timestamp': '2025-10-02 01:03:07.866592', 'step': 29119, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:07.928000', 'step': 29119, 'epoch': 3}
{'type': 'loss', 'content': 0.012789162807166576, 'timestamp': '2025-10-02 01:03:07.947538', 'step': 29120, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:08.009694', 'step': 29120, 'epoch': 3}
{'type': 'loss', 'content': 0.019411901012063026, 'timestamp': '2025-10-02 01:03:08.017107', 'step': 29121, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:08.086055', 'step': 29121, 'epoch': 3}
{'type': 'loss', 'content': 0.0672827959060669, 'timestamp': '2025-10-02 01:03:08.090234', 'step': 29122, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:08.160660', 'step': 29122, 'epoch': 3}
{'type': 'loss', 'content': 0.0017860812367871404, 'timestamp': '2025-10-02 01:03:08.170188', 'step': 29123, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:03:08.236633', 'step': 29123, 'epoch': 3}
{'type': 'loss', 'content': 0.005682684015482664, 'timestamp': '2025-10-02 01:03:08.248000', 'step': 29124, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:08.307112', 'step': 29124, 'epoch': 3}
{'type': 'loss', 'content': 0.034391093999147415, 'timestamp': '2025-10-02 01:03:08.311205', 'step': 29125, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:03:08.398256', 'step': 29125, 'epoch': 3}
{'type': 'loss', 'content': 0.01615162193775177, 'timestamp': '2025-10-02 01:03:08.408429', 'step': 29126, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:08.477080', 'step': 29126, 'epoch': 3}
{'type': 'loss', 'content': 0.03445059061050415, 'timestamp': '2025-10-02 01:03:08.481625', 'step': 29127, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:08.542513', 'step': 29127, 'epoch': 3}
{'type': 'loss', 'content': 0.03105010651051998, 'timestamp': '2025-10-02 01:03:08.552834', 'step': 29128, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:08.621945', 'step': 29128, 'epoch': 3}
{'type': 'loss', 'content': 0.025611335411667824, 'timestamp': '2025-10-02 01:03:08.627634', 'step': 29129, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:03:08.688008', 'step': 29129, 'epoch': 3}
{'type': 'loss', 'content': 0.029639188200235367, 'timestamp': '2025-10-02 01:03:08.703361', 'step': 29130, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:08.785937', 'step': 29130, 'epoch': 3}
{'type': 'loss', 'content': 0.04956882819533348, 'timestamp': '2025-10-02 01:03:08.790685', 'step': 29131, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:08.851788', 'step': 29131, 'epoch': 3}
{'type': 'loss', 'content': 0.06966928392648697, 'timestamp': '2025-10-02 01:03:08.861732', 'step': 29132, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:03:08.934249', 'step': 29132, 'epoch': 3}
{'type': 'loss', 'content': 0.026119910180568695, 'timestamp': '2025-10-02 01:03:08.937844', 'step': 29133, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:09.006058', 'step': 29133, 'epoch': 3}
{'type': 'loss', 'content': 0.05177504941821098, 'timestamp': '2025-10-02 01:03:09.019607', 'step': 29134, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:09.090208', 'step': 29134, 'epoch': 3}
{'type': 'loss', 'content': 0.039764102548360825, 'timestamp': '2025-10-02 01:03:09.101624', 'step': 29135, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:09.185691', 'step': 29135, 'epoch': 3}
{'type': 'loss', 'content': 0.05426115542650223, 'timestamp': '2025-10-02 01:03:09.192610', 'step': 29136, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:09.260637', 'step': 29136, 'epoch': 3}
{'type': 'loss', 'content': 0.06139202415943146, 'timestamp': '2025-10-02 01:03:09.265990', 'step': 29137, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:09.323838', 'step': 29137, 'epoch': 3}
{'type': 'loss', 'content': 0.04928294196724892, 'timestamp': '2025-10-02 01:03:09.338563', 'step': 29138, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:09.427518', 'step': 29138, 'epoch': 3}
{'type': 'loss', 'content': 0.059727348387241364, 'timestamp': '2025-10-02 01:03:09.431464', 'step': 29139, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:09.522689', 'step': 29139, 'epoch': 3}
{'type': 'loss', 'content': 0.0006993159186094999, 'timestamp': '2025-10-02 01:03:09.540574', 'step': 29140, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:09.611703', 'step': 29140, 'epoch': 3}
{'type': 'loss', 'content': 0.07262703031301498, 'timestamp': '2025-10-02 01:03:09.622318', 'step': 29141, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:09.710758', 'step': 29141, 'epoch': 3}
{'type': 'loss', 'content': 0.011694683693349361, 'timestamp': '2025-10-02 01:03:09.718015', 'step': 29142, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:09.796232', 'step': 29142, 'epoch': 3}
{'type': 'loss', 'content': 0.048469237983226776, 'timestamp': '2025-10-02 01:03:09.801858', 'step': 29143, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:09.879917', 'step': 29143, 'epoch': 3}
{'type': 'loss', 'content': 0.031126586720347404, 'timestamp': '2025-10-02 01:03:09.890079', 'step': 29144, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:03:09.947234', 'step': 29144, 'epoch': 3}
{'type': 'loss', 'content': 0.023416979238390923, 'timestamp': '2025-10-02 01:03:09.952348', 'step': 29145, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:10.023533', 'step': 29145, 'epoch': 3}
{'type': 'loss', 'content': 0.04905995726585388, 'timestamp': '2025-10-02 01:03:10.032792', 'step': 29146, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:10.092274', 'step': 29146, 'epoch': 3}
{'type': 'loss', 'content': 0.03684857487678528, 'timestamp': '2025-10-02 01:03:10.106455', 'step': 29147, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:10.171516', 'step': 29147, 'epoch': 3}
{'type': 'loss', 'content': 0.046931423246860504, 'timestamp': '2025-10-02 01:03:10.179313', 'step': 29148, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 01:03:10.272957', 'step': 29148, 'epoch': 3}
{'type': 'loss', 'content': 0.00901686493307352, 'timestamp': '2025-10-02 01:03:10.287636', 'step': 29149, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:10.346708', 'step': 29149, 'epoch': 3}
{'type': 'loss', 'content': 0.03913691267371178, 'timestamp': '2025-10-02 01:03:10.349944', 'step': 29150, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:10.425391', 'step': 29150, 'epoch': 3}
{'type': 'loss', 'content': 0.07125264406204224, 'timestamp': '2025-10-02 01:03:10.437006', 'step': 29151, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:03:10.512290', 'step': 29151, 'epoch': 3}
{'type': 'loss', 'content': 0.022021707147359848, 'timestamp': '2025-10-02 01:03:10.526089', 'step': 29152, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:10.592363', 'step': 29152, 'epoch': 3}
{'type': 'loss', 'content': 0.022332241758704185, 'timestamp': '2025-10-02 01:03:10.605717', 'step': 29153, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:03:10.668654', 'step': 29153, 'epoch': 3}
{'type': 'loss', 'content': 0.004659646190702915, 'timestamp': '2025-10-02 01:03:10.678843', 'step': 29154, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:10.745206', 'step': 29154, 'epoch': 3}
{'type': 'loss', 'content': 0.018303649500012398, 'timestamp': '2025-10-02 01:03:10.748486', 'step': 29155, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:03:10.823306', 'step': 29155, 'epoch': 3}
{'type': 'loss', 'content': 0.0660538375377655, 'timestamp': '2025-10-02 01:03:10.830639', 'step': 29156, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:10.914097', 'step': 29156, 'epoch': 3}
{'type': 'loss', 'content': 0.019414879381656647, 'timestamp': '2025-10-02 01:03:10.926086', 'step': 29157, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:03:10.997621', 'step': 29157, 'epoch': 3}
{'type': 'loss', 'content': 0.09438615292310715, 'timestamp': '2025-10-02 01:03:11.011953', 'step': 29158, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:03:11.087884', 'step': 29158, 'epoch': 3}
{'type': 'loss', 'content': 0.024946441873908043, 'timestamp': '2025-10-02 01:03:11.092166', 'step': 29159, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:11.152289', 'step': 29159, 'epoch': 3}
{'type': 'loss', 'content': 0.06531184166669846, 'timestamp': '2025-10-02 01:03:11.164477', 'step': 29160, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 01:03:11.254390', 'step': 29160, 'epoch': 3}
{'type': 'loss', 'content': 0.04056207090616226, 'timestamp': '2025-10-02 01:03:11.268784', 'step': 29161, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:11.336241', 'step': 29161, 'epoch': 3}
{'type': 'loss', 'content': 0.07689612358808517, 'timestamp': '2025-10-02 01:03:11.346515', 'step': 29162, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:11.413815', 'step': 29162, 'epoch': 3}
{'type': 'loss', 'content': 0.06694385409355164, 'timestamp': '2025-10-02 01:03:11.423676', 'step': 29163, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:03:11.495146', 'step': 29163, 'epoch': 3}
{'type': 'loss', 'content': 0.047311119735240936, 'timestamp': '2025-10-02 01:03:11.502140', 'step': 29164, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:11.561093', 'step': 29164, 'epoch': 3}
{'type': 'loss', 'content': 0.0932706743478775, 'timestamp': '2025-10-02 01:03:11.570684', 'step': 29165, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:11.635437', 'step': 29165, 'epoch': 3}
{'type': 'loss', 'content': 0.11522848904132843, 'timestamp': '2025-10-02 01:03:11.638375', 'step': 29166, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:11.715217', 'step': 29166, 'epoch': 3}
{'type': 'loss', 'content': 0.06833828240633011, 'timestamp': '2025-10-02 01:03:11.723459', 'step': 29167, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:11.791033', 'step': 29167, 'epoch': 3}
{'type': 'loss', 'content': 0.05819655582308769, 'timestamp': '2025-10-02 01:03:11.798456', 'step': 29168, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:11.853279', 'step': 29168, 'epoch': 3}
{'type': 'loss', 'content': 0.04230416193604469, 'timestamp': '2025-10-02 01:03:11.860961', 'step': 29169, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:03:11.934287', 'step': 29169, 'epoch': 3}
{'type': 'loss', 'content': 0.008026767522096634, 'timestamp': '2025-10-02 01:03:11.945149', 'step': 29170, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:12.010645', 'step': 29170, 'epoch': 3}
{'type': 'loss', 'content': 0.04486140236258507, 'timestamp': '2025-10-02 01:03:12.019587', 'step': 29171, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:03:12.083704', 'step': 29171, 'epoch': 3}
{'type': 'loss', 'content': 0.024282215163111687, 'timestamp': '2025-10-02 01:03:12.094980', 'step': 29172, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:03:12.157926', 'step': 29172, 'epoch': 3}
{'type': 'loss', 'content': 0.05253206938505173, 'timestamp': '2025-10-02 01:03:12.161348', 'step': 29173, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:12.223772', 'step': 29173, 'epoch': 3}
{'type': 'loss', 'content': 0.029027210548520088, 'timestamp': '2025-10-02 01:03:12.226634', 'step': 29174, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:12.285775', 'step': 29174, 'epoch': 3}
{'type': 'loss', 'content': 0.07371468842029572, 'timestamp': '2025-10-02 01:03:12.293305', 'step': 29175, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:03:12.365657', 'step': 29175, 'epoch': 3}
{'type': 'loss', 'content': 0.0148762883618474, 'timestamp': '2025-10-02 01:03:12.376586', 'step': 29176, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:12.431439', 'step': 29176, 'epoch': 3}
{'type': 'loss', 'content': 0.05278715863823891, 'timestamp': '2025-10-02 01:03:12.439228', 'step': 29177, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:12.509423', 'step': 29177, 'epoch': 3}
{'type': 'loss', 'content': 0.015470133163034916, 'timestamp': '2025-10-02 01:03:12.518784', 'step': 29178, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:12.574034', 'step': 29178, 'epoch': 3}
{'type': 'loss', 'content': 0.033697281032800674, 'timestamp': '2025-10-02 01:03:12.576638', 'step': 29179, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:12.635205', 'step': 29179, 'epoch': 3}
{'type': 'loss', 'content': 0.021108411252498627, 'timestamp': '2025-10-02 01:03:12.645102', 'step': 29180, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:03:12.710002', 'step': 29180, 'epoch': 3}
{'type': 'loss', 'content': 0.22600331902503967, 'timestamp': '2025-10-02 01:03:12.716636', 'step': 29181, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:12.774906', 'step': 29181, 'epoch': 3}
{'type': 'loss', 'content': 0.06441546231508255, 'timestamp': '2025-10-02 01:03:12.780990', 'step': 29182, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:03:12.844127', 'step': 29182, 'epoch': 3}
{'type': 'loss', 'content': 0.056880008429288864, 'timestamp': '2025-10-02 01:03:12.846695', 'step': 29183, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:12.910849', 'step': 29183, 'epoch': 3}
{'type': 'loss', 'content': 0.07276249676942825, 'timestamp': '2025-10-02 01:03:12.920982', 'step': 29184, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:03:13.000409', 'step': 29184, 'epoch': 3}
{'type': 'loss', 'content': 0.012696408666670322, 'timestamp': '2025-10-02 01:03:13.003795', 'step': 29185, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:13.060868', 'step': 29185, 'epoch': 3}
{'type': 'loss', 'content': 0.02494989149272442, 'timestamp': '2025-10-02 01:03:13.068580', 'step': 29186, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:13.125206', 'step': 29186, 'epoch': 3}
{'type': 'loss', 'content': 0.08005695790052414, 'timestamp': '2025-10-02 01:03:13.128446', 'step': 29187, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:13.191401', 'step': 29187, 'epoch': 3}
{'type': 'loss', 'content': 0.04131488502025604, 'timestamp': '2025-10-02 01:03:13.201905', 'step': 29188, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:13.266521', 'step': 29188, 'epoch': 3}
{'type': 'loss', 'content': 0.11259853094816208, 'timestamp': '2025-10-02 01:03:13.270420', 'step': 29189, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:13.331273', 'step': 29189, 'epoch': 3}
{'type': 'loss', 'content': 0.008073195815086365, 'timestamp': '2025-10-02 01:03:13.340811', 'step': 29190, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:13.403303', 'step': 29190, 'epoch': 3}
{'type': 'loss', 'content': 0.11176376044750214, 'timestamp': '2025-10-02 01:03:13.406142', 'step': 29191, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:13.468247', 'step': 29191, 'epoch': 3}
{'type': 'loss', 'content': 0.03786633908748627, 'timestamp': '2025-10-02 01:03:13.474782', 'step': 29192, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:13.537994', 'step': 29192, 'epoch': 3}
{'type': 'loss', 'content': 0.07836029678583145, 'timestamp': '2025-10-02 01:03:13.540685', 'step': 29193, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:13.596228', 'step': 29193, 'epoch': 3}
{'type': 'loss', 'content': 0.028709720820188522, 'timestamp': '2025-10-02 01:03:13.602250', 'step': 29194, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:13.659070', 'step': 29194, 'epoch': 3}
{'type': 'loss', 'content': 0.07379181683063507, 'timestamp': '2025-10-02 01:03:13.662326', 'step': 29195, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:03:13.723613', 'step': 29195, 'epoch': 3}
{'type': 'loss', 'content': 0.050986647605895996, 'timestamp': '2025-10-02 01:03:13.734815', 'step': 29196, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:13.793828', 'step': 29196, 'epoch': 3}
{'type': 'loss', 'content': 0.0679134652018547, 'timestamp': '2025-10-02 01:03:13.796517', 'step': 29197, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:03:13.859110', 'step': 29197, 'epoch': 3}
{'type': 'loss', 'content': 0.02829783596098423, 'timestamp': '2025-10-02 01:03:13.869585', 'step': 29198, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 01:03:13.947838', 'step': 29198, 'epoch': 3}
{'type': 'loss', 'content': 0.0016074852319434285, 'timestamp': '2025-10-02 01:03:13.961290', 'step': 29199, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:14.022662', 'step': 29199, 'epoch': 3}
{'type': 'loss', 'content': 0.02948753535747528, 'timestamp': '2025-10-02 01:03:14.033324', 'step': 29200, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:14.096306', 'step': 29200, 'epoch': 3}
{'type': 'loss', 'content': 0.04534443840384483, 'timestamp': '2025-10-02 01:03:14.101147', 'step': 29201, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:14.164854', 'step': 29201, 'epoch': 3}
{'type': 'loss', 'content': 0.052501652389764786, 'timestamp': '2025-10-02 01:03:14.171235', 'step': 29202, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:14.230671', 'step': 29202, 'epoch': 3}
{'type': 'loss', 'content': 0.05978086218237877, 'timestamp': '2025-10-02 01:03:14.239959', 'step': 29203, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:14.301547', 'step': 29203, 'epoch': 3}
{'type': 'loss', 'content': 0.028627928346395493, 'timestamp': '2025-10-02 01:03:14.307615', 'step': 29204, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:14.365982', 'step': 29204, 'epoch': 3}
{'type': 'loss', 'content': 0.008473974652588367, 'timestamp': '2025-10-02 01:03:14.376250', 'step': 29205, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:14.436202', 'step': 29205, 'epoch': 3}
{'type': 'loss', 'content': 0.02448912337422371, 'timestamp': '2025-10-02 01:03:14.439601', 'step': 29206, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:14.496836', 'step': 29206, 'epoch': 3}
{'type': 'loss', 'content': 0.08333366364240646, 'timestamp': '2025-10-02 01:03:14.501071', 'step': 29207, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:14.560166', 'step': 29207, 'epoch': 3}
{'type': 'loss', 'content': 0.04036783427000046, 'timestamp': '2025-10-02 01:03:14.570335', 'step': 29208, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:14.625622', 'step': 29208, 'epoch': 3}
{'type': 'loss', 'content': 0.03378855064511299, 'timestamp': '2025-10-02 01:03:14.628113', 'step': 29209, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:14.682524', 'step': 29209, 'epoch': 3}
{'type': 'loss', 'content': 0.05988224223256111, 'timestamp': '2025-10-02 01:03:14.685135', 'step': 29210, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:14.742003', 'step': 29210, 'epoch': 3}
{'type': 'loss', 'content': 0.044844452291727066, 'timestamp': '2025-10-02 01:03:14.744369', 'step': 29211, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:14.799271', 'step': 29211, 'epoch': 3}
{'type': 'loss', 'content': 0.06619387865066528, 'timestamp': '2025-10-02 01:03:14.805400', 'step': 29212, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:14.860050', 'step': 29212, 'epoch': 3}
{'type': 'loss', 'content': 0.09857263416051865, 'timestamp': '2025-10-02 01:03:14.862542', 'step': 29213, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:14.917224', 'step': 29213, 'epoch': 3}
{'type': 'loss', 'content': 0.003743827808648348, 'timestamp': '2025-10-02 01:03:14.926580', 'step': 29214, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:14.985883', 'step': 29214, 'epoch': 3}
{'type': 'loss', 'content': 0.051730792969465256, 'timestamp': '2025-10-02 01:03:14.991907', 'step': 29215, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:15.051015', 'step': 29215, 'epoch': 3}
{'type': 'loss', 'content': 0.011481211520731449, 'timestamp': '2025-10-02 01:03:15.061320', 'step': 29216, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:15.118027', 'step': 29216, 'epoch': 3}
{'type': 'loss', 'content': 0.048421166837215424, 'timestamp': '2025-10-02 01:03:15.122560', 'step': 29217, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:15.182251', 'step': 29217, 'epoch': 3}
{'type': 'loss', 'content': 0.0417461097240448, 'timestamp': '2025-10-02 01:03:15.188365', 'step': 29218, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:03:15.251629', 'step': 29218, 'epoch': 3}
{'type': 'loss', 'content': 0.07028386741876602, 'timestamp': '2025-10-02 01:03:15.254573', 'step': 29219, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:15.311345', 'step': 29219, 'epoch': 3}
{'type': 'loss', 'content': 0.03837737813591957, 'timestamp': '2025-10-02 01:03:15.319741', 'step': 29220, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:15.374331', 'step': 29220, 'epoch': 3}
{'type': 'loss', 'content': 0.044052854180336, 'timestamp': '2025-10-02 01:03:15.376763', 'step': 29221, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:15.431310', 'step': 29221, 'epoch': 3}
{'type': 'loss', 'content': 0.0022879482712596655, 'timestamp': '2025-10-02 01:03:15.433742', 'step': 29222, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:15.491965', 'step': 29222, 'epoch': 3}
{'type': 'loss', 'content': 0.0338209830224514, 'timestamp': '2025-10-02 01:03:15.496628', 'step': 29223, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:15.550650', 'step': 29223, 'epoch': 3}
{'type': 'loss', 'content': 0.029523547738790512, 'timestamp': '2025-10-02 01:03:15.565011', 'step': 29224, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:15.650379', 'step': 29224, 'epoch': 3}
{'type': 'loss', 'content': 0.07869467884302139, 'timestamp': '2025-10-02 01:03:15.663816', 'step': 29225, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:15.742680', 'step': 29225, 'epoch': 3}
{'type': 'loss', 'content': 0.046897560358047485, 'timestamp': '2025-10-02 01:03:15.751222', 'step': 29226, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:15.826954', 'step': 29226, 'epoch': 3}
{'type': 'loss', 'content': 0.011571994982659817, 'timestamp': '2025-10-02 01:03:15.837148', 'step': 29227, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:15.921918', 'step': 29227, 'epoch': 3}
{'type': 'loss', 'content': 0.015811052173376083, 'timestamp': '2025-10-02 01:03:15.931960', 'step': 29228, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:16.015029', 'step': 29228, 'epoch': 3}
{'type': 'loss', 'content': 0.09082023799419403, 'timestamp': '2025-10-02 01:03:16.020773', 'step': 29229, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:16.109017', 'step': 29229, 'epoch': 3}
{'type': 'loss', 'content': 0.041348084807395935, 'timestamp': '2025-10-02 01:03:16.117244', 'step': 29230, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:16.192256', 'step': 29230, 'epoch': 3}
{'type': 'loss', 'content': 0.06477610766887665, 'timestamp': '2025-10-02 01:03:16.201248', 'step': 29231, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:16.277932', 'step': 29231, 'epoch': 3}
{'type': 'loss', 'content': 0.06089470162987709, 'timestamp': '2025-10-02 01:03:16.289502', 'step': 29232, 'epoch': 3}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 01:03:43.500591', 'step': 29232, 'epoch': 3}
{'type': 'pplx', 'content': 82.46614030352465, 'timestamp': '2025-10-02 01:03:43.505162', 'step': 29232, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:43.559800', 'step': 29232, 'epoch': 3}
{'type': 'loss', 'content': 0.15811264514923096, 'timestamp': '2025-10-02 01:03:43.563005', 'step': 29233, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:43.628541', 'step': 29233, 'epoch': 3}
{'type': 'loss', 'content': 0.024946441873908043, 'timestamp': '2025-10-02 01:03:43.632287', 'step': 29234, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:43.698874', 'step': 29234, 'epoch': 3}
{'type': 'loss', 'content': 0.07089129090309143, 'timestamp': '2025-10-02 01:03:43.701618', 'step': 29235, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:43.757428', 'step': 29235, 'epoch': 3}
{'type': 'loss', 'content': 0.03727734461426735, 'timestamp': '2025-10-02 01:03:43.764208', 'step': 29236, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:43.819087', 'step': 29236, 'epoch': 3}
{'type': 'loss', 'content': 0.044473208487033844, 'timestamp': '2025-10-02 01:03:43.821670', 'step': 29237, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:43.877848', 'step': 29237, 'epoch': 3}
{'type': 'loss', 'content': 0.022205587476491928, 'timestamp': '2025-10-02 01:03:43.884934', 'step': 29238, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:43.940683', 'step': 29238, 'epoch': 3}
{'type': 'loss', 'content': 0.005770477466285229, 'timestamp': '2025-10-02 01:03:43.943155', 'step': 29239, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:03:43.997745', 'step': 29239, 'epoch': 3}
{'type': 'loss', 'content': 0.045343052595853806, 'timestamp': '2025-10-02 01:03:44.008752', 'step': 29240, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:03:44.063469', 'step': 29240, 'epoch': 3}
{'type': 'loss', 'content': 0.05136464908719063, 'timestamp': '2025-10-02 01:03:44.065886', 'step': 29241, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:44.120915', 'step': 29241, 'epoch': 3}
{'type': 'loss', 'content': 0.08362257480621338, 'timestamp': '2025-10-02 01:03:44.124910', 'step': 29242, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:44.183302', 'step': 29242, 'epoch': 3}
{'type': 'loss', 'content': 0.020989608019590378, 'timestamp': '2025-10-02 01:03:44.188383', 'step': 29243, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:44.243299', 'step': 29243, 'epoch': 3}
{'type': 'loss', 'content': 0.07657719403505325, 'timestamp': '2025-10-02 01:03:44.249776', 'step': 29244, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:44.303886', 'step': 29244, 'epoch': 3}
{'type': 'loss', 'content': 0.06110600754618645, 'timestamp': '2025-10-02 01:03:44.306288', 'step': 29245, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:44.360448', 'step': 29245, 'epoch': 3}
{'type': 'loss', 'content': 0.1009482890367508, 'timestamp': '2025-10-02 01:03:44.362703', 'step': 29246, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:44.420084', 'step': 29246, 'epoch': 3}
{'type': 'loss', 'content': 0.036346063017845154, 'timestamp': '2025-10-02 01:03:44.422318', 'step': 29247, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:44.477738', 'step': 29247, 'epoch': 3}
{'type': 'loss', 'content': 0.07487422227859497, 'timestamp': '2025-10-02 01:03:44.483507', 'step': 29248, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:03:44.537819', 'step': 29248, 'epoch': 3}
{'type': 'loss', 'content': 0.10325682163238525, 'timestamp': '2025-10-02 01:03:44.540414', 'step': 29249, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:44.599878', 'step': 29249, 'epoch': 3}
{'type': 'loss', 'content': 0.02051505632698536, 'timestamp': '2025-10-02 01:03:44.609441', 'step': 29250, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:03:44.668762', 'step': 29250, 'epoch': 3}
{'type': 'loss', 'content': 0.041300591081380844, 'timestamp': '2025-10-02 01:03:44.678958', 'step': 29251, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:44.736072', 'step': 29251, 'epoch': 3}
{'type': 'loss', 'content': 0.02034350484609604, 'timestamp': '2025-10-02 01:03:44.746212', 'step': 29252, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:44.801651', 'step': 29252, 'epoch': 3}
{'type': 'loss', 'content': 0.013446451164782047, 'timestamp': '2025-10-02 01:03:44.803903', 'step': 29253, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:44.859313', 'step': 29253, 'epoch': 3}
{'type': 'loss', 'content': 0.039905790239572525, 'timestamp': '2025-10-02 01:03:44.862117', 'step': 29254, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:44.916682', 'step': 29254, 'epoch': 3}
{'type': 'loss', 'content': 0.05909060686826706, 'timestamp': '2025-10-02 01:03:44.919051', 'step': 29255, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:03:44.972879', 'step': 29255, 'epoch': 3}
{'type': 'loss', 'content': 0.01559695228934288, 'timestamp': '2025-10-02 01:03:44.979176', 'step': 29256, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:03:45.033577', 'step': 29256, 'epoch': 3}
{'type': 'loss', 'content': 0.08900071680545807, 'timestamp': '2025-10-02 01:03:45.036318', 'step': 29257, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:45.090676', 'step': 29257, 'epoch': 3}
{'type': 'loss', 'content': 0.042741477489471436, 'timestamp': '2025-10-02 01:03:45.098284', 'step': 29258, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:45.152513', 'step': 29258, 'epoch': 3}
{'type': 'loss', 'content': 0.09610667079687119, 'timestamp': '2025-10-02 01:03:45.155110', 'step': 29259, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:03:45.209129', 'step': 29259, 'epoch': 3}
{'type': 'loss', 'content': 0.04895327612757683, 'timestamp': '2025-10-02 01:03:45.214896', 'step': 29260, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:45.268829', 'step': 29260, 'epoch': 3}
{'type': 'loss', 'content': 0.04735071212053299, 'timestamp': '2025-10-02 01:03:45.271542', 'step': 29261, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:45.327169', 'step': 29261, 'epoch': 3}
{'type': 'loss', 'content': 0.0893065482378006, 'timestamp': '2025-10-02 01:03:45.330443', 'step': 29262, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:45.386673', 'step': 29262, 'epoch': 3}
{'type': 'loss', 'content': 0.012304306030273438, 'timestamp': '2025-10-02 01:03:45.392652', 'step': 29263, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:45.447363', 'step': 29263, 'epoch': 3}
{'type': 'loss', 'content': 0.06132262572646141, 'timestamp': '2025-10-02 01:03:45.453559', 'step': 29264, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:45.507280', 'step': 29264, 'epoch': 3}
{'type': 'loss', 'content': 0.01739308051764965, 'timestamp': '2025-10-02 01:03:45.513217', 'step': 29265, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:45.567668', 'step': 29265, 'epoch': 3}
{'type': 'loss', 'content': 0.03685298562049866, 'timestamp': '2025-10-02 01:03:45.575300', 'step': 29266, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:45.631209', 'step': 29266, 'epoch': 3}
{'type': 'loss', 'content': 0.06587851047515869, 'timestamp': '2025-10-02 01:03:45.633911', 'step': 29267, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:45.688844', 'step': 29267, 'epoch': 3}
{'type': 'loss', 'content': 0.030744722113013268, 'timestamp': '2025-10-02 01:03:45.694891', 'step': 29268, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:45.749292', 'step': 29268, 'epoch': 3}
{'type': 'loss', 'content': 0.001724539790302515, 'timestamp': '2025-10-02 01:03:45.757034', 'step': 29269, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:45.813917', 'step': 29269, 'epoch': 3}
{'type': 'loss', 'content': 0.03827192261815071, 'timestamp': '2025-10-02 01:03:45.819872', 'step': 29270, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:45.874227', 'step': 29270, 'epoch': 3}
{'type': 'loss', 'content': 0.004837718326598406, 'timestamp': '2025-10-02 01:03:45.880096', 'step': 29271, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:45.937262', 'step': 29271, 'epoch': 3}
{'type': 'loss', 'content': 0.014119012281298637, 'timestamp': '2025-10-02 01:03:45.947582', 'step': 29272, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:46.001614', 'step': 29272, 'epoch': 3}
{'type': 'loss', 'content': 0.022424139082431793, 'timestamp': '2025-10-02 01:03:46.011408', 'step': 29273, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:46.066593', 'step': 29273, 'epoch': 3}
{'type': 'loss', 'content': 0.006193638313561678, 'timestamp': '2025-10-02 01:03:46.075979', 'step': 29274, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:03:46.138546', 'step': 29274, 'epoch': 3}
{'type': 'loss', 'content': 0.0005239598103798926, 'timestamp': '2025-10-02 01:03:46.149230', 'step': 29275, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:46.203537', 'step': 29275, 'epoch': 3}
{'type': 'loss', 'content': 0.09459425508975983, 'timestamp': '2025-10-02 01:03:46.209797', 'step': 29276, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:46.264401', 'step': 29276, 'epoch': 3}
{'type': 'loss', 'content': 0.029647115617990494, 'timestamp': '2025-10-02 01:03:46.267033', 'step': 29277, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:46.322325', 'step': 29277, 'epoch': 3}
{'type': 'loss', 'content': 0.016499873250722885, 'timestamp': '2025-10-02 01:03:46.330098', 'step': 29278, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:46.383990', 'step': 29278, 'epoch': 3}
{'type': 'loss', 'content': 0.08635263890028, 'timestamp': '2025-10-02 01:03:46.386183', 'step': 29279, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:46.440103', 'step': 29279, 'epoch': 3}
{'type': 'loss', 'content': 0.05575481429696083, 'timestamp': '2025-10-02 01:03:46.448555', 'step': 29280, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:46.501786', 'step': 29280, 'epoch': 3}
{'type': 'loss', 'content': 0.02459387667477131, 'timestamp': '2025-10-02 01:03:46.504407', 'step': 29281, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:46.559685', 'step': 29281, 'epoch': 3}
{'type': 'loss', 'content': 0.01642601378262043, 'timestamp': '2025-10-02 01:03:46.569261', 'step': 29282, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:46.623872', 'step': 29282, 'epoch': 3}
{'type': 'loss', 'content': 0.08520350605249405, 'timestamp': '2025-10-02 01:03:46.626980', 'step': 29283, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:46.681844', 'step': 29283, 'epoch': 3}
{'type': 'loss', 'content': 0.04471791535615921, 'timestamp': '2025-10-02 01:03:46.688711', 'step': 29284, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:46.742313', 'step': 29284, 'epoch': 3}
{'type': 'loss', 'content': 0.00910592544823885, 'timestamp': '2025-10-02 01:03:46.745654', 'step': 29285, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:46.802283', 'step': 29285, 'epoch': 3}
{'type': 'loss', 'content': 0.04024980589747429, 'timestamp': '2025-10-02 01:03:46.805002', 'step': 29286, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:46.859778', 'step': 29286, 'epoch': 3}
{'type': 'loss', 'content': 0.04740173742175102, 'timestamp': '2025-10-02 01:03:46.867314', 'step': 29287, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:46.923046', 'step': 29287, 'epoch': 3}
{'type': 'loss', 'content': 0.03348354995250702, 'timestamp': '2025-10-02 01:03:46.933379', 'step': 29288, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:46.987405', 'step': 29288, 'epoch': 3}
{'type': 'loss', 'content': 0.0461244136095047, 'timestamp': '2025-10-02 01:03:46.989780', 'step': 29289, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:47.044006', 'step': 29289, 'epoch': 3}
{'type': 'loss', 'content': 0.0462181381881237, 'timestamp': '2025-10-02 01:03:47.046647', 'step': 29290, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:47.102201', 'step': 29290, 'epoch': 3}
{'type': 'loss', 'content': 0.018782131373882294, 'timestamp': '2025-10-02 01:03:47.111668', 'step': 29291, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:47.165943', 'step': 29291, 'epoch': 3}
{'type': 'loss', 'content': 0.0010393784614279866, 'timestamp': '2025-10-02 01:03:47.172187', 'step': 29292, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:47.225584', 'step': 29292, 'epoch': 3}
{'type': 'loss', 'content': 0.059023089706897736, 'timestamp': '2025-10-02 01:03:47.228250', 'step': 29293, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:47.283069', 'step': 29293, 'epoch': 3}
{'type': 'loss', 'content': 0.02733328379690647, 'timestamp': '2025-10-02 01:03:47.285485', 'step': 29294, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:47.339935', 'step': 29294, 'epoch': 3}
{'type': 'loss', 'content': 0.021337276324629784, 'timestamp': '2025-10-02 01:03:47.346071', 'step': 29295, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:03:47.400200', 'step': 29295, 'epoch': 3}
{'type': 'loss', 'content': 0.09137839078903198, 'timestamp': '2025-10-02 01:03:47.407568', 'step': 29296, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:03:47.473381', 'step': 29296, 'epoch': 3}
{'type': 'loss', 'content': 0.03929797187447548, 'timestamp': '2025-10-02 01:03:47.486340', 'step': 29297, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:47.541379', 'step': 29297, 'epoch': 3}
{'type': 'loss', 'content': 0.01035074982792139, 'timestamp': '2025-10-02 01:03:47.550757', 'step': 29298, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:47.605725', 'step': 29298, 'epoch': 3}
{'type': 'loss', 'content': 0.01819365657866001, 'timestamp': '2025-10-02 01:03:47.610946', 'step': 29299, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:47.666836', 'step': 29299, 'epoch': 3}
{'type': 'loss', 'content': 0.08639266341924667, 'timestamp': '2025-10-02 01:03:47.674947', 'step': 29300, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:03:47.729669', 'step': 29300, 'epoch': 3}
{'type': 'loss', 'content': 0.030255623161792755, 'timestamp': '2025-10-02 01:03:47.732631', 'step': 29301, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:47.787914', 'step': 29301, 'epoch': 3}
{'type': 'loss', 'content': 0.025869062170386314, 'timestamp': '2025-10-02 01:03:47.797240', 'step': 29302, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:03:47.860820', 'step': 29302, 'epoch': 3}
{'type': 'loss', 'content': 0.1056964248418808, 'timestamp': '2025-10-02 01:03:47.863170', 'step': 29303, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:47.917113', 'step': 29303, 'epoch': 3}
{'type': 'loss', 'content': 0.020435914397239685, 'timestamp': '2025-10-02 01:03:47.925443', 'step': 29304, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:03:47.980437', 'step': 29304, 'epoch': 3}
{'type': 'loss', 'content': 0.05333983153104782, 'timestamp': '2025-10-02 01:03:47.983156', 'step': 29305, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:48.037914', 'step': 29305, 'epoch': 3}
{'type': 'loss', 'content': 0.06298176944255829, 'timestamp': '2025-10-02 01:03:48.043981', 'step': 29306, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:48.101672', 'step': 29306, 'epoch': 3}
{'type': 'loss', 'content': 0.03865735977888107, 'timestamp': '2025-10-02 01:03:48.110963', 'step': 29307, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:48.166981', 'step': 29307, 'epoch': 3}
{'type': 'loss', 'content': 0.019438324496150017, 'timestamp': '2025-10-02 01:03:48.173091', 'step': 29308, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:48.227310', 'step': 29308, 'epoch': 3}
{'type': 'loss', 'content': 0.07980095595121384, 'timestamp': '2025-10-02 01:03:48.233398', 'step': 29309, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:03:48.297294', 'step': 29309, 'epoch': 3}
{'type': 'loss', 'content': 0.056798942387104034, 'timestamp': '2025-10-02 01:03:48.299600', 'step': 29310, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:48.354982', 'step': 29310, 'epoch': 3}
{'type': 'loss', 'content': 0.0015509361401200294, 'timestamp': '2025-10-02 01:03:48.358227', 'step': 29311, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:48.413394', 'step': 29311, 'epoch': 3}
{'type': 'loss', 'content': 0.08159635961055756, 'timestamp': '2025-10-02 01:03:48.419729', 'step': 29312, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:03:48.474495', 'step': 29312, 'epoch': 3}
{'type': 'loss', 'content': 0.06342076510190964, 'timestamp': '2025-10-02 01:03:48.477445', 'step': 29313, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:48.531554', 'step': 29313, 'epoch': 3}
{'type': 'loss', 'content': 0.03470015525817871, 'timestamp': '2025-10-02 01:03:48.539251', 'step': 29314, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:48.614560', 'step': 29314, 'epoch': 3}
{'type': 'loss', 'content': 0.08417650312185287, 'timestamp': '2025-10-02 01:03:48.617538', 'step': 29315, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:48.673650', 'step': 29315, 'epoch': 3}
{'type': 'loss', 'content': 0.01681242138147354, 'timestamp': '2025-10-02 01:03:48.683970', 'step': 29316, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:48.737220', 'step': 29316, 'epoch': 3}
{'type': 'loss', 'content': 0.032312292605638504, 'timestamp': '2025-10-02 01:03:48.739715', 'step': 29317, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:48.794208', 'step': 29317, 'epoch': 3}
{'type': 'loss', 'content': 0.04109584167599678, 'timestamp': '2025-10-02 01:03:48.800273', 'step': 29318, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:48.856253', 'step': 29318, 'epoch': 3}
{'type': 'loss', 'content': 0.04848863184452057, 'timestamp': '2025-10-02 01:03:48.865734', 'step': 29319, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:48.918816', 'step': 29319, 'epoch': 3}
{'type': 'loss', 'content': 0.050863172858953476, 'timestamp': '2025-10-02 01:03:48.924805', 'step': 29320, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:48.978584', 'step': 29320, 'epoch': 3}
{'type': 'loss', 'content': 0.018430333584547043, 'timestamp': '2025-10-02 01:03:48.986136', 'step': 29321, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:49.042316', 'step': 29321, 'epoch': 3}
{'type': 'loss', 'content': 0.0615699328482151, 'timestamp': '2025-10-02 01:03:49.044589', 'step': 29322, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:49.099224', 'step': 29322, 'epoch': 3}
{'type': 'loss', 'content': 0.03470070660114288, 'timestamp': '2025-10-02 01:03:49.103326', 'step': 29323, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:03:49.158994', 'step': 29323, 'epoch': 3}
{'type': 'loss', 'content': 0.06935708969831467, 'timestamp': '2025-10-02 01:03:49.165293', 'step': 29324, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:49.219308', 'step': 29324, 'epoch': 3}
{'type': 'loss', 'content': 0.029611090198159218, 'timestamp': '2025-10-02 01:03:49.222325', 'step': 29325, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:49.280152', 'step': 29325, 'epoch': 3}
{'type': 'loss', 'content': 0.04544792324304581, 'timestamp': '2025-10-02 01:03:49.282773', 'step': 29326, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:49.340347', 'step': 29326, 'epoch': 3}
{'type': 'loss', 'content': 0.008419053629040718, 'timestamp': '2025-10-02 01:03:49.346356', 'step': 29327, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:49.402428', 'step': 29327, 'epoch': 3}
{'type': 'loss', 'content': 0.07738767564296722, 'timestamp': '2025-10-02 01:03:49.410638', 'step': 29328, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:49.468651', 'step': 29328, 'epoch': 3}
{'type': 'loss', 'content': 0.04016583785414696, 'timestamp': '2025-10-02 01:03:49.471381', 'step': 29329, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:49.533178', 'step': 29329, 'epoch': 3}
{'type': 'loss', 'content': 0.020538920536637306, 'timestamp': '2025-10-02 01:03:49.542516', 'step': 29330, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:49.597620', 'step': 29330, 'epoch': 3}
{'type': 'loss', 'content': 0.024188769981265068, 'timestamp': '2025-10-02 01:03:49.601222', 'step': 29331, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 01:03:49.675676', 'step': 29331, 'epoch': 3}
{'type': 'loss', 'content': 0.04068625345826149, 'timestamp': '2025-10-02 01:03:49.689909', 'step': 29332, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:49.746494', 'step': 29332, 'epoch': 3}
{'type': 'loss', 'content': 0.04129723459482193, 'timestamp': '2025-10-02 01:03:49.749864', 'step': 29333, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:49.807133', 'step': 29333, 'epoch': 3}
{'type': 'loss', 'content': 0.023539476096630096, 'timestamp': '2025-10-02 01:03:49.809642', 'step': 29334, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:49.865921', 'step': 29334, 'epoch': 3}
{'type': 'loss', 'content': 0.1077701672911644, 'timestamp': '2025-10-02 01:03:49.869003', 'step': 29335, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:49.927409', 'step': 29335, 'epoch': 3}
{'type': 'loss', 'content': 0.024892384186387062, 'timestamp': '2025-10-02 01:03:49.933374', 'step': 29336, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:49.990944', 'step': 29336, 'epoch': 3}
{'type': 'loss', 'content': 0.018107721582055092, 'timestamp': '2025-10-02 01:03:49.997042', 'step': 29337, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:50.052673', 'step': 29337, 'epoch': 3}
{'type': 'loss', 'content': 0.1514928936958313, 'timestamp': '2025-10-02 01:03:50.056139', 'step': 29338, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:03:50.110903', 'step': 29338, 'epoch': 3}
{'type': 'loss', 'content': 0.1325155645608902, 'timestamp': '2025-10-02 01:03:50.114536', 'step': 29339, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:50.177188', 'step': 29339, 'epoch': 3}
{'type': 'loss', 'content': 0.07862206548452377, 'timestamp': '2025-10-02 01:03:50.183859', 'step': 29340, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:03:50.237589', 'step': 29340, 'epoch': 3}
{'type': 'loss', 'content': 0.11229223012924194, 'timestamp': '2025-10-02 01:03:50.242307', 'step': 29341, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:50.298097', 'step': 29341, 'epoch': 3}
{'type': 'loss', 'content': 0.03545151278376579, 'timestamp': '2025-10-02 01:03:50.302219', 'step': 29342, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:50.362312', 'step': 29342, 'epoch': 3}
{'type': 'loss', 'content': 0.03820382431149483, 'timestamp': '2025-10-02 01:03:50.366141', 'step': 29343, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:50.421660', 'step': 29343, 'epoch': 3}
{'type': 'loss', 'content': 0.012598625384271145, 'timestamp': '2025-10-02 01:03:50.427976', 'step': 29344, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:50.481604', 'step': 29344, 'epoch': 3}
{'type': 'loss', 'content': 0.07491548359394073, 'timestamp': '2025-10-02 01:03:50.486411', 'step': 29345, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:50.543303', 'step': 29345, 'epoch': 3}
{'type': 'loss', 'content': 0.0034945926163345575, 'timestamp': '2025-10-02 01:03:50.551043', 'step': 29346, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:50.605538', 'step': 29346, 'epoch': 3}
{'type': 'loss', 'content': 0.034965455532073975, 'timestamp': '2025-10-02 01:03:50.608141', 'step': 29347, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:50.661734', 'step': 29347, 'epoch': 3}
{'type': 'loss', 'content': 0.07388213276863098, 'timestamp': '2025-10-02 01:03:50.667625', 'step': 29348, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:03:50.735084', 'step': 29348, 'epoch': 3}
{'type': 'loss', 'content': 0.01970502734184265, 'timestamp': '2025-10-02 01:03:50.748046', 'step': 29349, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:50.803656', 'step': 29349, 'epoch': 3}
{'type': 'loss', 'content': 0.009038777090609074, 'timestamp': '2025-10-02 01:03:50.809632', 'step': 29350, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:50.871185', 'step': 29350, 'epoch': 3}
{'type': 'loss', 'content': 0.03854304179549217, 'timestamp': '2025-10-02 01:03:50.873858', 'step': 29351, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:50.929919', 'step': 29351, 'epoch': 3}
{'type': 'loss', 'content': 0.004916096571832895, 'timestamp': '2025-10-02 01:03:50.936694', 'step': 29352, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:50.993943', 'step': 29352, 'epoch': 3}
{'type': 'loss', 'content': 0.04081437364220619, 'timestamp': '2025-10-02 01:03:50.999923', 'step': 29353, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:03:51.056410', 'step': 29353, 'epoch': 3}
{'type': 'loss', 'content': 0.03867756947875023, 'timestamp': '2025-10-02 01:03:51.058998', 'step': 29354, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:51.114052', 'step': 29354, 'epoch': 3}
{'type': 'loss', 'content': 0.03980126976966858, 'timestamp': '2025-10-02 01:03:51.119859', 'step': 29355, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:51.176380', 'step': 29355, 'epoch': 3}
{'type': 'loss', 'content': 0.01605663076043129, 'timestamp': '2025-10-02 01:03:51.182652', 'step': 29356, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:51.237336', 'step': 29356, 'epoch': 3}
{'type': 'loss', 'content': 0.01815495267510414, 'timestamp': '2025-10-02 01:03:51.244914', 'step': 29357, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:03:51.299603', 'step': 29357, 'epoch': 3}
{'type': 'loss', 'content': 0.06936384737491608, 'timestamp': '2025-10-02 01:03:51.302128', 'step': 29358, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:51.356530', 'step': 29358, 'epoch': 3}
{'type': 'loss', 'content': 0.05244874209165573, 'timestamp': '2025-10-02 01:03:51.362897', 'step': 29359, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:51.432169', 'step': 29359, 'epoch': 3}
{'type': 'loss', 'content': 0.04979629069566727, 'timestamp': '2025-10-02 01:03:51.442478', 'step': 29360, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:03:51.497338', 'step': 29360, 'epoch': 3}
{'type': 'loss', 'content': 0.07933655381202698, 'timestamp': '2025-10-02 01:03:51.499786', 'step': 29361, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:51.554863', 'step': 29361, 'epoch': 3}
{'type': 'loss', 'content': 0.01883760094642639, 'timestamp': '2025-10-02 01:03:51.557290', 'step': 29362, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:03:51.612252', 'step': 29362, 'epoch': 3}
{'type': 'loss', 'content': 0.03162583336234093, 'timestamp': '2025-10-02 01:03:51.615087', 'step': 29363, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:51.670859', 'step': 29363, 'epoch': 3}
{'type': 'loss', 'content': 0.047745879739522934, 'timestamp': '2025-10-02 01:03:51.677682', 'step': 29364, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:51.733492', 'step': 29364, 'epoch': 3}
{'type': 'loss', 'content': 0.00023456464987248182, 'timestamp': '2025-10-02 01:03:51.743092', 'step': 29365, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 01:03:51.815502', 'step': 29365, 'epoch': 3}
{'type': 'loss', 'content': 0.014125066809356213, 'timestamp': '2025-10-02 01:03:51.828092', 'step': 29366, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:51.883093', 'step': 29366, 'epoch': 3}
{'type': 'loss', 'content': 0.049363378435373306, 'timestamp': '2025-10-02 01:03:51.885440', 'step': 29367, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:51.940300', 'step': 29367, 'epoch': 3}
{'type': 'loss', 'content': 0.06504310667514801, 'timestamp': '2025-10-02 01:03:51.946699', 'step': 29368, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:03:52.001520', 'step': 29368, 'epoch': 3}
{'type': 'loss', 'content': 0.09980465471744537, 'timestamp': '2025-10-02 01:03:52.004106', 'step': 29369, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:52.058994', 'step': 29369, 'epoch': 3}
{'type': 'loss', 'content': 0.06467802822589874, 'timestamp': '2025-10-02 01:03:52.061793', 'step': 29370, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:52.117658', 'step': 29370, 'epoch': 3}
{'type': 'loss', 'content': 0.015668125823140144, 'timestamp': '2025-10-02 01:03:52.120258', 'step': 29371, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:52.175234', 'step': 29371, 'epoch': 3}
{'type': 'loss', 'content': 0.03277556225657463, 'timestamp': '2025-10-02 01:03:52.181196', 'step': 29372, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:03:52.236235', 'step': 29372, 'epoch': 3}
{'type': 'loss', 'content': 0.037689026445150375, 'timestamp': '2025-10-02 01:03:52.238403', 'step': 29373, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:52.293224', 'step': 29373, 'epoch': 3}
{'type': 'loss', 'content': 0.03613043949007988, 'timestamp': '2025-10-02 01:03:52.295563', 'step': 29374, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:52.350244', 'step': 29374, 'epoch': 3}
{'type': 'loss', 'content': 0.04337708652019501, 'timestamp': '2025-10-02 01:03:52.353537', 'step': 29375, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:52.409025', 'step': 29375, 'epoch': 3}
{'type': 'loss', 'content': 0.04159294068813324, 'timestamp': '2025-10-02 01:03:52.415909', 'step': 29376, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:52.470802', 'step': 29376, 'epoch': 3}
{'type': 'loss', 'content': 0.08922117948532104, 'timestamp': '2025-10-02 01:03:52.473246', 'step': 29377, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:52.528036', 'step': 29377, 'epoch': 3}
{'type': 'loss', 'content': 0.03279305621981621, 'timestamp': '2025-10-02 01:03:52.530653', 'step': 29378, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:52.587514', 'step': 29378, 'epoch': 3}
{'type': 'loss', 'content': 0.028875064104795456, 'timestamp': '2025-10-02 01:03:52.589916', 'step': 29379, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:52.644941', 'step': 29379, 'epoch': 3}
{'type': 'loss', 'content': 0.06668473035097122, 'timestamp': '2025-10-02 01:03:52.655086', 'step': 29380, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:52.709402', 'step': 29380, 'epoch': 3}
{'type': 'loss', 'content': 0.037202268838882446, 'timestamp': '2025-10-02 01:03:52.711498', 'step': 29381, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:52.767728', 'step': 29381, 'epoch': 3}
{'type': 'loss', 'content': 0.017993422225117683, 'timestamp': '2025-10-02 01:03:52.773625', 'step': 29382, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:52.829760', 'step': 29382, 'epoch': 3}
{'type': 'loss', 'content': 0.01468435674905777, 'timestamp': '2025-10-02 01:03:52.835678', 'step': 29383, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:52.890889', 'step': 29383, 'epoch': 3}
{'type': 'loss', 'content': 0.026278430595993996, 'timestamp': '2025-10-02 01:03:52.897745', 'step': 29384, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:52.952818', 'step': 29384, 'epoch': 3}
{'type': 'loss', 'content': 0.03196866065263748, 'timestamp': '2025-10-02 01:03:52.962455', 'step': 29385, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:53.018233', 'step': 29385, 'epoch': 3}
{'type': 'loss', 'content': 0.04811977595090866, 'timestamp': '2025-10-02 01:03:53.027611', 'step': 29386, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:53.082960', 'step': 29386, 'epoch': 3}
{'type': 'loss', 'content': 0.06733720749616623, 'timestamp': '2025-10-02 01:03:53.090547', 'step': 29387, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:53.146500', 'step': 29387, 'epoch': 3}
{'type': 'loss', 'content': 0.033850379288196564, 'timestamp': '2025-10-02 01:03:53.153295', 'step': 29388, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:53.208289', 'step': 29388, 'epoch': 3}
{'type': 'loss', 'content': 0.04409853741526604, 'timestamp': '2025-10-02 01:03:53.216015', 'step': 29389, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:53.271539', 'step': 29389, 'epoch': 3}
{'type': 'loss', 'content': 0.09944519400596619, 'timestamp': '2025-10-02 01:03:53.273525', 'step': 29390, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:53.328870', 'step': 29390, 'epoch': 3}
{'type': 'loss', 'content': 0.0072452593594789505, 'timestamp': '2025-10-02 01:03:53.331550', 'step': 29391, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:53.388026', 'step': 29391, 'epoch': 3}
{'type': 'loss', 'content': 0.05311230942606926, 'timestamp': '2025-10-02 01:03:53.394513', 'step': 29392, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:53.449986', 'step': 29392, 'epoch': 3}
{'type': 'loss', 'content': 0.0036759551148861647, 'timestamp': '2025-10-02 01:03:53.460270', 'step': 29393, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:53.515087', 'step': 29393, 'epoch': 3}
{'type': 'loss', 'content': 0.009215987287461758, 'timestamp': '2025-10-02 01:03:53.517910', 'step': 29394, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:03:53.585826', 'step': 29394, 'epoch': 3}
{'type': 'loss', 'content': 0.05607874318957329, 'timestamp': '2025-10-02 01:03:53.597789', 'step': 29395, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:03:53.652141', 'step': 29395, 'epoch': 3}
{'type': 'loss', 'content': 0.07392276078462601, 'timestamp': '2025-10-02 01:03:53.658153', 'step': 29396, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:03:53.713655', 'step': 29396, 'epoch': 3}
{'type': 'loss', 'content': 0.12151196599006653, 'timestamp': '2025-10-02 01:03:53.715901', 'step': 29397, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:53.770920', 'step': 29397, 'epoch': 3}
{'type': 'loss', 'content': 0.09125050157308578, 'timestamp': '2025-10-02 01:03:53.776904', 'step': 29398, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:53.831580', 'step': 29398, 'epoch': 3}
{'type': 'loss', 'content': 0.01597464829683304, 'timestamp': '2025-10-02 01:03:53.833685', 'step': 29399, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:53.887785', 'step': 29399, 'epoch': 3}
{'type': 'loss', 'content': 0.018700815737247467, 'timestamp': '2025-10-02 01:03:53.896086', 'step': 29400, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:53.950454', 'step': 29400, 'epoch': 3}
{'type': 'loss', 'content': 0.03360580652952194, 'timestamp': '2025-10-02 01:03:53.956436', 'step': 29401, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:03:54.019358', 'step': 29401, 'epoch': 3}
{'type': 'loss', 'content': 0.021676629781723022, 'timestamp': '2025-10-02 01:03:54.029812', 'step': 29402, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:54.085219', 'step': 29402, 'epoch': 3}
{'type': 'loss', 'content': 0.01597234606742859, 'timestamp': '2025-10-02 01:03:54.092690', 'step': 29403, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:03:54.149112', 'step': 29403, 'epoch': 3}
{'type': 'loss', 'content': 0.1606624573469162, 'timestamp': '2025-10-02 01:03:54.154879', 'step': 29404, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:03:54.208899', 'step': 29404, 'epoch': 3}
{'type': 'loss', 'content': 0.07881758362054825, 'timestamp': '2025-10-02 01:03:54.210948', 'step': 29405, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:54.264789', 'step': 29405, 'epoch': 3}
{'type': 'loss', 'content': 0.11003420501947403, 'timestamp': '2025-10-02 01:03:54.267235', 'step': 29406, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:54.321368', 'step': 29406, 'epoch': 3}
{'type': 'loss', 'content': 0.013936308212578297, 'timestamp': '2025-10-02 01:03:54.323956', 'step': 29407, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:54.379207', 'step': 29407, 'epoch': 3}
{'type': 'loss', 'content': 0.009586640633642673, 'timestamp': '2025-10-02 01:03:54.385276', 'step': 29408, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:54.438915', 'step': 29408, 'epoch': 3}
{'type': 'loss', 'content': 0.03511183336377144, 'timestamp': '2025-10-02 01:03:54.441746', 'step': 29409, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:54.497164', 'step': 29409, 'epoch': 3}
{'type': 'loss', 'content': 0.036426570266485214, 'timestamp': '2025-10-02 01:03:54.499784', 'step': 29410, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:54.555880', 'step': 29410, 'epoch': 3}
{'type': 'loss', 'content': 0.015173071064054966, 'timestamp': '2025-10-02 01:03:54.558773', 'step': 29411, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:54.616455', 'step': 29411, 'epoch': 3}
{'type': 'loss', 'content': 0.04707097262144089, 'timestamp': '2025-10-02 01:03:54.622112', 'step': 29412, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:03:54.679893', 'step': 29412, 'epoch': 3}
{'type': 'loss', 'content': 0.007815969176590443, 'timestamp': '2025-10-02 01:03:54.690995', 'step': 29413, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:54.746464', 'step': 29413, 'epoch': 3}
{'type': 'loss', 'content': 0.03667782247066498, 'timestamp': '2025-10-02 01:03:54.749297', 'step': 29414, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:54.805121', 'step': 29414, 'epoch': 3}
{'type': 'loss', 'content': 0.023428115993738174, 'timestamp': '2025-10-02 01:03:54.812279', 'step': 29415, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:54.867363', 'step': 29415, 'epoch': 3}
{'type': 'loss', 'content': 0.04877123236656189, 'timestamp': '2025-10-02 01:03:54.873417', 'step': 29416, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:03:54.931252', 'step': 29416, 'epoch': 3}
{'type': 'loss', 'content': 0.0532105527818203, 'timestamp': '2025-10-02 01:03:54.942226', 'step': 29417, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:54.997136', 'step': 29417, 'epoch': 3}
{'type': 'loss', 'content': 0.017243297770619392, 'timestamp': '2025-10-02 01:03:54.999803', 'step': 29418, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:55.055745', 'step': 29418, 'epoch': 3}
{'type': 'loss', 'content': 0.09167764335870743, 'timestamp': '2025-10-02 01:03:55.058127', 'step': 29419, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:55.113391', 'step': 29419, 'epoch': 3}
{'type': 'loss', 'content': 0.022156041115522385, 'timestamp': '2025-10-02 01:03:55.121709', 'step': 29420, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:03:55.183152', 'step': 29420, 'epoch': 3}
{'type': 'loss', 'content': 0.04407793655991554, 'timestamp': '2025-10-02 01:03:55.194462', 'step': 29421, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:55.249485', 'step': 29421, 'epoch': 3}
{'type': 'loss', 'content': 0.04342620447278023, 'timestamp': '2025-10-02 01:03:55.251904', 'step': 29422, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:03:55.311639', 'step': 29422, 'epoch': 3}
{'type': 'loss', 'content': 0.014436429366469383, 'timestamp': '2025-10-02 01:03:55.321810', 'step': 29423, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:55.376566', 'step': 29423, 'epoch': 3}
{'type': 'loss', 'content': 0.062242716550827026, 'timestamp': '2025-10-02 01:03:55.383026', 'step': 29424, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:55.436978', 'step': 29424, 'epoch': 3}
{'type': 'loss', 'content': 0.028334878385066986, 'timestamp': '2025-10-02 01:03:55.447242', 'step': 29425, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:55.501995', 'step': 29425, 'epoch': 3}
{'type': 'loss', 'content': 0.006787541788071394, 'timestamp': '2025-10-02 01:03:55.504374', 'step': 29426, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:55.559678', 'step': 29426, 'epoch': 3}
{'type': 'loss', 'content': 0.06473995745182037, 'timestamp': '2025-10-02 01:03:55.562321', 'step': 29427, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:55.617038', 'step': 29427, 'epoch': 3}
{'type': 'loss', 'content': 0.0671488419175148, 'timestamp': '2025-10-02 01:03:55.625266', 'step': 29428, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:55.679454', 'step': 29428, 'epoch': 3}
{'type': 'loss', 'content': 0.04833417385816574, 'timestamp': '2025-10-02 01:03:55.682241', 'step': 29429, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:55.736898', 'step': 29429, 'epoch': 3}
{'type': 'loss', 'content': 0.04079219326376915, 'timestamp': '2025-10-02 01:03:55.739361', 'step': 29430, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:55.794339', 'step': 29430, 'epoch': 3}
{'type': 'loss', 'content': 0.004098794888705015, 'timestamp': '2025-10-02 01:03:55.803688', 'step': 29431, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:55.858057', 'step': 29431, 'epoch': 3}
{'type': 'loss', 'content': 0.12279189378023148, 'timestamp': '2025-10-02 01:03:55.864427', 'step': 29432, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:03:55.918814', 'step': 29432, 'epoch': 3}
{'type': 'loss', 'content': 0.167804554104805, 'timestamp': '2025-10-02 01:03:55.921315', 'step': 29433, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:03:55.976328', 'step': 29433, 'epoch': 3}
{'type': 'loss', 'content': 0.051406510174274445, 'timestamp': '2025-10-02 01:03:55.979294', 'step': 29434, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:03:56.040615', 'step': 29434, 'epoch': 3}
{'type': 'loss', 'content': 0.009430019184947014, 'timestamp': '2025-10-02 01:03:56.051093', 'step': 29435, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:56.106418', 'step': 29435, 'epoch': 3}
{'type': 'loss', 'content': 0.0103581128641963, 'timestamp': '2025-10-02 01:03:56.114426', 'step': 29436, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:56.172923', 'step': 29436, 'epoch': 3}
{'type': 'loss', 'content': 0.008131526410579681, 'timestamp': '2025-10-02 01:03:56.183275', 'step': 29437, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:03:56.241850', 'step': 29437, 'epoch': 3}
{'type': 'loss', 'content': 0.10389486700296402, 'timestamp': '2025-10-02 01:03:56.252069', 'step': 29438, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:03:56.306356', 'step': 29438, 'epoch': 3}
{'type': 'loss', 'content': 0.04191814735531807, 'timestamp': '2025-10-02 01:03:56.309049', 'step': 29439, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:56.364068', 'step': 29439, 'epoch': 3}
{'type': 'loss', 'content': 0.05298256874084473, 'timestamp': '2025-10-02 01:03:56.374191', 'step': 29440, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:03:56.428393', 'step': 29440, 'epoch': 3}
{'type': 'loss', 'content': 0.0346577949821949, 'timestamp': '2025-10-02 01:03:56.430808', 'step': 29441, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:56.487410', 'step': 29441, 'epoch': 3}
{'type': 'loss', 'content': 0.001619228278286755, 'timestamp': '2025-10-02 01:03:56.496939', 'step': 29442, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:56.554674', 'step': 29442, 'epoch': 3}
{'type': 'loss', 'content': 0.09464763849973679, 'timestamp': '2025-10-02 01:03:56.556669', 'step': 29443, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:56.611259', 'step': 29443, 'epoch': 3}
{'type': 'loss', 'content': 0.058507222682237625, 'timestamp': '2025-10-02 01:03:56.619449', 'step': 29444, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:56.674423', 'step': 29444, 'epoch': 3}
{'type': 'loss', 'content': 0.028409913182258606, 'timestamp': '2025-10-02 01:03:56.682028', 'step': 29445, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:03:56.736164', 'step': 29445, 'epoch': 3}
{'type': 'loss', 'content': 0.014701472595334053, 'timestamp': '2025-10-02 01:03:56.738454', 'step': 29446, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:56.793402', 'step': 29446, 'epoch': 3}
{'type': 'loss', 'content': 0.020999126136302948, 'timestamp': '2025-10-02 01:03:56.795832', 'step': 29447, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:56.851440', 'step': 29447, 'epoch': 3}
{'type': 'loss', 'content': 0.025798194110393524, 'timestamp': '2025-10-02 01:03:56.857557', 'step': 29448, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:56.912425', 'step': 29448, 'epoch': 3}
{'type': 'loss', 'content': 0.0010818942682817578, 'timestamp': '2025-10-02 01:03:56.915230', 'step': 29449, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:03:56.978045', 'step': 29449, 'epoch': 3}
{'type': 'loss', 'content': 0.04471340775489807, 'timestamp': '2025-10-02 01:03:56.988904', 'step': 29450, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:57.043209', 'step': 29450, 'epoch': 3}
{'type': 'loss', 'content': 0.105919748544693, 'timestamp': '2025-10-02 01:03:57.045458', 'step': 29451, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:57.100730', 'step': 29451, 'epoch': 3}
{'type': 'loss', 'content': 0.004103517159819603, 'timestamp': '2025-10-02 01:03:57.109053', 'step': 29452, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:03:57.163938', 'step': 29452, 'epoch': 3}
{'type': 'loss', 'content': 0.007761136628687382, 'timestamp': '2025-10-02 01:03:57.167145', 'step': 29453, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:57.223523', 'step': 29453, 'epoch': 3}
{'type': 'loss', 'content': 0.03036840818822384, 'timestamp': '2025-10-02 01:03:57.226122', 'step': 29454, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:57.282280', 'step': 29454, 'epoch': 3}
{'type': 'loss', 'content': 0.005271080415695906, 'timestamp': '2025-10-02 01:03:57.284947', 'step': 29455, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:57.339450', 'step': 29455, 'epoch': 3}
{'type': 'loss', 'content': 0.021185094490647316, 'timestamp': '2025-10-02 01:03:57.345762', 'step': 29456, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:57.400824', 'step': 29456, 'epoch': 3}
{'type': 'loss', 'content': 0.028516093268990517, 'timestamp': '2025-10-02 01:03:57.403579', 'step': 29457, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:03:57.467319', 'step': 29457, 'epoch': 3}
{'type': 'loss', 'content': 0.018744278699159622, 'timestamp': '2025-10-02 01:03:57.478161', 'step': 29458, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:03:57.534494', 'step': 29458, 'epoch': 3}
{'type': 'loss', 'content': 0.1479814499616623, 'timestamp': '2025-10-02 01:03:57.537200', 'step': 29459, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:03:57.591632', 'step': 29459, 'epoch': 3}
{'type': 'loss', 'content': 0.04422278329730034, 'timestamp': '2025-10-02 01:03:57.598197', 'step': 29460, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:03:57.652946', 'step': 29460, 'epoch': 3}
{'type': 'loss', 'content': 0.08017851412296295, 'timestamp': '2025-10-02 01:03:57.655447', 'step': 29461, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:57.710153', 'step': 29461, 'epoch': 3}
{'type': 'loss', 'content': 0.03417646512389183, 'timestamp': '2025-10-02 01:03:57.713662', 'step': 29462, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:03:57.770529', 'step': 29462, 'epoch': 3}
{'type': 'loss', 'content': 0.038612134754657745, 'timestamp': '2025-10-02 01:03:57.780081', 'step': 29463, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:57.835885', 'step': 29463, 'epoch': 3}
{'type': 'loss', 'content': 0.03975249081850052, 'timestamp': '2025-10-02 01:03:57.842547', 'step': 29464, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:03:57.897055', 'step': 29464, 'epoch': 3}
{'type': 'loss', 'content': 0.12761950492858887, 'timestamp': '2025-10-02 01:03:57.899593', 'step': 29465, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:57.955343', 'step': 29465, 'epoch': 3}
{'type': 'loss', 'content': 0.030031047761440277, 'timestamp': '2025-10-02 01:03:57.963082', 'step': 29466, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:58.018566', 'step': 29466, 'epoch': 3}
{'type': 'loss', 'content': 0.05745881795883179, 'timestamp': '2025-10-02 01:03:58.024496', 'step': 29467, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:58.079158', 'step': 29467, 'epoch': 3}
{'type': 'loss', 'content': 0.061915960162878036, 'timestamp': '2025-10-02 01:03:58.085929', 'step': 29468, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:03:58.140171', 'step': 29468, 'epoch': 3}
{'type': 'loss', 'content': 0.06418938934803009, 'timestamp': '2025-10-02 01:03:58.142712', 'step': 29469, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:58.197920', 'step': 29469, 'epoch': 3}
{'type': 'loss', 'content': 0.03655993565917015, 'timestamp': '2025-10-02 01:03:58.200380', 'step': 29470, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:58.255758', 'step': 29470, 'epoch': 3}
{'type': 'loss', 'content': 0.03272111713886261, 'timestamp': '2025-10-02 01:03:58.258383', 'step': 29471, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:58.313385', 'step': 29471, 'epoch': 3}
{'type': 'loss', 'content': 0.03704993799328804, 'timestamp': '2025-10-02 01:03:58.319575', 'step': 29472, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:58.375615', 'step': 29472, 'epoch': 3}
{'type': 'loss', 'content': 0.048331305384635925, 'timestamp': '2025-10-02 01:03:58.381371', 'step': 29473, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:58.439798', 'step': 29473, 'epoch': 3}
{'type': 'loss', 'content': 0.06846161931753159, 'timestamp': '2025-10-02 01:03:58.443240', 'step': 29474, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:58.502080', 'step': 29474, 'epoch': 3}
{'type': 'loss', 'content': 0.01728641986846924, 'timestamp': '2025-10-02 01:03:58.508314', 'step': 29475, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:58.568809', 'step': 29475, 'epoch': 3}
{'type': 'loss', 'content': 0.008820820599794388, 'timestamp': '2025-10-02 01:03:58.576158', 'step': 29476, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:58.635753', 'step': 29476, 'epoch': 3}
{'type': 'loss', 'content': 0.0051731145940721035, 'timestamp': '2025-10-02 01:03:58.641535', 'step': 29477, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:03:58.702007', 'step': 29477, 'epoch': 3}
{'type': 'loss', 'content': 0.019311854615807533, 'timestamp': '2025-10-02 01:03:58.705992', 'step': 29478, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:58.765073', 'step': 29478, 'epoch': 3}
{'type': 'loss', 'content': 0.06278762966394424, 'timestamp': '2025-10-02 01:03:58.774428', 'step': 29479, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:58.832677', 'step': 29479, 'epoch': 3}
{'type': 'loss', 'content': 0.026337532326579094, 'timestamp': '2025-10-02 01:03:58.839899', 'step': 29480, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:03:58.897112', 'step': 29480, 'epoch': 3}
{'type': 'loss', 'content': 0.0371856652200222, 'timestamp': '2025-10-02 01:03:58.899570', 'step': 29481, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:58.957176', 'step': 29481, 'epoch': 3}
{'type': 'loss', 'content': 0.007690194994211197, 'timestamp': '2025-10-02 01:03:58.966523', 'step': 29482, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:03:59.023621', 'step': 29482, 'epoch': 3}
{'type': 'loss', 'content': 0.08071086555719376, 'timestamp': '2025-10-02 01:03:59.027116', 'step': 29483, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:59.087438', 'step': 29483, 'epoch': 3}
{'type': 'loss', 'content': 0.108790822327137, 'timestamp': '2025-10-02 01:03:59.094291', 'step': 29484, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:03:59.164276', 'step': 29484, 'epoch': 3}
{'type': 'loss', 'content': 0.011521360836923122, 'timestamp': '2025-10-02 01:03:59.177655', 'step': 29485, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:59.237050', 'step': 29485, 'epoch': 3}
{'type': 'loss', 'content': 0.06791894137859344, 'timestamp': '2025-10-02 01:03:59.240105', 'step': 29486, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:03:59.306047', 'step': 29486, 'epoch': 3}
{'type': 'loss', 'content': 0.009360474534332752, 'timestamp': '2025-10-02 01:03:59.316534', 'step': 29487, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:03:59.373554', 'step': 29487, 'epoch': 3}
{'type': 'loss', 'content': 0.04890937730669975, 'timestamp': '2025-10-02 01:03:59.380064', 'step': 29488, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:59.435371', 'step': 29488, 'epoch': 3}
{'type': 'loss', 'content': 0.029668500646948814, 'timestamp': '2025-10-02 01:03:59.438277', 'step': 29489, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:59.492653', 'step': 29489, 'epoch': 3}
{'type': 'loss', 'content': 0.0875718891620636, 'timestamp': '2025-10-02 01:03:59.495177', 'step': 29490, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:03:59.549346', 'step': 29490, 'epoch': 3}
{'type': 'loss', 'content': 0.027418499812483788, 'timestamp': '2025-10-02 01:03:59.558687', 'step': 29491, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:03:59.614131', 'step': 29491, 'epoch': 3}
{'type': 'loss', 'content': 0.0025773823726922274, 'timestamp': '2025-10-02 01:03:59.620413', 'step': 29492, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:03:59.675555', 'step': 29492, 'epoch': 3}
{'type': 'loss', 'content': 0.02415149100124836, 'timestamp': '2025-10-02 01:03:59.683125', 'step': 29493, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:03:59.738091', 'step': 29493, 'epoch': 3}
{'type': 'loss', 'content': 0.04518478363752365, 'timestamp': '2025-10-02 01:03:59.740537', 'step': 29494, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:03:59.808679', 'step': 29494, 'epoch': 3}
{'type': 'loss', 'content': 0.009249030612409115, 'timestamp': '2025-10-02 01:03:59.814611', 'step': 29495, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:59.869793', 'step': 29495, 'epoch': 3}
{'type': 'loss', 'content': 0.11546637862920761, 'timestamp': '2025-10-02 01:03:59.876282', 'step': 29496, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:03:59.930672', 'step': 29496, 'epoch': 3}
{'type': 'loss', 'content': 0.045561015605926514, 'timestamp': '2025-10-02 01:03:59.933219', 'step': 29497, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:03:59.988492', 'step': 29497, 'epoch': 3}
{'type': 'loss', 'content': 0.009963033720850945, 'timestamp': '2025-10-02 01:03:59.991275', 'step': 29498, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:00.046515', 'step': 29498, 'epoch': 3}
{'type': 'loss', 'content': 0.055932898074388504, 'timestamp': '2025-10-02 01:04:00.049452', 'step': 29499, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:04:00.105219', 'step': 29499, 'epoch': 3}
{'type': 'loss', 'content': 0.03892931714653969, 'timestamp': '2025-10-02 01:04:00.111883', 'step': 29500, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 29500', 'timestamp': '2025-10-02 01:04:00.531980', 'step': 29500, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:00.586004', 'step': 29500, 'epoch': 3}
{'type': 'loss', 'content': 0.09709787368774414, 'timestamp': '2025-10-02 01:04:00.588727', 'step': 29501, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:00.644854', 'step': 29501, 'epoch': 3}
{'type': 'loss', 'content': 0.009042014367878437, 'timestamp': '2025-10-02 01:04:00.652243', 'step': 29502, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:00.707610', 'step': 29502, 'epoch': 3}
{'type': 'loss', 'content': 0.05029299110174179, 'timestamp': '2025-10-02 01:04:00.710050', 'step': 29503, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:00.765056', 'step': 29503, 'epoch': 3}
{'type': 'loss', 'content': 0.09604841470718384, 'timestamp': '2025-10-02 01:04:00.771853', 'step': 29504, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:00.827839', 'step': 29504, 'epoch': 3}
{'type': 'loss', 'content': 0.02887973003089428, 'timestamp': '2025-10-02 01:04:00.830540', 'step': 29505, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:00.886022', 'step': 29505, 'epoch': 3}
{'type': 'loss', 'content': 0.0012893055099993944, 'timestamp': '2025-10-02 01:04:00.895234', 'step': 29506, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:00.952664', 'step': 29506, 'epoch': 3}
{'type': 'loss', 'content': 0.009042861871421337, 'timestamp': '2025-10-02 01:04:00.956112', 'step': 29507, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:01.011843', 'step': 29507, 'epoch': 3}
{'type': 'loss', 'content': 0.014098349027335644, 'timestamp': '2025-10-02 01:04:01.017953', 'step': 29508, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:01.072476', 'step': 29508, 'epoch': 3}
{'type': 'loss', 'content': 0.15713703632354736, 'timestamp': '2025-10-02 01:04:01.075211', 'step': 29509, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:04:01.135429', 'step': 29509, 'epoch': 3}
{'type': 'loss', 'content': 0.007849184796214104, 'timestamp': '2025-10-02 01:04:01.145567', 'step': 29510, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:01.209367', 'step': 29510, 'epoch': 3}
{'type': 'loss', 'content': 0.015864092856645584, 'timestamp': '2025-10-02 01:04:01.215036', 'step': 29511, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:01.281172', 'step': 29511, 'epoch': 3}
{'type': 'loss', 'content': 0.01487429253757, 'timestamp': '2025-10-02 01:04:01.289127', 'step': 29512, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:01.344965', 'step': 29512, 'epoch': 3}
{'type': 'loss', 'content': 0.018407676368951797, 'timestamp': '2025-10-02 01:04:01.350900', 'step': 29513, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:01.406617', 'step': 29513, 'epoch': 3}
{'type': 'loss', 'content': 0.019048094749450684, 'timestamp': '2025-10-02 01:04:01.412699', 'step': 29514, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:01.468280', 'step': 29514, 'epoch': 3}
{'type': 'loss', 'content': 0.00917360745370388, 'timestamp': '2025-10-02 01:04:01.471752', 'step': 29515, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:01.527330', 'step': 29515, 'epoch': 3}
{'type': 'loss', 'content': 0.04827427491545677, 'timestamp': '2025-10-02 01:04:01.534612', 'step': 29516, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:01.590436', 'step': 29516, 'epoch': 3}
{'type': 'loss', 'content': 0.04533287510275841, 'timestamp': '2025-10-02 01:04:01.593363', 'step': 29517, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:01.649055', 'step': 29517, 'epoch': 3}
{'type': 'loss', 'content': 0.026359975337982178, 'timestamp': '2025-10-02 01:04:01.655192', 'step': 29518, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:04:01.711006', 'step': 29518, 'epoch': 3}
{'type': 'loss', 'content': 0.05418557673692703, 'timestamp': '2025-10-02 01:04:01.713772', 'step': 29519, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:01.769206', 'step': 29519, 'epoch': 3}
{'type': 'loss', 'content': 0.052939414978027344, 'timestamp': '2025-10-02 01:04:01.779317', 'step': 29520, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:01.834478', 'step': 29520, 'epoch': 3}
{'type': 'loss', 'content': 0.04674755036830902, 'timestamp': '2025-10-02 01:04:01.841989', 'step': 29521, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:04:01.905677', 'step': 29521, 'epoch': 3}
{'type': 'loss', 'content': 0.00046968142851255834, 'timestamp': '2025-10-02 01:04:01.916547', 'step': 29522, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:01.971662', 'step': 29522, 'epoch': 3}
{'type': 'loss', 'content': 0.017596431076526642, 'timestamp': '2025-10-02 01:04:01.974575', 'step': 29523, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:02.029686', 'step': 29523, 'epoch': 3}
{'type': 'loss', 'content': 0.040999606251716614, 'timestamp': '2025-10-02 01:04:02.035704', 'step': 29524, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:02.090771', 'step': 29524, 'epoch': 3}
{'type': 'loss', 'content': 0.0239702295511961, 'timestamp': '2025-10-02 01:04:02.093221', 'step': 29525, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:04:02.152610', 'step': 29525, 'epoch': 3}
{'type': 'loss', 'content': 0.009822223335504532, 'timestamp': '2025-10-02 01:04:02.162756', 'step': 29526, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:02.220611', 'step': 29526, 'epoch': 3}
{'type': 'loss', 'content': 0.024975968524813652, 'timestamp': '2025-10-02 01:04:02.223411', 'step': 29527, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:02.279046', 'step': 29527, 'epoch': 3}
{'type': 'loss', 'content': 0.046974699944257736, 'timestamp': '2025-10-02 01:04:02.285691', 'step': 29528, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:04:02.339899', 'step': 29528, 'epoch': 3}
{'type': 'loss', 'content': 0.009018225595355034, 'timestamp': '2025-10-02 01:04:02.342628', 'step': 29529, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:02.399069', 'step': 29529, 'epoch': 3}
{'type': 'loss', 'content': 0.011663619428873062, 'timestamp': '2025-10-02 01:04:02.408607', 'step': 29530, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:02.464621', 'step': 29530, 'epoch': 3}
{'type': 'loss', 'content': 0.011769046075642109, 'timestamp': '2025-10-02 01:04:02.466917', 'step': 29531, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:02.520761', 'step': 29531, 'epoch': 3}
{'type': 'loss', 'content': 0.042424436658620834, 'timestamp': '2025-10-02 01:04:02.528809', 'step': 29532, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:02.584245', 'step': 29532, 'epoch': 3}
{'type': 'loss', 'content': 0.023196591064333916, 'timestamp': '2025-10-02 01:04:02.587250', 'step': 29533, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:02.641925', 'step': 29533, 'epoch': 3}
{'type': 'loss', 'content': 0.09181138873100281, 'timestamp': '2025-10-02 01:04:02.644988', 'step': 29534, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:02.699373', 'step': 29534, 'epoch': 3}
{'type': 'loss', 'content': 0.04378467798233032, 'timestamp': '2025-10-02 01:04:02.701921', 'step': 29535, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:02.756787', 'step': 29535, 'epoch': 3}
{'type': 'loss', 'content': 0.08009692281484604, 'timestamp': '2025-10-02 01:04:02.763151', 'step': 29536, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:02.816833', 'step': 29536, 'epoch': 3}
{'type': 'loss', 'content': 0.033662132918834686, 'timestamp': '2025-10-02 01:04:02.823086', 'step': 29537, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:02.877966', 'step': 29537, 'epoch': 3}
{'type': 'loss', 'content': 0.03247697651386261, 'timestamp': '2025-10-02 01:04:02.880675', 'step': 29538, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:04:02.935861', 'step': 29538, 'epoch': 3}
{'type': 'loss', 'content': 0.050406381487846375, 'timestamp': '2025-10-02 01:04:02.938392', 'step': 29539, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:02.993223', 'step': 29539, 'epoch': 3}
{'type': 'loss', 'content': 0.048452869057655334, 'timestamp': '2025-10-02 01:04:02.999315', 'step': 29540, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:03.055680', 'step': 29540, 'epoch': 3}
{'type': 'loss', 'content': 0.043499987572431564, 'timestamp': '2025-10-02 01:04:03.058023', 'step': 29541, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:04:03.129080', 'step': 29541, 'epoch': 3}
{'type': 'loss', 'content': 0.0031499098986387253, 'timestamp': '2025-10-02 01:04:03.139226', 'step': 29542, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:03.194238', 'step': 29542, 'epoch': 3}
{'type': 'loss', 'content': 0.09621736407279968, 'timestamp': '2025-10-02 01:04:03.196919', 'step': 29543, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:03.250925', 'step': 29543, 'epoch': 3}
{'type': 'loss', 'content': 0.09787631779909134, 'timestamp': '2025-10-02 01:04:03.257303', 'step': 29544, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:04:03.310780', 'step': 29544, 'epoch': 3}
{'type': 'loss', 'content': 0.041483547538518906, 'timestamp': '2025-10-02 01:04:03.313462', 'step': 29545, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:04:03.373662', 'step': 29545, 'epoch': 3}
{'type': 'loss', 'content': 0.009647506289184093, 'timestamp': '2025-10-02 01:04:03.383862', 'step': 29546, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:04:03.447513', 'step': 29546, 'epoch': 3}
{'type': 'loss', 'content': 0.012430977076292038, 'timestamp': '2025-10-02 01:04:03.458155', 'step': 29547, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:03.516301', 'step': 29547, 'epoch': 3}
{'type': 'loss', 'content': 0.035711731761693954, 'timestamp': '2025-10-02 01:04:03.522220', 'step': 29548, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:03.576591', 'step': 29548, 'epoch': 3}
{'type': 'loss', 'content': 0.07588273286819458, 'timestamp': '2025-10-02 01:04:03.579021', 'step': 29549, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:04:03.633367', 'step': 29549, 'epoch': 3}
{'type': 'loss', 'content': 0.0971866175532341, 'timestamp': '2025-10-02 01:04:03.636306', 'step': 29550, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:03.692301', 'step': 29550, 'epoch': 3}
{'type': 'loss', 'content': 0.01046450063586235, 'timestamp': '2025-10-02 01:04:03.701845', 'step': 29551, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:03.758298', 'step': 29551, 'epoch': 3}
{'type': 'loss', 'content': 0.018556080758571625, 'timestamp': '2025-10-02 01:04:03.768625', 'step': 29552, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:03.823142', 'step': 29552, 'epoch': 3}
{'type': 'loss', 'content': 0.011530593037605286, 'timestamp': '2025-10-02 01:04:03.825458', 'step': 29553, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:03.879224', 'step': 29553, 'epoch': 3}
{'type': 'loss', 'content': 0.08711536973714828, 'timestamp': '2025-10-02 01:04:03.881823', 'step': 29554, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:03.937119', 'step': 29554, 'epoch': 3}
{'type': 'loss', 'content': 0.019335763528943062, 'timestamp': '2025-10-02 01:04:03.939728', 'step': 29555, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:04:04.002499', 'step': 29555, 'epoch': 3}
{'type': 'loss', 'content': 0.028052156791090965, 'timestamp': '2025-10-02 01:04:04.013901', 'step': 29556, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:04.068652', 'step': 29556, 'epoch': 3}
{'type': 'loss', 'content': 0.07146266102790833, 'timestamp': '2025-10-02 01:04:04.071728', 'step': 29557, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:04:04.128601', 'step': 29557, 'epoch': 3}
{'type': 'loss', 'content': 0.04810133948922157, 'timestamp': '2025-10-02 01:04:04.131420', 'step': 29558, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:04:04.186615', 'step': 29558, 'epoch': 3}
{'type': 'loss', 'content': 0.03577499836683273, 'timestamp': '2025-10-02 01:04:04.188841', 'step': 29559, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:04.243867', 'step': 29559, 'epoch': 3}
{'type': 'loss', 'content': 0.03844841942191124, 'timestamp': '2025-10-02 01:04:04.250080', 'step': 29560, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:04.304220', 'step': 29560, 'epoch': 3}
{'type': 'loss', 'content': 0.05340474098920822, 'timestamp': '2025-10-02 01:04:04.306802', 'step': 29561, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:04:04.368600', 'step': 29561, 'epoch': 3}
{'type': 'loss', 'content': 0.009608977474272251, 'timestamp': '2025-10-02 01:04:04.379233', 'step': 29562, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:04.433749', 'step': 29562, 'epoch': 3}
{'type': 'loss', 'content': 0.10436692088842392, 'timestamp': '2025-10-02 01:04:04.436589', 'step': 29563, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:04.491771', 'step': 29563, 'epoch': 3}
{'type': 'loss', 'content': 0.0762178972363472, 'timestamp': '2025-10-02 01:04:04.497857', 'step': 29564, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:04:04.551653', 'step': 29564, 'epoch': 3}
{'type': 'loss', 'content': 0.07290733605623245, 'timestamp': '2025-10-02 01:04:04.554286', 'step': 29565, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:04.609663', 'step': 29565, 'epoch': 3}
{'type': 'loss', 'content': 0.057122401893138885, 'timestamp': '2025-10-02 01:04:04.612009', 'step': 29566, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:04.666458', 'step': 29566, 'epoch': 3}
{'type': 'loss', 'content': 0.015438570640981197, 'timestamp': '2025-10-02 01:04:04.672238', 'step': 29567, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:04.727457', 'step': 29567, 'epoch': 3}
{'type': 'loss', 'content': 0.07721342891454697, 'timestamp': '2025-10-02 01:04:04.733510', 'step': 29568, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:04.787224', 'step': 29568, 'epoch': 3}
{'type': 'loss', 'content': 0.09966292232275009, 'timestamp': '2025-10-02 01:04:04.790558', 'step': 29569, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:04.846778', 'step': 29569, 'epoch': 3}
{'type': 'loss', 'content': 0.017903437837958336, 'timestamp': '2025-10-02 01:04:04.856350', 'step': 29570, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:04.911520', 'step': 29570, 'epoch': 3}
{'type': 'loss', 'content': 0.023958902806043625, 'timestamp': '2025-10-02 01:04:04.920828', 'step': 29571, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:04.975791', 'step': 29571, 'epoch': 3}
{'type': 'loss', 'content': 0.04277229309082031, 'timestamp': '2025-10-02 01:04:04.981911', 'step': 29572, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:04:05.043653', 'step': 29572, 'epoch': 3}
{'type': 'loss', 'content': 0.010728048160672188, 'timestamp': '2025-10-02 01:04:05.055425', 'step': 29573, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:04:05.117528', 'step': 29573, 'epoch': 3}
{'type': 'loss', 'content': 0.07248823344707489, 'timestamp': '2025-10-02 01:04:05.127990', 'step': 29574, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:05.183259', 'step': 29574, 'epoch': 3}
{'type': 'loss', 'content': 0.08919910341501236, 'timestamp': '2025-10-02 01:04:05.185687', 'step': 29575, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:05.241149', 'step': 29575, 'epoch': 3}
{'type': 'loss', 'content': 0.01908893696963787, 'timestamp': '2025-10-02 01:04:05.247148', 'step': 29576, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:05.300184', 'step': 29576, 'epoch': 3}
{'type': 'loss', 'content': 0.0396769642829895, 'timestamp': '2025-10-02 01:04:05.302638', 'step': 29577, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:05.357211', 'step': 29577, 'epoch': 3}
{'type': 'loss', 'content': 0.04548316076397896, 'timestamp': '2025-10-02 01:04:05.359823', 'step': 29578, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:05.414825', 'step': 29578, 'epoch': 3}
{'type': 'loss', 'content': 0.04181547090411186, 'timestamp': '2025-10-02 01:04:05.417440', 'step': 29579, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:05.472004', 'step': 29579, 'epoch': 3}
{'type': 'loss', 'content': 0.0647306740283966, 'timestamp': '2025-10-02 01:04:05.478658', 'step': 29580, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:05.532832', 'step': 29580, 'epoch': 3}
{'type': 'loss', 'content': 0.028243524953722954, 'timestamp': '2025-10-02 01:04:05.536520', 'step': 29581, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:04:05.592390', 'step': 29581, 'epoch': 3}
{'type': 'loss', 'content': 0.04536311328411102, 'timestamp': '2025-10-02 01:04:05.594889', 'step': 29582, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:05.649758', 'step': 29582, 'epoch': 3}
{'type': 'loss', 'content': 0.04314086586236954, 'timestamp': '2025-10-02 01:04:05.659044', 'step': 29583, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:04:05.721717', 'step': 29583, 'epoch': 3}
{'type': 'loss', 'content': 0.05198228731751442, 'timestamp': '2025-10-02 01:04:05.733088', 'step': 29584, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:04:05.794599', 'step': 29584, 'epoch': 3}
{'type': 'loss', 'content': 0.03771474212408066, 'timestamp': '2025-10-02 01:04:05.805961', 'step': 29585, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:05.860861', 'step': 29585, 'epoch': 3}
{'type': 'loss', 'content': 0.056695181876420975, 'timestamp': '2025-10-02 01:04:05.863406', 'step': 29586, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:05.919325', 'step': 29586, 'epoch': 3}
{'type': 'loss', 'content': 0.005477606318891048, 'timestamp': '2025-10-02 01:04:05.921883', 'step': 29587, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:05.976953', 'step': 29587, 'epoch': 3}
{'type': 'loss', 'content': 0.05180983617901802, 'timestamp': '2025-10-02 01:04:05.985388', 'step': 29588, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:06.039176', 'step': 29588, 'epoch': 3}
{'type': 'loss', 'content': 0.04817173629999161, 'timestamp': '2025-10-02 01:04:06.041763', 'step': 29589, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:06.096818', 'step': 29589, 'epoch': 3}
{'type': 'loss', 'content': 0.03713317960500717, 'timestamp': '2025-10-02 01:04:06.099460', 'step': 29590, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:06.155729', 'step': 29590, 'epoch': 3}
{'type': 'loss', 'content': 0.05266626551747322, 'timestamp': '2025-10-02 01:04:06.165269', 'step': 29591, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:04:06.234514', 'step': 29591, 'epoch': 3}
{'type': 'loss', 'content': 0.01003813836723566, 'timestamp': '2025-10-02 01:04:06.247249', 'step': 29592, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:06.302936', 'step': 29592, 'epoch': 3}
{'type': 'loss', 'content': 0.002940128557384014, 'timestamp': '2025-10-02 01:04:06.305996', 'step': 29593, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:06.361267', 'step': 29593, 'epoch': 3}
{'type': 'loss', 'content': 0.029291098937392235, 'timestamp': '2025-10-02 01:04:06.363689', 'step': 29594, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:04:06.420245', 'step': 29594, 'epoch': 3}
{'type': 'loss', 'content': 0.015843892470002174, 'timestamp': '2025-10-02 01:04:06.422837', 'step': 29595, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:06.476992', 'step': 29595, 'epoch': 3}
{'type': 'loss', 'content': 0.016231952235102654, 'timestamp': '2025-10-02 01:04:06.483012', 'step': 29596, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:06.536881', 'step': 29596, 'epoch': 3}
{'type': 'loss', 'content': 0.01783178187906742, 'timestamp': '2025-10-02 01:04:06.540335', 'step': 29597, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:06.594390', 'step': 29597, 'epoch': 3}
{'type': 'loss', 'content': 0.057647742331027985, 'timestamp': '2025-10-02 01:04:06.597367', 'step': 29598, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:06.651774', 'step': 29598, 'epoch': 3}
{'type': 'loss', 'content': 0.03145918622612953, 'timestamp': '2025-10-02 01:04:06.657797', 'step': 29599, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:04:06.715539', 'step': 29599, 'epoch': 3}
{'type': 'loss', 'content': 0.05447160080075264, 'timestamp': '2025-10-02 01:04:06.722086', 'step': 29600, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:04:06.781546', 'step': 29600, 'epoch': 3}
{'type': 'loss', 'content': 0.018855784088373184, 'timestamp': '2025-10-02 01:04:06.792509', 'step': 29601, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:06.847183', 'step': 29601, 'epoch': 3}
{'type': 'loss', 'content': 0.06900518387556076, 'timestamp': '2025-10-02 01:04:06.853169', 'step': 29602, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:06.908382', 'step': 29602, 'epoch': 3}
{'type': 'loss', 'content': 0.11202529817819595, 'timestamp': '2025-10-02 01:04:06.910733', 'step': 29603, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:06.965311', 'step': 29603, 'epoch': 3}
{'type': 'loss', 'content': 0.03489292040467262, 'timestamp': '2025-10-02 01:04:06.971388', 'step': 29604, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:07.025653', 'step': 29604, 'epoch': 3}
{'type': 'loss', 'content': 0.017015447840094566, 'timestamp': '2025-10-02 01:04:07.031617', 'step': 29605, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:07.088047', 'step': 29605, 'epoch': 3}
{'type': 'loss', 'content': 0.015347283333539963, 'timestamp': '2025-10-02 01:04:07.095600', 'step': 29606, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:07.150934', 'step': 29606, 'epoch': 3}
{'type': 'loss', 'content': 0.04228975251317024, 'timestamp': '2025-10-02 01:04:07.153384', 'step': 29607, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:07.208189', 'step': 29607, 'epoch': 3}
{'type': 'loss', 'content': 0.04288338124752045, 'timestamp': '2025-10-02 01:04:07.216697', 'step': 29608, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:07.271012', 'step': 29608, 'epoch': 3}
{'type': 'loss', 'content': 0.033750977367162704, 'timestamp': '2025-10-02 01:04:07.273683', 'step': 29609, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:07.328425', 'step': 29609, 'epoch': 3}
{'type': 'loss', 'content': 0.04533839970827103, 'timestamp': '2025-10-02 01:04:07.331934', 'step': 29610, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:07.390597', 'step': 29610, 'epoch': 3}
{'type': 'loss', 'content': 0.012705306522548199, 'timestamp': '2025-10-02 01:04:07.396720', 'step': 29611, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:07.455056', 'step': 29611, 'epoch': 3}
{'type': 'loss', 'content': 0.011232242919504642, 'timestamp': '2025-10-02 01:04:07.464199', 'step': 29612, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:07.524650', 'step': 29612, 'epoch': 3}
{'type': 'loss', 'content': 0.006797707639634609, 'timestamp': '2025-10-02 01:04:07.528488', 'step': 29613, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:07.584961', 'step': 29613, 'epoch': 3}
{'type': 'loss', 'content': 0.08079791069030762, 'timestamp': '2025-10-02 01:04:07.590637', 'step': 29614, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:07.647534', 'step': 29614, 'epoch': 3}
{'type': 'loss', 'content': 0.019224068149924278, 'timestamp': '2025-10-02 01:04:07.650250', 'step': 29615, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:07.705807', 'step': 29615, 'epoch': 3}
{'type': 'loss', 'content': 0.07068590819835663, 'timestamp': '2025-10-02 01:04:07.712794', 'step': 29616, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:07.769715', 'step': 29616, 'epoch': 3}
{'type': 'loss', 'content': 0.029052559286355972, 'timestamp': '2025-10-02 01:04:07.773477', 'step': 29617, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:04:07.843124', 'step': 29617, 'epoch': 3}
{'type': 'loss', 'content': 0.04679236561059952, 'timestamp': '2025-10-02 01:04:07.855076', 'step': 29618, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:07.913034', 'step': 29618, 'epoch': 3}
{'type': 'loss', 'content': 0.031184164807200432, 'timestamp': '2025-10-02 01:04:07.922400', 'step': 29619, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:07.980502', 'step': 29619, 'epoch': 3}
{'type': 'loss', 'content': 0.05264050140976906, 'timestamp': '2025-10-02 01:04:07.990783', 'step': 29620, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:04:08.053954', 'step': 29620, 'epoch': 3}
{'type': 'loss', 'content': 0.02405490167438984, 'timestamp': '2025-10-02 01:04:08.065344', 'step': 29621, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:08.121320', 'step': 29621, 'epoch': 3}
{'type': 'loss', 'content': 0.055684249848127365, 'timestamp': '2025-10-02 01:04:08.126184', 'step': 29622, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:04:08.184571', 'step': 29622, 'epoch': 3}
{'type': 'loss', 'content': 0.07564939558506012, 'timestamp': '2025-10-02 01:04:08.187920', 'step': 29623, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:04:08.244445', 'step': 29623, 'epoch': 3}
{'type': 'loss', 'content': 0.010466287843883038, 'timestamp': '2025-10-02 01:04:08.251222', 'step': 29624, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:04:08.307251', 'step': 29624, 'epoch': 3}
{'type': 'loss', 'content': 0.06788841634988785, 'timestamp': '2025-10-02 01:04:08.310051', 'step': 29625, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:08.366751', 'step': 29625, 'epoch': 3}
{'type': 'loss', 'content': 0.04070693254470825, 'timestamp': '2025-10-02 01:04:08.369397', 'step': 29626, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:08.428050', 'step': 29626, 'epoch': 3}
{'type': 'loss', 'content': 0.013282577507197857, 'timestamp': '2025-10-02 01:04:08.437384', 'step': 29627, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:08.493877', 'step': 29627, 'epoch': 3}
{'type': 'loss', 'content': 0.047095589339733124, 'timestamp': '2025-10-02 01:04:08.502109', 'step': 29628, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:08.557309', 'step': 29628, 'epoch': 3}
{'type': 'loss', 'content': 0.046865444630384445, 'timestamp': '2025-10-02 01:04:08.561893', 'step': 29629, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:08.618926', 'step': 29629, 'epoch': 3}
{'type': 'loss', 'content': 0.008824005722999573, 'timestamp': '2025-10-02 01:04:08.622477', 'step': 29630, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:08.678905', 'step': 29630, 'epoch': 3}
{'type': 'loss', 'content': 0.06493061780929565, 'timestamp': '2025-10-02 01:04:08.682354', 'step': 29631, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:08.738832', 'step': 29631, 'epoch': 3}
{'type': 'loss', 'content': 0.047232020646333694, 'timestamp': '2025-10-02 01:04:08.747875', 'step': 29632, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:04:08.811060', 'step': 29632, 'epoch': 3}
{'type': 'loss', 'content': 0.048319507390260696, 'timestamp': '2025-10-02 01:04:08.822558', 'step': 29633, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-10-02 01:04:08.901094', 'step': 29633, 'epoch': 3}
{'type': 'loss', 'content': 0.0345597118139267, 'timestamp': '2025-10-02 01:04:08.914758', 'step': 29634, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:08.973056', 'step': 29634, 'epoch': 3}
{'type': 'loss', 'content': 0.028067108243703842, 'timestamp': '2025-10-02 01:04:08.976048', 'step': 29635, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:09.031921', 'step': 29635, 'epoch': 3}
{'type': 'loss', 'content': 0.008843174204230309, 'timestamp': '2025-10-02 01:04:09.038295', 'step': 29636, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:09.094950', 'step': 29636, 'epoch': 3}
{'type': 'loss', 'content': 0.12271731346845627, 'timestamp': '2025-10-02 01:04:09.098487', 'step': 29637, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:04:09.164959', 'step': 29637, 'epoch': 3}
{'type': 'loss', 'content': 0.008381476625800133, 'timestamp': '2025-10-02 01:04:09.175653', 'step': 29638, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:09.237004', 'step': 29638, 'epoch': 3}
{'type': 'loss', 'content': 0.0017893178155645728, 'timestamp': '2025-10-02 01:04:09.240780', 'step': 29639, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:04:09.296941', 'step': 29639, 'epoch': 3}
{'type': 'loss', 'content': 0.08149315416812897, 'timestamp': '2025-10-02 01:04:09.304057', 'step': 29640, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:09.361499', 'step': 29640, 'epoch': 3}
{'type': 'loss', 'content': 0.050691694021224976, 'timestamp': '2025-10-02 01:04:09.364772', 'step': 29641, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:09.420076', 'step': 29641, 'epoch': 3}
{'type': 'loss', 'content': 0.06435300409793854, 'timestamp': '2025-10-02 01:04:09.424045', 'step': 29642, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:09.482909', 'step': 29642, 'epoch': 3}
{'type': 'loss', 'content': 0.03423076868057251, 'timestamp': '2025-10-02 01:04:09.496799', 'step': 29643, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:04:09.551336', 'step': 29643, 'epoch': 3}
{'type': 'loss', 'content': 0.028927991166710854, 'timestamp': '2025-10-02 01:04:09.557754', 'step': 29644, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:09.611549', 'step': 29644, 'epoch': 3}
{'type': 'loss', 'content': 0.031380776315927505, 'timestamp': '2025-10-02 01:04:09.614690', 'step': 29645, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:04:09.676975', 'step': 29645, 'epoch': 3}
{'type': 'loss', 'content': 0.004558364395052195, 'timestamp': '2025-10-02 01:04:09.687177', 'step': 29646, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:04:09.760867', 'step': 29646, 'epoch': 3}
{'type': 'loss', 'content': 0.023106427863240242, 'timestamp': '2025-10-02 01:04:09.773194', 'step': 29647, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:09.830247', 'step': 29647, 'epoch': 3}
{'type': 'loss', 'content': 0.03789568692445755, 'timestamp': '2025-10-02 01:04:09.836351', 'step': 29648, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:09.890360', 'step': 29648, 'epoch': 3}
{'type': 'loss', 'content': 0.018915066495537758, 'timestamp': '2025-10-02 01:04:09.893408', 'step': 29649, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:09.948631', 'step': 29649, 'epoch': 3}
{'type': 'loss', 'content': 0.013037839904427528, 'timestamp': '2025-10-02 01:04:09.951002', 'step': 29650, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:10.005383', 'step': 29650, 'epoch': 3}
{'type': 'loss', 'content': 0.03895349055528641, 'timestamp': '2025-10-02 01:04:10.007903', 'step': 29651, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:04:10.061503', 'step': 29651, 'epoch': 3}
{'type': 'loss', 'content': 0.028643546625971794, 'timestamp': '2025-10-02 01:04:10.067431', 'step': 29652, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:10.121532', 'step': 29652, 'epoch': 3}
{'type': 'loss', 'content': 0.01716640591621399, 'timestamp': '2025-10-02 01:04:10.123958', 'step': 29653, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:10.177604', 'step': 29653, 'epoch': 3}
{'type': 'loss', 'content': 0.10208833962678909, 'timestamp': '2025-10-02 01:04:10.181717', 'step': 29654, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:10.237187', 'step': 29654, 'epoch': 3}
{'type': 'loss', 'content': 0.002637038240209222, 'timestamp': '2025-10-02 01:04:10.243165', 'step': 29655, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:10.298944', 'step': 29655, 'epoch': 3}
{'type': 'loss', 'content': 0.03874743729829788, 'timestamp': '2025-10-02 01:04:10.309262', 'step': 29656, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:10.363771', 'step': 29656, 'epoch': 3}
{'type': 'loss', 'content': 0.01064529549330473, 'timestamp': '2025-10-02 01:04:10.366317', 'step': 29657, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:04:10.433684', 'step': 29657, 'epoch': 3}
{'type': 'loss', 'content': 0.03444143012166023, 'timestamp': '2025-10-02 01:04:10.448740', 'step': 29658, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:10.530540', 'step': 29658, 'epoch': 3}
{'type': 'loss', 'content': 0.06890738010406494, 'timestamp': '2025-10-02 01:04:10.545000', 'step': 29659, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:10.618699', 'step': 29659, 'epoch': 3}
{'type': 'loss', 'content': 0.049604687839746475, 'timestamp': '2025-10-02 01:04:10.633603', 'step': 29660, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:04:10.714915', 'step': 29660, 'epoch': 3}
{'type': 'loss', 'content': 0.02510322444140911, 'timestamp': '2025-10-02 01:04:10.739175', 'step': 29661, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:10.834232', 'step': 29661, 'epoch': 3}
{'type': 'loss', 'content': 0.025676056742668152, 'timestamp': '2025-10-02 01:04:10.843896', 'step': 29662, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:04:10.927531', 'step': 29662, 'epoch': 3}
{'type': 'loss', 'content': 0.009332447312772274, 'timestamp': '2025-10-02 01:04:10.937839', 'step': 29663, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:04:11.029577', 'step': 29663, 'epoch': 3}
{'type': 'loss', 'content': 0.046112239360809326, 'timestamp': '2025-10-02 01:04:11.042139', 'step': 29664, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:11.111501', 'step': 29664, 'epoch': 3}
{'type': 'loss', 'content': 0.01756947860121727, 'timestamp': '2025-10-02 01:04:11.144038', 'step': 29665, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:11.232878', 'step': 29665, 'epoch': 3}
{'type': 'loss', 'content': 0.018159864470362663, 'timestamp': '2025-10-02 01:04:11.244566', 'step': 29666, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:11.332003', 'step': 29666, 'epoch': 3}
{'type': 'loss', 'content': 0.011129738762974739, 'timestamp': '2025-10-02 01:04:11.351580', 'step': 29667, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:11.443435', 'step': 29667, 'epoch': 3}
{'type': 'loss', 'content': 0.08426480740308762, 'timestamp': '2025-10-02 01:04:11.455609', 'step': 29668, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:11.550741', 'step': 29668, 'epoch': 3}
{'type': 'loss', 'content': 0.007123157382011414, 'timestamp': '2025-10-02 01:04:11.557648', 'step': 29669, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:11.650806', 'step': 29669, 'epoch': 3}
{'type': 'loss', 'content': 0.03774028643965721, 'timestamp': '2025-10-02 01:04:11.670164', 'step': 29670, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:11.726285', 'step': 29670, 'epoch': 3}
{'type': 'loss', 'content': 0.02370283380150795, 'timestamp': '2025-10-02 01:04:11.735312', 'step': 29671, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:04:11.794684', 'step': 29671, 'epoch': 3}
{'type': 'loss', 'content': 0.01878541149199009, 'timestamp': '2025-10-02 01:04:11.805660', 'step': 29672, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:11.860375', 'step': 29672, 'epoch': 3}
{'type': 'loss', 'content': 0.09725610166788101, 'timestamp': '2025-10-02 01:04:11.870788', 'step': 29673, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:11.925999', 'step': 29673, 'epoch': 3}
{'type': 'loss', 'content': 0.09815187007188797, 'timestamp': '2025-10-02 01:04:11.929068', 'step': 29674, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:04:11.992659', 'step': 29674, 'epoch': 3}
{'type': 'loss', 'content': 0.008190738037228584, 'timestamp': '2025-10-02 01:04:12.003118', 'step': 29675, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:12.058362', 'step': 29675, 'epoch': 3}
{'type': 'loss', 'content': 0.033326659351587296, 'timestamp': '2025-10-02 01:04:12.064551', 'step': 29676, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:12.118195', 'step': 29676, 'epoch': 3}
{'type': 'loss', 'content': 0.015337992459535599, 'timestamp': '2025-10-02 01:04:12.120816', 'step': 29677, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:12.176844', 'step': 29677, 'epoch': 3}
{'type': 'loss', 'content': 0.004550815559923649, 'timestamp': '2025-10-02 01:04:12.179748', 'step': 29678, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:12.236051', 'step': 29678, 'epoch': 3}
{'type': 'loss', 'content': 0.0049698506481945515, 'timestamp': '2025-10-02 01:04:12.239478', 'step': 29679, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:12.294258', 'step': 29679, 'epoch': 3}
{'type': 'loss', 'content': 0.04250794276595116, 'timestamp': '2025-10-02 01:04:12.311556', 'step': 29680, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:12.365065', 'step': 29680, 'epoch': 3}
{'type': 'loss', 'content': 0.07465970516204834, 'timestamp': '2025-10-02 01:04:12.367845', 'step': 29681, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:12.422675', 'step': 29681, 'epoch': 3}
{'type': 'loss', 'content': 0.04060926288366318, 'timestamp': '2025-10-02 01:04:12.428561', 'step': 29682, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:12.484983', 'step': 29682, 'epoch': 3}
{'type': 'loss', 'content': 0.03128552436828613, 'timestamp': '2025-10-02 01:04:12.494338', 'step': 29683, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:12.549304', 'step': 29683, 'epoch': 3}
{'type': 'loss', 'content': 0.0350489504635334, 'timestamp': '2025-10-02 01:04:12.555934', 'step': 29684, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:04:12.618653', 'step': 29684, 'epoch': 3}
{'type': 'loss', 'content': 0.10556218028068542, 'timestamp': '2025-10-02 01:04:12.629979', 'step': 29685, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:12.685993', 'step': 29685, 'epoch': 3}
{'type': 'loss', 'content': 0.037376463413238525, 'timestamp': '2025-10-02 01:04:12.688395', 'step': 29686, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:04:12.747442', 'step': 29686, 'epoch': 3}
{'type': 'loss', 'content': 0.006818835623562336, 'timestamp': '2025-10-02 01:04:12.757754', 'step': 29687, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:12.814020', 'step': 29687, 'epoch': 3}
{'type': 'loss', 'content': 0.06158631667494774, 'timestamp': '2025-10-02 01:04:12.820449', 'step': 29688, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:12.874306', 'step': 29688, 'epoch': 3}
{'type': 'loss', 'content': 0.019682178273797035, 'timestamp': '2025-10-02 01:04:12.881922', 'step': 29689, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:12.937007', 'step': 29689, 'epoch': 3}
{'type': 'loss', 'content': 0.035682376474142075, 'timestamp': '2025-10-02 01:04:12.939566', 'step': 29690, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:04:12.994666', 'step': 29690, 'epoch': 3}
{'type': 'loss', 'content': 0.07381869852542877, 'timestamp': '2025-10-02 01:04:12.997719', 'step': 29691, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:13.052978', 'step': 29691, 'epoch': 3}
{'type': 'loss', 'content': 0.046093493700027466, 'timestamp': '2025-10-02 01:04:13.061236', 'step': 29692, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:13.114808', 'step': 29692, 'epoch': 3}
{'type': 'loss', 'content': 0.07921328395605087, 'timestamp': '2025-10-02 01:04:13.117014', 'step': 29693, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:04:13.171074', 'step': 29693, 'epoch': 3}
{'type': 'loss', 'content': 0.10782711207866669, 'timestamp': '2025-10-02 01:04:13.173295', 'step': 29694, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:13.228015', 'step': 29694, 'epoch': 3}
{'type': 'loss', 'content': 0.08019820600748062, 'timestamp': '2025-10-02 01:04:13.230691', 'step': 29695, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:13.286105', 'step': 29695, 'epoch': 3}
{'type': 'loss', 'content': 0.03013012930750847, 'timestamp': '2025-10-02 01:04:13.296177', 'step': 29696, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:13.351074', 'step': 29696, 'epoch': 3}
{'type': 'loss', 'content': 0.02607104368507862, 'timestamp': '2025-10-02 01:04:13.353382', 'step': 29697, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:04:13.416461', 'step': 29697, 'epoch': 3}
{'type': 'loss', 'content': 0.053902767598629, 'timestamp': '2025-10-02 01:04:13.427201', 'step': 29698, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:13.481557', 'step': 29698, 'epoch': 3}
{'type': 'loss', 'content': 0.04831009730696678, 'timestamp': '2025-10-02 01:04:13.484566', 'step': 29699, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:13.538982', 'step': 29699, 'epoch': 3}
{'type': 'loss', 'content': 0.011420009657740593, 'timestamp': '2025-10-02 01:04:13.545354', 'step': 29700, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:13.599713', 'step': 29700, 'epoch': 3}
{'type': 'loss', 'content': 0.1241692304611206, 'timestamp': '2025-10-02 01:04:13.601668', 'step': 29701, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:13.656276', 'step': 29701, 'epoch': 3}
{'type': 'loss', 'content': 0.031509798020124435, 'timestamp': '2025-10-02 01:04:13.658829', 'step': 29702, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:13.713933', 'step': 29702, 'epoch': 3}
{'type': 'loss', 'content': 0.0916774570941925, 'timestamp': '2025-10-02 01:04:13.716291', 'step': 29703, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:13.770330', 'step': 29703, 'epoch': 3}
{'type': 'loss', 'content': 0.05851386487483978, 'timestamp': '2025-10-02 01:04:13.775986', 'step': 29704, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:13.830154', 'step': 29704, 'epoch': 3}
{'type': 'loss', 'content': 0.05757136642932892, 'timestamp': '2025-10-02 01:04:13.837689', 'step': 29705, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:13.891197', 'step': 29705, 'epoch': 3}
{'type': 'loss', 'content': 0.09865971654653549, 'timestamp': '2025-10-02 01:04:13.894032', 'step': 29706, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:04:13.953416', 'step': 29706, 'epoch': 3}
{'type': 'loss', 'content': 0.011077802628278732, 'timestamp': '2025-10-02 01:04:13.963588', 'step': 29707, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:14.018728', 'step': 29707, 'epoch': 3}
{'type': 'loss', 'content': 0.01566551811993122, 'timestamp': '2025-10-02 01:04:14.024694', 'step': 29708, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:04:14.078854', 'step': 29708, 'epoch': 3}
{'type': 'loss', 'content': 0.0318286307156086, 'timestamp': '2025-10-02 01:04:14.081306', 'step': 29709, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:14.136603', 'step': 29709, 'epoch': 3}
{'type': 'loss', 'content': 0.011154938489198685, 'timestamp': '2025-10-02 01:04:14.146153', 'step': 29710, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:04:14.200507', 'step': 29710, 'epoch': 3}
{'type': 'loss', 'content': 0.01135927066206932, 'timestamp': '2025-10-02 01:04:14.202669', 'step': 29711, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:04:14.256504', 'step': 29711, 'epoch': 3}
{'type': 'loss', 'content': 0.05986492708325386, 'timestamp': '2025-10-02 01:04:14.263253', 'step': 29712, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:04:14.319356', 'step': 29712, 'epoch': 3}
{'type': 'loss', 'content': 0.015366564504802227, 'timestamp': '2025-10-02 01:04:14.321553', 'step': 29713, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:14.376660', 'step': 29713, 'epoch': 3}
{'type': 'loss', 'content': 0.011641434393823147, 'timestamp': '2025-10-02 01:04:14.385962', 'step': 29714, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:14.441645', 'step': 29714, 'epoch': 3}
{'type': 'loss', 'content': 0.08059731870889664, 'timestamp': '2025-10-02 01:04:14.444252', 'step': 29715, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:14.500294', 'step': 29715, 'epoch': 3}
{'type': 'loss', 'content': 0.15804815292358398, 'timestamp': '2025-10-02 01:04:14.506347', 'step': 29716, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:14.560886', 'step': 29716, 'epoch': 3}
{'type': 'loss', 'content': 0.026187287643551826, 'timestamp': '2025-10-02 01:04:14.563399', 'step': 29717, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:04:14.620621', 'step': 29717, 'epoch': 3}
{'type': 'loss', 'content': 0.04801127314567566, 'timestamp': '2025-10-02 01:04:14.622883', 'step': 29718, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:14.677691', 'step': 29718, 'epoch': 3}
{'type': 'loss', 'content': 0.06930938363075256, 'timestamp': '2025-10-02 01:04:14.680127', 'step': 29719, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:14.736415', 'step': 29719, 'epoch': 3}
{'type': 'loss', 'content': 0.0013264714507386088, 'timestamp': '2025-10-02 01:04:14.742235', 'step': 29720, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:14.795931', 'step': 29720, 'epoch': 3}
{'type': 'loss', 'content': 0.07815852761268616, 'timestamp': '2025-10-02 01:04:14.798376', 'step': 29721, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:14.853394', 'step': 29721, 'epoch': 3}
{'type': 'loss', 'content': 0.08084619790315628, 'timestamp': '2025-10-02 01:04:14.862702', 'step': 29722, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:04:14.923799', 'step': 29722, 'epoch': 3}
{'type': 'loss', 'content': 0.0026804208755493164, 'timestamp': '2025-10-02 01:04:14.934094', 'step': 29723, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:14.988928', 'step': 29723, 'epoch': 3}
{'type': 'loss', 'content': 0.05178896710276604, 'timestamp': '2025-10-02 01:04:14.995395', 'step': 29724, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:15.049844', 'step': 29724, 'epoch': 3}
{'type': 'loss', 'content': 0.04133957624435425, 'timestamp': '2025-10-02 01:04:15.052168', 'step': 29725, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:15.106887', 'step': 29725, 'epoch': 3}
{'type': 'loss', 'content': 0.000938260112889111, 'timestamp': '2025-10-02 01:04:15.112891', 'step': 29726, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:15.167615', 'step': 29726, 'epoch': 3}
{'type': 'loss', 'content': 0.012136108241975307, 'timestamp': '2025-10-02 01:04:15.173614', 'step': 29727, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:15.228519', 'step': 29727, 'epoch': 3}
{'type': 'loss', 'content': 0.031484976410865784, 'timestamp': '2025-10-02 01:04:15.235146', 'step': 29728, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:04:15.293058', 'step': 29728, 'epoch': 3}
{'type': 'loss', 'content': 0.0060841296799480915, 'timestamp': '2025-10-02 01:04:15.304031', 'step': 29729, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:15.360017', 'step': 29729, 'epoch': 3}
{'type': 'loss', 'content': 0.032361458986997604, 'timestamp': '2025-10-02 01:04:15.369504', 'step': 29730, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:15.424622', 'step': 29730, 'epoch': 3}
{'type': 'loss', 'content': 0.09079845994710922, 'timestamp': '2025-10-02 01:04:15.426928', 'step': 29731, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:15.482511', 'step': 29731, 'epoch': 3}
{'type': 'loss', 'content': 0.03169635683298111, 'timestamp': '2025-10-02 01:04:15.489046', 'step': 29732, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:04:15.543095', 'step': 29732, 'epoch': 3}
{'type': 'loss', 'content': 0.10694273561239243, 'timestamp': '2025-10-02 01:04:15.546410', 'step': 29733, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:15.602692', 'step': 29733, 'epoch': 3}
{'type': 'loss', 'content': 0.035838983952999115, 'timestamp': '2025-10-02 01:04:15.612187', 'step': 29734, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:15.666454', 'step': 29734, 'epoch': 3}
{'type': 'loss', 'content': 0.09355618804693222, 'timestamp': '2025-10-02 01:04:15.668962', 'step': 29735, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:15.723837', 'step': 29735, 'epoch': 3}
{'type': 'loss', 'content': 0.057588376104831696, 'timestamp': '2025-10-02 01:04:15.730360', 'step': 29736, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:15.784476', 'step': 29736, 'epoch': 3}
{'type': 'loss', 'content': 0.12266259640455246, 'timestamp': '2025-10-02 01:04:15.786639', 'step': 29737, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:15.841687', 'step': 29737, 'epoch': 3}
{'type': 'loss', 'content': 0.07513472437858582, 'timestamp': '2025-10-02 01:04:15.847507', 'step': 29738, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:15.902191', 'step': 29738, 'epoch': 3}
{'type': 'loss', 'content': 0.05338452383875847, 'timestamp': '2025-10-02 01:04:15.904980', 'step': 29739, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:04:15.967742', 'step': 29739, 'epoch': 3}
{'type': 'loss', 'content': 0.007089284248650074, 'timestamp': '2025-10-02 01:04:15.979204', 'step': 29740, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:16.034369', 'step': 29740, 'epoch': 3}
{'type': 'loss', 'content': 0.11800035089254379, 'timestamp': '2025-10-02 01:04:16.037315', 'step': 29741, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:04:16.091079', 'step': 29741, 'epoch': 3}
{'type': 'loss', 'content': 0.11497682332992554, 'timestamp': '2025-10-02 01:04:16.093919', 'step': 29742, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:04:16.149327', 'step': 29742, 'epoch': 3}
{'type': 'loss', 'content': 0.031400520354509354, 'timestamp': '2025-10-02 01:04:16.151788', 'step': 29743, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:16.211245', 'step': 29743, 'epoch': 3}
{'type': 'loss', 'content': 0.0680851861834526, 'timestamp': '2025-10-02 01:04:16.217172', 'step': 29744, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:16.272658', 'step': 29744, 'epoch': 3}
{'type': 'loss', 'content': 0.009073937311768532, 'timestamp': '2025-10-02 01:04:16.275457', 'step': 29745, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:16.330169', 'step': 29745, 'epoch': 3}
{'type': 'loss', 'content': 0.001797712524421513, 'timestamp': '2025-10-02 01:04:16.337621', 'step': 29746, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:16.394807', 'step': 29746, 'epoch': 3}
{'type': 'loss', 'content': 0.002297138562425971, 'timestamp': '2025-10-02 01:04:16.404447', 'step': 29747, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:16.459447', 'step': 29747, 'epoch': 3}
{'type': 'loss', 'content': 0.016655445098876953, 'timestamp': '2025-10-02 01:04:16.465386', 'step': 29748, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:16.519676', 'step': 29748, 'epoch': 3}
{'type': 'loss', 'content': 0.05796850100159645, 'timestamp': '2025-10-02 01:04:16.522009', 'step': 29749, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:04:16.576108', 'step': 29749, 'epoch': 3}
{'type': 'loss', 'content': 0.044336333870887756, 'timestamp': '2025-10-02 01:04:16.578096', 'step': 29750, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:16.633360', 'step': 29750, 'epoch': 3}
{'type': 'loss', 'content': 0.08125248551368713, 'timestamp': '2025-10-02 01:04:16.635721', 'step': 29751, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:16.691009', 'step': 29751, 'epoch': 3}
{'type': 'loss', 'content': 0.033209431916475296, 'timestamp': '2025-10-02 01:04:16.697437', 'step': 29752, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:16.753295', 'step': 29752, 'epoch': 3}
{'type': 'loss', 'content': 0.01903623715043068, 'timestamp': '2025-10-02 01:04:16.760910', 'step': 29753, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:16.818506', 'step': 29753, 'epoch': 3}
{'type': 'loss', 'content': 0.05399109795689583, 'timestamp': '2025-10-02 01:04:16.821124', 'step': 29754, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:16.877834', 'step': 29754, 'epoch': 3}
{'type': 'loss', 'content': 0.01807030290365219, 'timestamp': '2025-10-02 01:04:16.887133', 'step': 29755, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:16.944507', 'step': 29755, 'epoch': 3}
{'type': 'loss', 'content': 0.047203630208969116, 'timestamp': '2025-10-02 01:04:16.951453', 'step': 29756, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:17.008265', 'step': 29756, 'epoch': 3}
{'type': 'loss', 'content': 0.0259354617446661, 'timestamp': '2025-10-02 01:04:17.011856', 'step': 29757, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:04:17.075708', 'step': 29757, 'epoch': 3}
{'type': 'loss', 'content': 0.060021888464689255, 'timestamp': '2025-10-02 01:04:17.086346', 'step': 29758, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:04:17.144978', 'step': 29758, 'epoch': 3}
{'type': 'loss', 'content': 0.07590055465698242, 'timestamp': '2025-10-02 01:04:17.147346', 'step': 29759, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:17.203072', 'step': 29759, 'epoch': 3}
{'type': 'loss', 'content': 0.06284738332033157, 'timestamp': '2025-10-02 01:04:17.209220', 'step': 29760, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:04:17.270231', 'step': 29760, 'epoch': 3}
{'type': 'loss', 'content': 0.03249210864305496, 'timestamp': '2025-10-02 01:04:17.281151', 'step': 29761, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:17.337885', 'step': 29761, 'epoch': 3}
{'type': 'loss', 'content': 0.005733510944992304, 'timestamp': '2025-10-02 01:04:17.345090', 'step': 29762, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 01:04:17.421113', 'step': 29762, 'epoch': 3}
{'type': 'loss', 'content': 0.017511844635009766, 'timestamp': '2025-10-02 01:04:17.433826', 'step': 29763, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:17.494505', 'step': 29763, 'epoch': 3}
{'type': 'loss', 'content': 0.08319935202598572, 'timestamp': '2025-10-02 01:04:17.501319', 'step': 29764, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:17.557597', 'step': 29764, 'epoch': 3}
{'type': 'loss', 'content': 0.02581554464995861, 'timestamp': '2025-10-02 01:04:17.560565', 'step': 29765, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:04:17.623388', 'step': 29765, 'epoch': 3}
{'type': 'loss', 'content': 0.01805947534739971, 'timestamp': '2025-10-02 01:04:17.633684', 'step': 29766, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:04:17.699694', 'step': 29766, 'epoch': 3}
{'type': 'loss', 'content': 0.04568736255168915, 'timestamp': '2025-10-02 01:04:17.710508', 'step': 29767, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:17.767800', 'step': 29767, 'epoch': 3}
{'type': 'loss', 'content': 0.021791532635688782, 'timestamp': '2025-10-02 01:04:17.773909', 'step': 29768, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:17.830727', 'step': 29768, 'epoch': 3}
{'type': 'loss', 'content': 0.023393884301185608, 'timestamp': '2025-10-02 01:04:17.838229', 'step': 29769, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:17.894628', 'step': 29769, 'epoch': 3}
{'type': 'loss', 'content': 0.02010761946439743, 'timestamp': '2025-10-02 01:04:17.897359', 'step': 29770, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:17.952737', 'step': 29770, 'epoch': 3}
{'type': 'loss', 'content': 0.0400485098361969, 'timestamp': '2025-10-02 01:04:17.960139', 'step': 29771, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 01:04:18.034390', 'step': 29771, 'epoch': 3}
{'type': 'loss', 'content': 0.02739192172884941, 'timestamp': '2025-10-02 01:04:18.048378', 'step': 29772, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:04:18.104913', 'step': 29772, 'epoch': 3}
{'type': 'loss', 'content': 0.045587994158267975, 'timestamp': '2025-10-02 01:04:18.107339', 'step': 29773, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:18.161826', 'step': 29773, 'epoch': 3}
{'type': 'loss', 'content': 0.03216620534658432, 'timestamp': '2025-10-02 01:04:18.164577', 'step': 29774, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:18.219049', 'step': 29774, 'epoch': 3}
{'type': 'loss', 'content': 0.031069982796907425, 'timestamp': '2025-10-02 01:04:18.221476', 'step': 29775, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:18.276531', 'step': 29775, 'epoch': 3}
{'type': 'loss', 'content': 0.047844551503658295, 'timestamp': '2025-10-02 01:04:18.286975', 'step': 29776, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:18.340285', 'step': 29776, 'epoch': 3}
{'type': 'loss', 'content': 0.05953380838036537, 'timestamp': '2025-10-02 01:04:18.342661', 'step': 29777, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:04:18.398736', 'step': 29777, 'epoch': 3}
{'type': 'loss', 'content': 0.08728678524494171, 'timestamp': '2025-10-02 01:04:18.401128', 'step': 29778, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:18.455288', 'step': 29778, 'epoch': 3}
{'type': 'loss', 'content': 0.011728719808161259, 'timestamp': '2025-10-02 01:04:18.461103', 'step': 29779, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:18.516462', 'step': 29779, 'epoch': 3}
{'type': 'loss', 'content': 0.08129335194826126, 'timestamp': '2025-10-02 01:04:18.522488', 'step': 29780, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:18.576805', 'step': 29780, 'epoch': 3}
{'type': 'loss', 'content': 0.026643136516213417, 'timestamp': '2025-10-02 01:04:18.586455', 'step': 29781, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:18.642438', 'step': 29781, 'epoch': 3}
{'type': 'loss', 'content': 0.01957688294351101, 'timestamp': '2025-10-02 01:04:18.644531', 'step': 29782, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:18.700005', 'step': 29782, 'epoch': 3}
{'type': 'loss', 'content': 0.021805739030241966, 'timestamp': '2025-10-02 01:04:18.705811', 'step': 29783, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:18.761176', 'step': 29783, 'epoch': 3}
{'type': 'loss', 'content': 0.06892837584018707, 'timestamp': '2025-10-02 01:04:18.771469', 'step': 29784, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 01:04:18.843242', 'step': 29784, 'epoch': 3}
{'type': 'loss', 'content': 0.01033223606646061, 'timestamp': '2025-10-02 01:04:18.857755', 'step': 29785, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:04:18.926855', 'step': 29785, 'epoch': 3}
{'type': 'loss', 'content': 0.005893225781619549, 'timestamp': '2025-10-02 01:04:18.938834', 'step': 29786, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:18.995326', 'step': 29786, 'epoch': 3}
{'type': 'loss', 'content': 0.08314226567745209, 'timestamp': '2025-10-02 01:04:19.004863', 'step': 29787, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:19.059430', 'step': 29787, 'epoch': 3}
{'type': 'loss', 'content': 0.09330688416957855, 'timestamp': '2025-10-02 01:04:19.065952', 'step': 29788, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:19.120830', 'step': 29788, 'epoch': 3}
{'type': 'loss', 'content': 0.022968994453549385, 'timestamp': '2025-10-02 01:04:19.123282', 'step': 29789, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:19.177776', 'step': 29789, 'epoch': 3}
{'type': 'loss', 'content': 0.11324834823608398, 'timestamp': '2025-10-02 01:04:19.180392', 'step': 29790, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:19.235920', 'step': 29790, 'epoch': 3}
{'type': 'loss', 'content': 0.0524514801800251, 'timestamp': '2025-10-02 01:04:19.238547', 'step': 29791, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:19.293452', 'step': 29791, 'epoch': 3}
{'type': 'loss', 'content': 0.042070675641298294, 'timestamp': '2025-10-02 01:04:19.299669', 'step': 29792, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:19.353656', 'step': 29792, 'epoch': 3}
{'type': 'loss', 'content': 0.023468373343348503, 'timestamp': '2025-10-02 01:04:19.361402', 'step': 29793, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:19.416800', 'step': 29793, 'epoch': 3}
{'type': 'loss', 'content': 0.05335249751806259, 'timestamp': '2025-10-02 01:04:19.419345', 'step': 29794, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:19.474205', 'step': 29794, 'epoch': 3}
{'type': 'loss', 'content': 0.13591447472572327, 'timestamp': '2025-10-02 01:04:19.476696', 'step': 29795, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:19.530502', 'step': 29795, 'epoch': 3}
{'type': 'loss', 'content': 0.025349214673042297, 'timestamp': '2025-10-02 01:04:19.536440', 'step': 29796, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:04:19.591866', 'step': 29796, 'epoch': 3}
{'type': 'loss', 'content': 0.05553998798131943, 'timestamp': '2025-10-02 01:04:19.594417', 'step': 29797, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:19.648188', 'step': 29797, 'epoch': 3}
{'type': 'loss', 'content': 0.09028910100460052, 'timestamp': '2025-10-02 01:04:19.650731', 'step': 29798, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:19.704668', 'step': 29798, 'epoch': 3}
{'type': 'loss', 'content': 0.04840906336903572, 'timestamp': '2025-10-02 01:04:19.707628', 'step': 29799, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:19.761625', 'step': 29799, 'epoch': 3}
{'type': 'loss', 'content': 0.09328979253768921, 'timestamp': '2025-10-02 01:04:19.767953', 'step': 29800, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:19.824467', 'step': 29800, 'epoch': 3}
{'type': 'loss', 'content': 0.011888967826962471, 'timestamp': '2025-10-02 01:04:19.826873', 'step': 29801, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:19.882500', 'step': 29801, 'epoch': 3}
{'type': 'loss', 'content': 0.06824381649494171, 'timestamp': '2025-10-02 01:04:19.890087', 'step': 29802, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:19.944590', 'step': 29802, 'epoch': 3}
{'type': 'loss', 'content': 0.07318674772977829, 'timestamp': '2025-10-02 01:04:19.947175', 'step': 29803, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:20.001405', 'step': 29803, 'epoch': 3}
{'type': 'loss', 'content': 0.07733088731765747, 'timestamp': '2025-10-02 01:04:20.007429', 'step': 29804, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:04:20.060954', 'step': 29804, 'epoch': 3}
{'type': 'loss', 'content': 0.010956314392387867, 'timestamp': '2025-10-02 01:04:20.063564', 'step': 29805, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:20.119512', 'step': 29805, 'epoch': 3}
{'type': 'loss', 'content': 0.025798052549362183, 'timestamp': '2025-10-02 01:04:20.125463', 'step': 29806, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:20.181678', 'step': 29806, 'epoch': 3}
{'type': 'loss', 'content': 0.030678365379571915, 'timestamp': '2025-10-02 01:04:20.187556', 'step': 29807, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:20.242788', 'step': 29807, 'epoch': 3}
{'type': 'loss', 'content': 0.04573633894324303, 'timestamp': '2025-10-02 01:04:20.249095', 'step': 29808, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 01:04:20.323077', 'step': 29808, 'epoch': 3}
{'type': 'loss', 'content': 0.018063364550471306, 'timestamp': '2025-10-02 01:04:20.337776', 'step': 29809, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:20.393260', 'step': 29809, 'epoch': 3}
{'type': 'loss', 'content': 0.04852015897631645, 'timestamp': '2025-10-02 01:04:20.395761', 'step': 29810, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:20.451228', 'step': 29810, 'epoch': 3}
{'type': 'loss', 'content': 0.019097890704870224, 'timestamp': '2025-10-02 01:04:20.453827', 'step': 29811, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:20.508258', 'step': 29811, 'epoch': 3}
{'type': 'loss', 'content': 0.2582566738128662, 'timestamp': '2025-10-02 01:04:20.514851', 'step': 29812, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:20.570285', 'step': 29812, 'epoch': 3}
{'type': 'loss', 'content': 0.024741802364587784, 'timestamp': '2025-10-02 01:04:20.573377', 'step': 29813, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:20.627773', 'step': 29813, 'epoch': 3}
{'type': 'loss', 'content': 0.02602744661271572, 'timestamp': '2025-10-02 01:04:20.630421', 'step': 29814, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:04:20.701663', 'step': 29814, 'epoch': 3}
{'type': 'loss', 'content': 0.0014126674504950643, 'timestamp': '2025-10-02 01:04:20.714012', 'step': 29815, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:04:20.769398', 'step': 29815, 'epoch': 3}
{'type': 'loss', 'content': 0.054095566272735596, 'timestamp': '2025-10-02 01:04:20.775431', 'step': 29816, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:20.829602', 'step': 29816, 'epoch': 3}
{'type': 'loss', 'content': 0.04958480969071388, 'timestamp': '2025-10-02 01:04:20.832055', 'step': 29817, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:20.885983', 'step': 29817, 'epoch': 3}
{'type': 'loss', 'content': 0.07966089993715286, 'timestamp': '2025-10-02 01:04:20.888509', 'step': 29818, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:04:20.952461', 'step': 29818, 'epoch': 3}
{'type': 'loss', 'content': 0.0047937361523509026, 'timestamp': '2025-10-02 01:04:20.963109', 'step': 29819, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:21.017848', 'step': 29819, 'epoch': 3}
{'type': 'loss', 'content': 0.015929343178868294, 'timestamp': '2025-10-02 01:04:21.023905', 'step': 29820, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:21.080082', 'step': 29820, 'epoch': 3}
{'type': 'loss', 'content': 0.04075999930500984, 'timestamp': '2025-10-02 01:04:21.082725', 'step': 29821, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:21.137248', 'step': 29821, 'epoch': 3}
{'type': 'loss', 'content': 0.08001942187547684, 'timestamp': '2025-10-02 01:04:21.139695', 'step': 29822, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:21.195264', 'step': 29822, 'epoch': 3}
{'type': 'loss', 'content': 0.02574877254664898, 'timestamp': '2025-10-02 01:04:21.202669', 'step': 29823, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:04:21.257063', 'step': 29823, 'epoch': 3}
{'type': 'loss', 'content': 0.1392442286014557, 'timestamp': '2025-10-02 01:04:21.263216', 'step': 29824, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:21.318300', 'step': 29824, 'epoch': 3}
{'type': 'loss', 'content': 0.07990718632936478, 'timestamp': '2025-10-02 01:04:21.320733', 'step': 29825, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:21.375200', 'step': 29825, 'epoch': 3}
{'type': 'loss', 'content': 0.033964261412620544, 'timestamp': '2025-10-02 01:04:21.377429', 'step': 29826, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:21.432099', 'step': 29826, 'epoch': 3}
{'type': 'loss', 'content': 0.02748025394976139, 'timestamp': '2025-10-02 01:04:21.434307', 'step': 29827, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:21.488415', 'step': 29827, 'epoch': 3}
{'type': 'loss', 'content': 0.0650281012058258, 'timestamp': '2025-10-02 01:04:21.495180', 'step': 29828, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:21.548603', 'step': 29828, 'epoch': 3}
{'type': 'loss', 'content': 0.08296538144350052, 'timestamp': '2025-10-02 01:04:21.550864', 'step': 29829, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:04:21.606126', 'step': 29829, 'epoch': 3}
{'type': 'loss', 'content': 0.027634304016828537, 'timestamp': '2025-10-02 01:04:21.608176', 'step': 29830, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:21.663527', 'step': 29830, 'epoch': 3}
{'type': 'loss', 'content': 0.036511894315481186, 'timestamp': '2025-10-02 01:04:21.670814', 'step': 29831, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:21.727724', 'step': 29831, 'epoch': 3}
{'type': 'loss', 'content': 0.03571159020066261, 'timestamp': '2025-10-02 01:04:21.734550', 'step': 29832, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:21.789010', 'step': 29832, 'epoch': 3}
{'type': 'loss', 'content': 0.04933184012770653, 'timestamp': '2025-10-02 01:04:21.799380', 'step': 29833, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:04:21.853774', 'step': 29833, 'epoch': 3}
{'type': 'loss', 'content': 0.05722782388329506, 'timestamp': '2025-10-02 01:04:21.856629', 'step': 29834, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:21.913845', 'step': 29834, 'epoch': 3}
{'type': 'loss', 'content': 0.07117749005556107, 'timestamp': '2025-10-02 01:04:21.919444', 'step': 29835, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:21.975189', 'step': 29835, 'epoch': 3}
{'type': 'loss', 'content': 0.028517838567495346, 'timestamp': '2025-10-02 01:04:21.981939', 'step': 29836, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:04:22.041928', 'step': 29836, 'epoch': 3}
{'type': 'loss', 'content': 0.06059737130999565, 'timestamp': '2025-10-02 01:04:22.053376', 'step': 29837, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:04:22.113687', 'step': 29837, 'epoch': 3}
{'type': 'loss', 'content': 0.015803636983036995, 'timestamp': '2025-10-02 01:04:22.123788', 'step': 29838, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:22.178681', 'step': 29838, 'epoch': 3}
{'type': 'loss', 'content': 0.036219850182533264, 'timestamp': '2025-10-02 01:04:22.181082', 'step': 29839, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:22.234810', 'step': 29839, 'epoch': 3}
{'type': 'loss', 'content': 0.04429113492369652, 'timestamp': '2025-10-02 01:04:22.241036', 'step': 29840, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:22.295525', 'step': 29840, 'epoch': 3}
{'type': 'loss', 'content': 0.05034990236163139, 'timestamp': '2025-10-02 01:04:22.298400', 'step': 29841, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:22.352639', 'step': 29841, 'epoch': 3}
{'type': 'loss', 'content': 0.042696528136730194, 'timestamp': '2025-10-02 01:04:22.358575', 'step': 29842, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:22.416818', 'step': 29842, 'epoch': 3}
{'type': 'loss', 'content': 0.04766529053449631, 'timestamp': '2025-10-02 01:04:22.426345', 'step': 29843, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:22.481929', 'step': 29843, 'epoch': 3}
{'type': 'loss', 'content': 0.04641290754079819, 'timestamp': '2025-10-02 01:04:22.489934', 'step': 29844, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:22.543610', 'step': 29844, 'epoch': 3}
{'type': 'loss', 'content': 0.05145011097192764, 'timestamp': '2025-10-02 01:04:22.546758', 'step': 29845, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:22.601064', 'step': 29845, 'epoch': 3}
{'type': 'loss', 'content': 0.04387372359633446, 'timestamp': '2025-10-02 01:04:22.603725', 'step': 29846, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:04:22.657784', 'step': 29846, 'epoch': 3}
{'type': 'loss', 'content': 0.018206510692834854, 'timestamp': '2025-10-02 01:04:22.660660', 'step': 29847, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:22.716656', 'step': 29847, 'epoch': 3}
{'type': 'loss', 'content': 0.02281823754310608, 'timestamp': '2025-10-02 01:04:22.724864', 'step': 29848, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:22.779367', 'step': 29848, 'epoch': 3}
{'type': 'loss', 'content': 0.032085735350847244, 'timestamp': '2025-10-02 01:04:22.785501', 'step': 29849, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:22.840298', 'step': 29849, 'epoch': 3}
{'type': 'loss', 'content': 0.009550710208714008, 'timestamp': '2025-10-02 01:04:22.849620', 'step': 29850, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:22.904517', 'step': 29850, 'epoch': 3}
{'type': 'loss', 'content': 0.010892482474446297, 'timestamp': '2025-10-02 01:04:22.907040', 'step': 29851, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:04:22.961745', 'step': 29851, 'epoch': 3}
{'type': 'loss', 'content': 0.051416367292404175, 'timestamp': '2025-10-02 01:04:22.967828', 'step': 29852, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:04:23.023710', 'step': 29852, 'epoch': 3}
{'type': 'loss', 'content': 0.0368417352437973, 'timestamp': '2025-10-02 01:04:23.026510', 'step': 29853, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:23.081437', 'step': 29853, 'epoch': 3}
{'type': 'loss', 'content': 0.11140716820955276, 'timestamp': '2025-10-02 01:04:23.083348', 'step': 29854, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:23.137676', 'step': 29854, 'epoch': 3}
{'type': 'loss', 'content': 0.15810684859752655, 'timestamp': '2025-10-02 01:04:23.140274', 'step': 29855, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:23.194310', 'step': 29855, 'epoch': 3}
{'type': 'loss', 'content': 0.06359142065048218, 'timestamp': '2025-10-02 01:04:23.199861', 'step': 29856, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:23.254369', 'step': 29856, 'epoch': 3}
{'type': 'loss', 'content': 0.028291869908571243, 'timestamp': '2025-10-02 01:04:23.263954', 'step': 29857, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:23.318716', 'step': 29857, 'epoch': 3}
{'type': 'loss', 'content': 0.002907349495217204, 'timestamp': '2025-10-02 01:04:23.321383', 'step': 29858, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:04:23.376393', 'step': 29858, 'epoch': 3}
{'type': 'loss', 'content': 0.024753069505095482, 'timestamp': '2025-10-02 01:04:23.378691', 'step': 29859, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:23.432754', 'step': 29859, 'epoch': 3}
{'type': 'loss', 'content': 0.06068623811006546, 'timestamp': '2025-10-02 01:04:23.439005', 'step': 29860, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:23.492498', 'step': 29860, 'epoch': 3}
{'type': 'loss', 'content': 0.0529630221426487, 'timestamp': '2025-10-02 01:04:23.500222', 'step': 29861, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:04:23.555504', 'step': 29861, 'epoch': 3}
{'type': 'loss', 'content': 0.057056721299886703, 'timestamp': '2025-10-02 01:04:23.557561', 'step': 29862, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:23.611435', 'step': 29862, 'epoch': 3}
{'type': 'loss', 'content': 0.05162300169467926, 'timestamp': '2025-10-02 01:04:23.614323', 'step': 29863, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:04:23.673781', 'step': 29863, 'epoch': 3}
{'type': 'loss', 'content': 0.025558514520525932, 'timestamp': '2025-10-02 01:04:23.684790', 'step': 29864, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:23.738305', 'step': 29864, 'epoch': 3}
{'type': 'loss', 'content': 0.09495902061462402, 'timestamp': '2025-10-02 01:04:23.740729', 'step': 29865, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:23.795793', 'step': 29865, 'epoch': 3}
{'type': 'loss', 'content': 0.05735696852207184, 'timestamp': '2025-10-02 01:04:23.798301', 'step': 29866, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:23.853175', 'step': 29866, 'epoch': 3}
{'type': 'loss', 'content': 0.08725731074810028, 'timestamp': '2025-10-02 01:04:23.855852', 'step': 29867, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:23.910428', 'step': 29867, 'epoch': 3}
{'type': 'loss', 'content': 0.03699329122900963, 'timestamp': '2025-10-02 01:04:23.916590', 'step': 29868, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:23.970601', 'step': 29868, 'epoch': 3}
{'type': 'loss', 'content': 0.023853272199630737, 'timestamp': '2025-10-02 01:04:23.980827', 'step': 29869, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:24.036524', 'step': 29869, 'epoch': 3}
{'type': 'loss', 'content': 0.004995107185095549, 'timestamp': '2025-10-02 01:04:24.045906', 'step': 29870, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:24.101354', 'step': 29870, 'epoch': 3}
{'type': 'loss', 'content': 0.010774131864309311, 'timestamp': '2025-10-02 01:04:24.104449', 'step': 29871, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:24.158827', 'step': 29871, 'epoch': 3}
{'type': 'loss', 'content': 0.03432236239314079, 'timestamp': '2025-10-02 01:04:24.165124', 'step': 29872, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:04:24.224168', 'step': 29872, 'epoch': 3}
{'type': 'loss', 'content': 0.038644108921289444, 'timestamp': '2025-10-02 01:04:24.235154', 'step': 29873, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:04:24.302284', 'step': 29873, 'epoch': 3}
{'type': 'loss', 'content': 0.04596292972564697, 'timestamp': '2025-10-02 01:04:24.312881', 'step': 29874, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:24.367630', 'step': 29874, 'epoch': 3}
{'type': 'loss', 'content': 0.04581231251358986, 'timestamp': '2025-10-02 01:04:24.369995', 'step': 29875, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:24.424340', 'step': 29875, 'epoch': 3}
{'type': 'loss', 'content': 0.03514784574508667, 'timestamp': '2025-10-02 01:04:24.430129', 'step': 29876, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:04:24.490814', 'step': 29876, 'epoch': 3}
{'type': 'loss', 'content': 0.026635531336069107, 'timestamp': '2025-10-02 01:04:24.502486', 'step': 29877, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:24.558744', 'step': 29877, 'epoch': 3}
{'type': 'loss', 'content': 0.03137079253792763, 'timestamp': '2025-10-02 01:04:24.565960', 'step': 29878, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:24.621212', 'step': 29878, 'epoch': 3}
{'type': 'loss', 'content': 0.0328703410923481, 'timestamp': '2025-10-02 01:04:24.623563', 'step': 29879, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:24.679209', 'step': 29879, 'epoch': 3}
{'type': 'loss', 'content': 0.06598340719938278, 'timestamp': '2025-10-02 01:04:24.685418', 'step': 29880, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:24.739805', 'step': 29880, 'epoch': 3}
{'type': 'loss', 'content': 0.05607730150222778, 'timestamp': '2025-10-02 01:04:24.742190', 'step': 29881, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:04:24.804280', 'step': 29881, 'epoch': 3}
{'type': 'loss', 'content': 0.002794495550915599, 'timestamp': '2025-10-02 01:04:24.814652', 'step': 29882, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:04:24.869751', 'step': 29882, 'epoch': 3}
{'type': 'loss', 'content': 0.08268909901380539, 'timestamp': '2025-10-02 01:04:24.872229', 'step': 29883, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:24.927409', 'step': 29883, 'epoch': 3}
{'type': 'loss', 'content': 0.015204941853880882, 'timestamp': '2025-10-02 01:04:24.933792', 'step': 29884, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:24.988987', 'step': 29884, 'epoch': 3}
{'type': 'loss', 'content': 0.04940047115087509, 'timestamp': '2025-10-02 01:04:24.991487', 'step': 29885, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:25.046602', 'step': 29885, 'epoch': 3}
{'type': 'loss', 'content': 0.059352997690439224, 'timestamp': '2025-10-02 01:04:25.049781', 'step': 29886, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:04:25.105405', 'step': 29886, 'epoch': 3}
{'type': 'loss', 'content': 0.08474472910165787, 'timestamp': '2025-10-02 01:04:25.108289', 'step': 29887, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:25.163681', 'step': 29887, 'epoch': 3}
{'type': 'loss', 'content': 0.09116291999816895, 'timestamp': '2025-10-02 01:04:25.171996', 'step': 29888, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:25.230582', 'step': 29888, 'epoch': 3}
{'type': 'loss', 'content': 0.008673531003296375, 'timestamp': '2025-10-02 01:04:25.239053', 'step': 29889, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:25.294406', 'step': 29889, 'epoch': 3}
{'type': 'loss', 'content': 0.059648074209690094, 'timestamp': '2025-10-02 01:04:25.300040', 'step': 29890, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:25.354679', 'step': 29890, 'epoch': 3}
{'type': 'loss', 'content': 0.03623872995376587, 'timestamp': '2025-10-02 01:04:25.356717', 'step': 29891, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:25.411611', 'step': 29891, 'epoch': 3}
{'type': 'loss', 'content': 0.09786152839660645, 'timestamp': '2025-10-02 01:04:25.417833', 'step': 29892, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:04:25.479190', 'step': 29892, 'epoch': 3}
{'type': 'loss', 'content': 0.04055715724825859, 'timestamp': '2025-10-02 01:04:25.490731', 'step': 29893, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:25.545738', 'step': 29893, 'epoch': 3}
{'type': 'loss', 'content': 0.020346852019429207, 'timestamp': '2025-10-02 01:04:25.548300', 'step': 29894, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:25.603166', 'step': 29894, 'epoch': 3}
{'type': 'loss', 'content': 0.0489436499774456, 'timestamp': '2025-10-02 01:04:25.606970', 'step': 29895, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:04:25.664070', 'step': 29895, 'epoch': 3}
{'type': 'loss', 'content': 0.049644146114587784, 'timestamp': '2025-10-02 01:04:25.670935', 'step': 29896, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:25.726680', 'step': 29896, 'epoch': 3}
{'type': 'loss', 'content': 0.032485220581293106, 'timestamp': '2025-10-02 01:04:25.728811', 'step': 29897, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:25.784619', 'step': 29897, 'epoch': 3}
{'type': 'loss', 'content': 0.04593195021152496, 'timestamp': '2025-10-02 01:04:25.790544', 'step': 29898, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:25.848679', 'step': 29898, 'epoch': 3}
{'type': 'loss', 'content': 0.05839040130376816, 'timestamp': '2025-10-02 01:04:25.854250', 'step': 29899, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:25.912163', 'step': 29899, 'epoch': 3}
{'type': 'loss', 'content': 0.01859254390001297, 'timestamp': '2025-10-02 01:04:25.922563', 'step': 29900, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:25.980013', 'step': 29900, 'epoch': 3}
{'type': 'loss', 'content': 0.022150179371237755, 'timestamp': '2025-10-02 01:04:25.984293', 'step': 29901, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:26.042448', 'step': 29901, 'epoch': 3}
{'type': 'loss', 'content': 0.056952111423015594, 'timestamp': '2025-10-02 01:04:26.048419', 'step': 29902, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:26.105413', 'step': 29902, 'epoch': 3}
{'type': 'loss', 'content': 0.026628397405147552, 'timestamp': '2025-10-02 01:04:26.107845', 'step': 29903, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:26.165180', 'step': 29903, 'epoch': 3}
{'type': 'loss', 'content': 0.019850224256515503, 'timestamp': '2025-10-02 01:04:26.171915', 'step': 29904, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:04:26.230965', 'step': 29904, 'epoch': 3}
{'type': 'loss', 'content': 0.056065067648887634, 'timestamp': '2025-10-02 01:04:26.242071', 'step': 29905, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:26.299777', 'step': 29905, 'epoch': 3}
{'type': 'loss', 'content': 0.005861036479473114, 'timestamp': '2025-10-02 01:04:26.303011', 'step': 29906, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:26.360305', 'step': 29906, 'epoch': 3}
{'type': 'loss', 'content': 0.04398484155535698, 'timestamp': '2025-10-02 01:04:26.366162', 'step': 29907, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:26.423598', 'step': 29907, 'epoch': 3}
{'type': 'loss', 'content': 0.028178833425045013, 'timestamp': '2025-10-02 01:04:26.430891', 'step': 29908, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:26.487789', 'step': 29908, 'epoch': 3}
{'type': 'loss', 'content': 0.16916082799434662, 'timestamp': '2025-10-02 01:04:26.490670', 'step': 29909, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:26.548077', 'step': 29909, 'epoch': 3}
{'type': 'loss', 'content': 0.01705523021519184, 'timestamp': '2025-10-02 01:04:26.553994', 'step': 29910, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:04:26.611191', 'step': 29910, 'epoch': 3}
{'type': 'loss', 'content': 0.057855747640132904, 'timestamp': '2025-10-02 01:04:26.614654', 'step': 29911, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:26.674394', 'step': 29911, 'epoch': 3}
{'type': 'loss', 'content': 0.01513720490038395, 'timestamp': '2025-10-02 01:04:26.681114', 'step': 29912, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:26.736983', 'step': 29912, 'epoch': 3}
{'type': 'loss', 'content': 0.032920848578214645, 'timestamp': '2025-10-02 01:04:26.739692', 'step': 29913, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:04:26.796212', 'step': 29913, 'epoch': 3}
{'type': 'loss', 'content': 0.03828129544854164, 'timestamp': '2025-10-02 01:04:26.798292', 'step': 29914, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:26.853877', 'step': 29914, 'epoch': 3}
{'type': 'loss', 'content': 0.008024642243981361, 'timestamp': '2025-10-02 01:04:26.860025', 'step': 29915, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:04:26.916092', 'step': 29915, 'epoch': 3}
{'type': 'loss', 'content': 0.13943125307559967, 'timestamp': '2025-10-02 01:04:26.922920', 'step': 29916, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:26.980359', 'step': 29916, 'epoch': 3}
{'type': 'loss', 'content': 0.0760498121380806, 'timestamp': '2025-10-02 01:04:26.982872', 'step': 29917, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:27.039508', 'step': 29917, 'epoch': 3}
{'type': 'loss', 'content': 0.029910044744610786, 'timestamp': '2025-10-02 01:04:27.042228', 'step': 29918, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:27.099265', 'step': 29918, 'epoch': 3}
{'type': 'loss', 'content': 0.007190011441707611, 'timestamp': '2025-10-02 01:04:27.106766', 'step': 29919, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:27.163474', 'step': 29919, 'epoch': 3}
{'type': 'loss', 'content': 0.016080349683761597, 'timestamp': '2025-10-02 01:04:27.170160', 'step': 29920, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:27.229204', 'step': 29920, 'epoch': 3}
{'type': 'loss', 'content': 0.030907081440091133, 'timestamp': '2025-10-02 01:04:27.232541', 'step': 29921, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:27.290103', 'step': 29921, 'epoch': 3}
{'type': 'loss', 'content': 0.03722504898905754, 'timestamp': '2025-10-02 01:04:27.292553', 'step': 29922, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:27.350501', 'step': 29922, 'epoch': 3}
{'type': 'loss', 'content': 0.003160760970786214, 'timestamp': '2025-10-02 01:04:27.356309', 'step': 29923, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:27.411568', 'step': 29923, 'epoch': 3}
{'type': 'loss', 'content': 0.06556158512830734, 'timestamp': '2025-10-02 01:04:27.419561', 'step': 29924, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:27.476654', 'step': 29924, 'epoch': 3}
{'type': 'loss', 'content': 0.07168912142515182, 'timestamp': '2025-10-02 01:04:27.479335', 'step': 29925, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:27.540763', 'step': 29925, 'epoch': 3}
{'type': 'loss', 'content': 0.05935594439506531, 'timestamp': '2025-10-02 01:04:27.543140', 'step': 29926, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:04:27.597742', 'step': 29926, 'epoch': 3}
{'type': 'loss', 'content': 0.052406907081604004, 'timestamp': '2025-10-02 01:04:27.600395', 'step': 29927, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:27.655199', 'step': 29927, 'epoch': 3}
{'type': 'loss', 'content': 0.006943468935787678, 'timestamp': '2025-10-02 01:04:27.662042', 'step': 29928, 'epoch': 3}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 01:04:54.369131', 'step': 29928, 'epoch': 3}
{'type': 'pplx', 'content': 86.59223285451911, 'timestamp': '2025-10-02 01:04:54.372902', 'step': 29928, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:54.428391', 'step': 29928, 'epoch': 3}
{'type': 'loss', 'content': 0.035315729677677155, 'timestamp': '2025-10-02 01:04:54.430995', 'step': 29929, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:04:54.496600', 'step': 29929, 'epoch': 3}
{'type': 'loss', 'content': 0.017703454941511154, 'timestamp': '2025-10-02 01:04:54.507367', 'step': 29930, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:54.562549', 'step': 29930, 'epoch': 3}
{'type': 'loss', 'content': 0.038693495094776154, 'timestamp': '2025-10-02 01:04:54.565177', 'step': 29931, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:54.619983', 'step': 29931, 'epoch': 3}
{'type': 'loss', 'content': 0.061933714896440506, 'timestamp': '2025-10-02 01:04:54.626569', 'step': 29932, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:54.681170', 'step': 29932, 'epoch': 3}
{'type': 'loss', 'content': 0.020472809672355652, 'timestamp': '2025-10-02 01:04:54.691405', 'step': 29933, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:54.746929', 'step': 29933, 'epoch': 3}
{'type': 'loss', 'content': 0.06662086397409439, 'timestamp': '2025-10-02 01:04:54.749377', 'step': 29934, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:04:54.810305', 'step': 29934, 'epoch': 3}
{'type': 'loss', 'content': 0.024480434134602547, 'timestamp': '2025-10-02 01:04:54.820491', 'step': 29935, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:54.874836', 'step': 29935, 'epoch': 3}
{'type': 'loss', 'content': 0.11249806731939316, 'timestamp': '2025-10-02 01:04:54.881156', 'step': 29936, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:54.935657', 'step': 29936, 'epoch': 3}
{'type': 'loss', 'content': 0.026126965880393982, 'timestamp': '2025-10-02 01:04:54.941331', 'step': 29937, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:54.996326', 'step': 29937, 'epoch': 3}
{'type': 'loss', 'content': 0.02819758839905262, 'timestamp': '2025-10-02 01:04:55.001956', 'step': 29938, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:55.056759', 'step': 29938, 'epoch': 3}
{'type': 'loss', 'content': 0.00047819907194934785, 'timestamp': '2025-10-02 01:04:55.059645', 'step': 29939, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:55.115606', 'step': 29939, 'epoch': 3}
{'type': 'loss', 'content': 0.05052484944462776, 'timestamp': '2025-10-02 01:04:55.121716', 'step': 29940, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:55.176569', 'step': 29940, 'epoch': 3}
{'type': 'loss', 'content': 0.005848923698067665, 'timestamp': '2025-10-02 01:04:55.182419', 'step': 29941, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:55.239000', 'step': 29941, 'epoch': 3}
{'type': 'loss', 'content': 0.046698976308107376, 'timestamp': '2025-10-02 01:04:55.242127', 'step': 29942, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:55.298398', 'step': 29942, 'epoch': 3}
{'type': 'loss', 'content': 0.00026971253100782633, 'timestamp': '2025-10-02 01:04:55.305864', 'step': 29943, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:55.362708', 'step': 29943, 'epoch': 3}
{'type': 'loss', 'content': 0.042936112731695175, 'timestamp': '2025-10-02 01:04:55.368974', 'step': 29944, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:55.422517', 'step': 29944, 'epoch': 3}
{'type': 'loss', 'content': 0.07360652834177017, 'timestamp': '2025-10-02 01:04:55.425396', 'step': 29945, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:55.481082', 'step': 29945, 'epoch': 3}
{'type': 'loss', 'content': 0.013710337691009045, 'timestamp': '2025-10-02 01:04:55.483690', 'step': 29946, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:55.538658', 'step': 29946, 'epoch': 3}
{'type': 'loss', 'content': 0.03610070049762726, 'timestamp': '2025-10-02 01:04:55.541294', 'step': 29947, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:55.595771', 'step': 29947, 'epoch': 3}
{'type': 'loss', 'content': 0.05254688113927841, 'timestamp': '2025-10-02 01:04:55.604553', 'step': 29948, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:55.659426', 'step': 29948, 'epoch': 3}
{'type': 'loss', 'content': 0.016282761469483376, 'timestamp': '2025-10-02 01:04:55.666798', 'step': 29949, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:55.722195', 'step': 29949, 'epoch': 3}
{'type': 'loss', 'content': 0.009253142401576042, 'timestamp': '2025-10-02 01:04:55.728021', 'step': 29950, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:55.785163', 'step': 29950, 'epoch': 3}
{'type': 'loss', 'content': 0.026585068553686142, 'timestamp': '2025-10-02 01:04:55.794609', 'step': 29951, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:55.849777', 'step': 29951, 'epoch': 3}
{'type': 'loss', 'content': 0.01557505689561367, 'timestamp': '2025-10-02 01:04:55.856119', 'step': 29952, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:55.911334', 'step': 29952, 'epoch': 3}
{'type': 'loss', 'content': 0.09660081565380096, 'timestamp': '2025-10-02 01:04:55.913660', 'step': 29953, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:04:55.968745', 'step': 29953, 'epoch': 3}
{'type': 'loss', 'content': 0.007884517312049866, 'timestamp': '2025-10-02 01:04:55.971359', 'step': 29954, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:56.027369', 'step': 29954, 'epoch': 3}
{'type': 'loss', 'content': 0.033693552017211914, 'timestamp': '2025-10-02 01:04:56.032978', 'step': 29955, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:56.088424', 'step': 29955, 'epoch': 3}
{'type': 'loss', 'content': 0.0555664524435997, 'timestamp': '2025-10-02 01:04:56.094724', 'step': 29956, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:56.150174', 'step': 29956, 'epoch': 3}
{'type': 'loss', 'content': 0.052461691200733185, 'timestamp': '2025-10-02 01:04:56.159532', 'step': 29957, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:56.214595', 'step': 29957, 'epoch': 3}
{'type': 'loss', 'content': 0.039318524301052094, 'timestamp': '2025-10-02 01:04:56.217149', 'step': 29958, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:56.273758', 'step': 29958, 'epoch': 3}
{'type': 'loss', 'content': 0.02153894118964672, 'timestamp': '2025-10-02 01:04:56.279439', 'step': 29959, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:56.335050', 'step': 29959, 'epoch': 3}
{'type': 'loss', 'content': 0.06071404367685318, 'timestamp': '2025-10-02 01:04:56.345323', 'step': 29960, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:56.399757', 'step': 29960, 'epoch': 3}
{'type': 'loss', 'content': 0.02820865996181965, 'timestamp': '2025-10-02 01:04:56.402457', 'step': 29961, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:04:56.459837', 'step': 29961, 'epoch': 3}
{'type': 'loss', 'content': 0.0683659017086029, 'timestamp': '2025-10-02 01:04:56.462448', 'step': 29962, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:04:56.524488', 'step': 29962, 'epoch': 3}
{'type': 'loss', 'content': 0.015590569004416466, 'timestamp': '2025-10-02 01:04:56.534976', 'step': 29963, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:04:56.590172', 'step': 29963, 'epoch': 3}
{'type': 'loss', 'content': 0.058982670307159424, 'timestamp': '2025-10-02 01:04:56.596117', 'step': 29964, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:56.650340', 'step': 29964, 'epoch': 3}
{'type': 'loss', 'content': 0.035794105380773544, 'timestamp': '2025-10-02 01:04:56.659610', 'step': 29965, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:04:56.722552', 'step': 29965, 'epoch': 3}
{'type': 'loss', 'content': 0.0021176631562411785, 'timestamp': '2025-10-02 01:04:56.733206', 'step': 29966, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:56.790269', 'step': 29966, 'epoch': 3}
{'type': 'loss', 'content': 0.07505868375301361, 'timestamp': '2025-10-02 01:04:56.794665', 'step': 29967, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:56.849104', 'step': 29967, 'epoch': 3}
{'type': 'loss', 'content': 0.03298093378543854, 'timestamp': '2025-10-02 01:04:56.855275', 'step': 29968, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:04:56.916709', 'step': 29968, 'epoch': 3}
{'type': 'loss', 'content': 0.017871055752038956, 'timestamp': '2025-10-02 01:04:56.928203', 'step': 29969, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:56.984429', 'step': 29969, 'epoch': 3}
{'type': 'loss', 'content': 0.026254933327436447, 'timestamp': '2025-10-02 01:04:56.993960', 'step': 29970, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:57.049024', 'step': 29970, 'epoch': 3}
{'type': 'loss', 'content': 0.07327744364738464, 'timestamp': '2025-10-02 01:04:57.051376', 'step': 29971, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:57.106456', 'step': 29971, 'epoch': 3}
{'type': 'loss', 'content': 0.07433273643255234, 'timestamp': '2025-10-02 01:04:57.116479', 'step': 29972, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:04:57.170232', 'step': 29972, 'epoch': 3}
{'type': 'loss', 'content': 0.020878763869404793, 'timestamp': '2025-10-02 01:04:57.172819', 'step': 29973, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:04:57.227376', 'step': 29973, 'epoch': 3}
{'type': 'loss', 'content': 0.020771684125065804, 'timestamp': '2025-10-02 01:04:57.230118', 'step': 29974, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:04:57.285312', 'step': 29974, 'epoch': 3}
{'type': 'loss', 'content': 0.053795818239450455, 'timestamp': '2025-10-02 01:04:57.290855', 'step': 29975, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:57.345396', 'step': 29975, 'epoch': 3}
{'type': 'loss', 'content': 0.015699967741966248, 'timestamp': '2025-10-02 01:04:57.351188', 'step': 29976, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 01:04:57.422628', 'step': 29976, 'epoch': 3}
{'type': 'loss', 'content': 0.015067758038640022, 'timestamp': '2025-10-02 01:04:57.436987', 'step': 29977, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:57.491408', 'step': 29977, 'epoch': 3}
{'type': 'loss', 'content': 0.09294428676366806, 'timestamp': '2025-10-02 01:04:57.493759', 'step': 29978, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:57.548715', 'step': 29978, 'epoch': 3}
{'type': 'loss', 'content': 0.07148592174053192, 'timestamp': '2025-10-02 01:04:57.551135', 'step': 29979, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:04:57.605922', 'step': 29979, 'epoch': 3}
{'type': 'loss', 'content': 0.02827613614499569, 'timestamp': '2025-10-02 01:04:57.612476', 'step': 29980, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:57.667624', 'step': 29980, 'epoch': 3}
{'type': 'loss', 'content': 0.007926098071038723, 'timestamp': '2025-10-02 01:04:57.670315', 'step': 29981, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:57.724130', 'step': 29981, 'epoch': 3}
{'type': 'loss', 'content': 0.0545017383992672, 'timestamp': '2025-10-02 01:04:57.726563', 'step': 29982, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:04:57.781263', 'step': 29982, 'epoch': 3}
{'type': 'loss', 'content': 0.05402464419603348, 'timestamp': '2025-10-02 01:04:57.783749', 'step': 29983, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:57.839059', 'step': 29983, 'epoch': 3}
{'type': 'loss', 'content': 0.02597688138484955, 'timestamp': '2025-10-02 01:04:57.848631', 'step': 29984, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:57.902390', 'step': 29984, 'epoch': 3}
{'type': 'loss', 'content': 0.034362804144620895, 'timestamp': '2025-10-02 01:04:57.911517', 'step': 29985, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:04:57.971506', 'step': 29985, 'epoch': 3}
{'type': 'loss', 'content': 0.014875547960400581, 'timestamp': '2025-10-02 01:04:57.981708', 'step': 29986, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:04:58.036560', 'step': 29986, 'epoch': 3}
{'type': 'loss', 'content': 0.05608099699020386, 'timestamp': '2025-10-02 01:04:58.038439', 'step': 29987, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:58.094158', 'step': 29987, 'epoch': 3}
{'type': 'loss', 'content': 0.020529428496956825, 'timestamp': '2025-10-02 01:04:58.104565', 'step': 29988, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:04:58.158737', 'step': 29988, 'epoch': 3}
{'type': 'loss', 'content': 0.012695197016000748, 'timestamp': '2025-10-02 01:04:58.168933', 'step': 29989, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:58.223793', 'step': 29989, 'epoch': 3}
{'type': 'loss', 'content': 0.0786973088979721, 'timestamp': '2025-10-02 01:04:58.226303', 'step': 29990, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:58.281730', 'step': 29990, 'epoch': 3}
{'type': 'loss', 'content': 0.09733514487743378, 'timestamp': '2025-10-02 01:04:58.284304', 'step': 29991, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:58.338793', 'step': 29991, 'epoch': 3}
{'type': 'loss', 'content': 0.04849259555339813, 'timestamp': '2025-10-02 01:04:58.348663', 'step': 29992, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:04:58.409888', 'step': 29992, 'epoch': 3}
{'type': 'loss', 'content': 0.01010588277131319, 'timestamp': '2025-10-02 01:04:58.421417', 'step': 29993, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:04:58.475928', 'step': 29993, 'epoch': 3}
{'type': 'loss', 'content': 0.09048070758581161, 'timestamp': '2025-10-02 01:04:58.478330', 'step': 29994, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:04:58.533234', 'step': 29994, 'epoch': 3}
{'type': 'loss', 'content': 0.029107294976711273, 'timestamp': '2025-10-02 01:04:58.540080', 'step': 29995, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:58.594795', 'step': 29995, 'epoch': 3}
{'type': 'loss', 'content': 0.020966891199350357, 'timestamp': '2025-10-02 01:04:58.600761', 'step': 29996, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:58.654828', 'step': 29996, 'epoch': 3}
{'type': 'loss', 'content': 0.03364838659763336, 'timestamp': '2025-10-02 01:04:58.663977', 'step': 29997, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:58.718708', 'step': 29997, 'epoch': 3}
{'type': 'loss', 'content': 0.056214794516563416, 'timestamp': '2025-10-02 01:04:58.720991', 'step': 29998, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:04:58.783433', 'step': 29998, 'epoch': 3}
{'type': 'loss', 'content': 0.06582207977771759, 'timestamp': '2025-10-02 01:04:58.794059', 'step': 29999, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:04:58.857134', 'step': 29999, 'epoch': 3}
{'type': 'loss', 'content': 0.02736862562596798, 'timestamp': '2025-10-02 01:04:58.868737', 'step': 30000, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 30000', 'timestamp': '2025-10-02 01:04:59.264471', 'step': 30000, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:59.318291', 'step': 30000, 'epoch': 3}
{'type': 'loss', 'content': 0.03745141252875328, 'timestamp': '2025-10-02 01:04:59.326507', 'step': 30001, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:59.386570', 'step': 30001, 'epoch': 3}
{'type': 'loss', 'content': 0.05390612408518791, 'timestamp': '2025-10-02 01:04:59.388921', 'step': 30002, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:59.446499', 'step': 30002, 'epoch': 3}
{'type': 'loss', 'content': 0.029208241030573845, 'timestamp': '2025-10-02 01:04:59.449852', 'step': 30003, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:59.514839', 'step': 30003, 'epoch': 3}
{'type': 'loss', 'content': 0.06321945786476135, 'timestamp': '2025-10-02 01:04:59.522298', 'step': 30004, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:59.597277', 'step': 30004, 'epoch': 3}
{'type': 'loss', 'content': 0.001593577559106052, 'timestamp': '2025-10-02 01:04:59.605471', 'step': 30005, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:04:59.670401', 'step': 30005, 'epoch': 3}
{'type': 'loss', 'content': 0.019631080329418182, 'timestamp': '2025-10-02 01:04:59.680784', 'step': 30006, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:04:59.747415', 'step': 30006, 'epoch': 3}
{'type': 'loss', 'content': 0.02814771793782711, 'timestamp': '2025-10-02 01:04:59.751390', 'step': 30007, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:04:59.807688', 'step': 30007, 'epoch': 3}
{'type': 'loss', 'content': 0.10098586976528168, 'timestamp': '2025-10-02 01:04:59.814017', 'step': 30008, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:04:59.868251', 'step': 30008, 'epoch': 3}
{'type': 'loss', 'content': 0.05392874404788017, 'timestamp': '2025-10-02 01:04:59.877638', 'step': 30009, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:04:59.939891', 'step': 30009, 'epoch': 3}
{'type': 'loss', 'content': 0.0042357658967375755, 'timestamp': '2025-10-02 01:04:59.942365', 'step': 30010, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:05:00.001013', 'step': 30010, 'epoch': 3}
{'type': 'loss', 'content': 0.12833859026432037, 'timestamp': '2025-10-02 01:05:00.003567', 'step': 30011, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:00.069103', 'step': 30011, 'epoch': 3}
{'type': 'loss', 'content': 0.025159385055303574, 'timestamp': '2025-10-02 01:05:00.080136', 'step': 30012, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:00.137107', 'step': 30012, 'epoch': 3}
{'type': 'loss', 'content': 0.0516212023794651, 'timestamp': '2025-10-02 01:05:00.143790', 'step': 30013, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:00.204780', 'step': 30013, 'epoch': 3}
{'type': 'loss', 'content': 0.05771391838788986, 'timestamp': '2025-10-02 01:05:00.208725', 'step': 30014, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:00.265926', 'step': 30014, 'epoch': 3}
{'type': 'loss', 'content': 0.03164767101407051, 'timestamp': '2025-10-02 01:05:00.268830', 'step': 30015, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:00.327335', 'step': 30015, 'epoch': 3}
{'type': 'loss', 'content': 0.07218355685472488, 'timestamp': '2025-10-02 01:05:00.336243', 'step': 30016, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:05:00.391065', 'step': 30016, 'epoch': 3}
{'type': 'loss', 'content': 0.1090138778090477, 'timestamp': '2025-10-02 01:05:00.393569', 'step': 30017, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:05:00.448989', 'step': 30017, 'epoch': 3}
{'type': 'loss', 'content': 0.026019828394055367, 'timestamp': '2025-10-02 01:05:00.451386', 'step': 30018, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:00.507166', 'step': 30018, 'epoch': 3}
{'type': 'loss', 'content': 0.011952875182032585, 'timestamp': '2025-10-02 01:05:00.509809', 'step': 30019, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:00.564291', 'step': 30019, 'epoch': 3}
{'type': 'loss', 'content': 0.08127088844776154, 'timestamp': '2025-10-02 01:05:00.570132', 'step': 30020, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:00.637671', 'step': 30020, 'epoch': 3}
{'type': 'loss', 'content': 0.025563053786754608, 'timestamp': '2025-10-02 01:05:00.642730', 'step': 30021, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:00.718370', 'step': 30021, 'epoch': 3}
{'type': 'loss', 'content': 0.010026802308857441, 'timestamp': '2025-10-02 01:05:00.725574', 'step': 30022, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:00.784309', 'step': 30022, 'epoch': 3}
{'type': 'loss', 'content': 0.04530702903866768, 'timestamp': '2025-10-02 01:05:00.790942', 'step': 30023, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:00.849516', 'step': 30023, 'epoch': 3}
{'type': 'loss', 'content': 0.0230721402913332, 'timestamp': '2025-10-02 01:05:00.856265', 'step': 30024, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:00.933370', 'step': 30024, 'epoch': 3}
{'type': 'loss', 'content': 0.001826889580115676, 'timestamp': '2025-10-02 01:05:00.944360', 'step': 30025, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:01.015001', 'step': 30025, 'epoch': 3}
{'type': 'loss', 'content': 0.009673858992755413, 'timestamp': '2025-10-02 01:05:01.020089', 'step': 30026, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:01.121009', 'step': 30026, 'epoch': 3}
{'type': 'loss', 'content': 0.06610603630542755, 'timestamp': '2025-10-02 01:05:01.127715', 'step': 30027, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:01.198045', 'step': 30027, 'epoch': 3}
{'type': 'loss', 'content': 0.0005237057921476662, 'timestamp': '2025-10-02 01:05:01.210252', 'step': 30028, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:01.282667', 'step': 30028, 'epoch': 3}
{'type': 'loss', 'content': 0.08113327622413635, 'timestamp': '2025-10-02 01:05:01.290114', 'step': 30029, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:01.355702', 'step': 30029, 'epoch': 3}
{'type': 'loss', 'content': 0.0464903749525547, 'timestamp': '2025-10-02 01:05:01.364953', 'step': 30030, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:01.440772', 'step': 30030, 'epoch': 3}
{'type': 'loss', 'content': 0.0015502870082855225, 'timestamp': '2025-10-02 01:05:01.450905', 'step': 30031, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:05:01.515452', 'step': 30031, 'epoch': 3}
{'type': 'loss', 'content': 0.006812580395489931, 'timestamp': '2025-10-02 01:05:01.527455', 'step': 30032, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:01.591160', 'step': 30032, 'epoch': 3}
{'type': 'loss', 'content': 0.06807296723127365, 'timestamp': '2025-10-02 01:05:01.593553', 'step': 30033, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:01.651277', 'step': 30033, 'epoch': 3}
{'type': 'loss', 'content': 0.08287470787763596, 'timestamp': '2025-10-02 01:05:01.659738', 'step': 30034, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:01.719388', 'step': 30034, 'epoch': 3}
{'type': 'loss', 'content': 0.0006526934448629618, 'timestamp': '2025-10-02 01:05:01.724479', 'step': 30035, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:01.787337', 'step': 30035, 'epoch': 3}
{'type': 'loss', 'content': 0.030799541622400284, 'timestamp': '2025-10-02 01:05:01.797441', 'step': 30036, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:01.853773', 'step': 30036, 'epoch': 3}
{'type': 'loss', 'content': 0.009699773974716663, 'timestamp': '2025-10-02 01:05:01.856777', 'step': 30037, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:01.914306', 'step': 30037, 'epoch': 3}
{'type': 'loss', 'content': 0.0671641156077385, 'timestamp': '2025-10-02 01:05:01.917301', 'step': 30038, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:01.975531', 'step': 30038, 'epoch': 3}
{'type': 'loss', 'content': 0.0052033900283277035, 'timestamp': '2025-10-02 01:05:01.982861', 'step': 30039, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:05:02.041102', 'step': 30039, 'epoch': 3}
{'type': 'loss', 'content': 0.0025171658489853144, 'timestamp': '2025-10-02 01:05:02.047083', 'step': 30040, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:02.112329', 'step': 30040, 'epoch': 3}
{'type': 'loss', 'content': 0.030536292120814323, 'timestamp': '2025-10-02 01:05:02.116305', 'step': 30041, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:02.179305', 'step': 30041, 'epoch': 3}
{'type': 'loss', 'content': 0.06367748230695724, 'timestamp': '2025-10-02 01:05:02.189502', 'step': 30042, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:02.256170', 'step': 30042, 'epoch': 3}
{'type': 'loss', 'content': 0.036938704550266266, 'timestamp': '2025-10-02 01:05:02.261799', 'step': 30043, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:02.320044', 'step': 30043, 'epoch': 3}
{'type': 'loss', 'content': 0.10631751269102097, 'timestamp': '2025-10-02 01:05:02.331380', 'step': 30044, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:02.389965', 'step': 30044, 'epoch': 3}
{'type': 'loss', 'content': 0.037709061056375504, 'timestamp': '2025-10-02 01:05:02.395385', 'step': 30045, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:02.454095', 'step': 30045, 'epoch': 3}
{'type': 'loss', 'content': 0.07637271285057068, 'timestamp': '2025-10-02 01:05:02.458893', 'step': 30046, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:02.521463', 'step': 30046, 'epoch': 3}
{'type': 'loss', 'content': 0.0764860212802887, 'timestamp': '2025-10-02 01:05:02.524290', 'step': 30047, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:02.582678', 'step': 30047, 'epoch': 3}
{'type': 'loss', 'content': 0.03666621446609497, 'timestamp': '2025-10-02 01:05:02.594833', 'step': 30048, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:02.651998', 'step': 30048, 'epoch': 3}
{'type': 'loss', 'content': 0.04770614206790924, 'timestamp': '2025-10-02 01:05:02.654959', 'step': 30049, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:02.715762', 'step': 30049, 'epoch': 3}
{'type': 'loss', 'content': 0.014107531867921352, 'timestamp': '2025-10-02 01:05:02.718272', 'step': 30050, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:02.778266', 'step': 30050, 'epoch': 3}
{'type': 'loss', 'content': 0.0741664320230484, 'timestamp': '2025-10-02 01:05:02.781538', 'step': 30051, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:02.850841', 'step': 30051, 'epoch': 3}
{'type': 'loss', 'content': 0.014054316096007824, 'timestamp': '2025-10-02 01:05:02.858470', 'step': 30052, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:02.921512', 'step': 30052, 'epoch': 3}
{'type': 'loss', 'content': 0.02053675800561905, 'timestamp': '2025-10-02 01:05:02.927088', 'step': 30053, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:02.984711', 'step': 30053, 'epoch': 3}
{'type': 'loss', 'content': 0.030352840200066566, 'timestamp': '2025-10-02 01:05:02.987813', 'step': 30054, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:03.058454', 'step': 30054, 'epoch': 3}
{'type': 'loss', 'content': 0.035485345870256424, 'timestamp': '2025-10-02 01:05:03.065604', 'step': 30055, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:03.123404', 'step': 30055, 'epoch': 3}
{'type': 'loss', 'content': 0.032310280948877335, 'timestamp': '2025-10-02 01:05:03.130810', 'step': 30056, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:03.195359', 'step': 30056, 'epoch': 3}
{'type': 'loss', 'content': 0.032943420112133026, 'timestamp': '2025-10-02 01:05:03.200053', 'step': 30057, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:03.261881', 'step': 30057, 'epoch': 3}
{'type': 'loss', 'content': 0.048138897866010666, 'timestamp': '2025-10-02 01:05:03.270885', 'step': 30058, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:03.330442', 'step': 30058, 'epoch': 3}
{'type': 'loss', 'content': 0.025806505233049393, 'timestamp': '2025-10-02 01:05:03.335960', 'step': 30059, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:03.418712', 'step': 30059, 'epoch': 3}
{'type': 'loss', 'content': 0.06265787035226822, 'timestamp': '2025-10-02 01:05:03.431620', 'step': 30060, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:05:03.508911', 'step': 30060, 'epoch': 3}
{'type': 'loss', 'content': 0.04414433240890503, 'timestamp': '2025-10-02 01:05:03.512603', 'step': 30061, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:03.570095', 'step': 30061, 'epoch': 3}
{'type': 'loss', 'content': 0.021996458992362022, 'timestamp': '2025-10-02 01:05:03.578552', 'step': 30062, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:03.651630', 'step': 30062, 'epoch': 3}
{'type': 'loss', 'content': 0.02339884079992771, 'timestamp': '2025-10-02 01:05:03.660685', 'step': 30063, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:03.732896', 'step': 30063, 'epoch': 3}
{'type': 'loss', 'content': 0.02215726487338543, 'timestamp': '2025-10-02 01:05:03.748291', 'step': 30064, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:03.820353', 'step': 30064, 'epoch': 3}
{'type': 'loss', 'content': 0.0632288008928299, 'timestamp': '2025-10-02 01:05:03.828720', 'step': 30065, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:05:03.891050', 'step': 30065, 'epoch': 3}
{'type': 'loss', 'content': 0.10270638763904572, 'timestamp': '2025-10-02 01:05:03.900534', 'step': 30066, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:03.977644', 'step': 30066, 'epoch': 3}
{'type': 'loss', 'content': 0.018492184579372406, 'timestamp': '2025-10-02 01:05:03.987120', 'step': 30067, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:04.062401', 'step': 30067, 'epoch': 3}
{'type': 'loss', 'content': 0.03903704509139061, 'timestamp': '2025-10-02 01:05:04.074221', 'step': 30068, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:04.150621', 'step': 30068, 'epoch': 3}
{'type': 'loss', 'content': 0.0015196240274235606, 'timestamp': '2025-10-02 01:05:04.159695', 'step': 30069, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:04.226181', 'step': 30069, 'epoch': 3}
{'type': 'loss', 'content': 0.05331113934516907, 'timestamp': '2025-10-02 01:05:04.232417', 'step': 30070, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:04.299530', 'step': 30070, 'epoch': 3}
{'type': 'loss', 'content': 0.05756168067455292, 'timestamp': '2025-10-02 01:05:04.304358', 'step': 30071, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:04.377332', 'step': 30071, 'epoch': 3}
{'type': 'loss', 'content': 0.06418536603450775, 'timestamp': '2025-10-02 01:05:04.385747', 'step': 30072, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:04.451620', 'step': 30072, 'epoch': 3}
{'type': 'loss', 'content': 0.034563034772872925, 'timestamp': '2025-10-02 01:05:04.454494', 'step': 30073, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:04.533613', 'step': 30073, 'epoch': 3}
{'type': 'loss', 'content': 0.01613481715321541, 'timestamp': '2025-10-02 01:05:04.541029', 'step': 30074, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-10-02 01:05:04.642208', 'step': 30074, 'epoch': 3}
{'type': 'loss', 'content': 0.027253838256001472, 'timestamp': '2025-10-02 01:05:04.657040', 'step': 30075, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:04.729498', 'step': 30075, 'epoch': 3}
{'type': 'loss', 'content': 0.0021146454382687807, 'timestamp': '2025-10-02 01:05:04.736166', 'step': 30076, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:04.801257', 'step': 30076, 'epoch': 3}
{'type': 'loss', 'content': 0.03432289510965347, 'timestamp': '2025-10-02 01:05:04.808848', 'step': 30077, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:04.878169', 'step': 30077, 'epoch': 3}
{'type': 'loss', 'content': 0.051205892115831375, 'timestamp': '2025-10-02 01:05:04.887509', 'step': 30078, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:04.954576', 'step': 30078, 'epoch': 3}
{'type': 'loss', 'content': 0.020297978073358536, 'timestamp': '2025-10-02 01:05:04.960419', 'step': 30079, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:05:05.038851', 'step': 30079, 'epoch': 3}
{'type': 'loss', 'content': 0.033785704523324966, 'timestamp': '2025-10-02 01:05:05.051318', 'step': 30080, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:05.106732', 'step': 30080, 'epoch': 3}
{'type': 'loss', 'content': 0.029245998710393906, 'timestamp': '2025-10-02 01:05:05.114278', 'step': 30081, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:05:05.187001', 'step': 30081, 'epoch': 3}
{'type': 'loss', 'content': 0.052374161779880524, 'timestamp': '2025-10-02 01:05:05.195627', 'step': 30082, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:05.268938', 'step': 30082, 'epoch': 3}
{'type': 'loss', 'content': 0.03260691463947296, 'timestamp': '2025-10-02 01:05:05.276371', 'step': 30083, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 01:05:05.364910', 'step': 30083, 'epoch': 3}
{'type': 'loss', 'content': 0.028255755081772804, 'timestamp': '2025-10-02 01:05:05.378977', 'step': 30084, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:05.438852', 'step': 30084, 'epoch': 3}
{'type': 'loss', 'content': 0.01966354437172413, 'timestamp': '2025-10-02 01:05:05.442397', 'step': 30085, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:05.505813', 'step': 30085, 'epoch': 3}
{'type': 'loss', 'content': 0.045737236738204956, 'timestamp': '2025-10-02 01:05:05.511682', 'step': 30086, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:05.568929', 'step': 30086, 'epoch': 3}
{'type': 'loss', 'content': 0.03753063827753067, 'timestamp': '2025-10-02 01:05:05.571763', 'step': 30087, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:05:05.664398', 'step': 30087, 'epoch': 3}
{'type': 'loss', 'content': 0.0224867295473814, 'timestamp': '2025-10-02 01:05:05.677554', 'step': 30088, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:05:05.746963', 'step': 30088, 'epoch': 3}
{'type': 'loss', 'content': 0.05288367718458176, 'timestamp': '2025-10-02 01:05:05.758752', 'step': 30089, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:05.830897', 'step': 30089, 'epoch': 3}
{'type': 'loss', 'content': 0.02191167138516903, 'timestamp': '2025-10-02 01:05:05.838048', 'step': 30090, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:05:05.910403', 'step': 30090, 'epoch': 3}
{'type': 'loss', 'content': 0.06250570714473724, 'timestamp': '2025-10-02 01:05:05.919884', 'step': 30091, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:05.995379', 'step': 30091, 'epoch': 3}
{'type': 'loss', 'content': 0.03525976091623306, 'timestamp': '2025-10-02 01:05:06.007448', 'step': 30092, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:06.082081', 'step': 30092, 'epoch': 3}
{'type': 'loss', 'content': 0.04482712596654892, 'timestamp': '2025-10-02 01:05:06.086553', 'step': 30093, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:05:06.142208', 'step': 30093, 'epoch': 3}
{'type': 'loss', 'content': 0.06185399368405342, 'timestamp': '2025-10-02 01:05:06.145531', 'step': 30094, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:06.202908', 'step': 30094, 'epoch': 3}
{'type': 'loss', 'content': 0.06134217977523804, 'timestamp': '2025-10-02 01:05:06.208959', 'step': 30095, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:06.270141', 'step': 30095, 'epoch': 3}
{'type': 'loss', 'content': 0.02524912729859352, 'timestamp': '2025-10-02 01:05:06.277223', 'step': 30096, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:06.335013', 'step': 30096, 'epoch': 3}
{'type': 'loss', 'content': 0.03092167153954506, 'timestamp': '2025-10-02 01:05:06.340664', 'step': 30097, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:06.406719', 'step': 30097, 'epoch': 3}
{'type': 'loss', 'content': 0.09535618126392365, 'timestamp': '2025-10-02 01:05:06.410187', 'step': 30098, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:06.472433', 'step': 30098, 'epoch': 3}
{'type': 'loss', 'content': 0.05137594789266586, 'timestamp': '2025-10-02 01:05:06.476675', 'step': 30099, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:06.535929', 'step': 30099, 'epoch': 3}
{'type': 'loss', 'content': 0.032416801899671555, 'timestamp': '2025-10-02 01:05:06.543327', 'step': 30100, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:06.611415', 'step': 30100, 'epoch': 3}
{'type': 'loss', 'content': 0.01701388694345951, 'timestamp': '2025-10-02 01:05:06.620369', 'step': 30101, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:06.688186', 'step': 30101, 'epoch': 3}
{'type': 'loss', 'content': 0.017375780269503593, 'timestamp': '2025-10-02 01:05:06.691486', 'step': 30102, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:06.747273', 'step': 30102, 'epoch': 3}
{'type': 'loss', 'content': 0.11598102003335953, 'timestamp': '2025-10-02 01:05:06.750894', 'step': 30103, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:06.807370', 'step': 30103, 'epoch': 3}
{'type': 'loss', 'content': 0.019657127559185028, 'timestamp': '2025-10-02 01:05:06.815775', 'step': 30104, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:06.870499', 'step': 30104, 'epoch': 3}
{'type': 'loss', 'content': 0.06570103019475937, 'timestamp': '2025-10-02 01:05:06.873900', 'step': 30105, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:06.935933', 'step': 30105, 'epoch': 3}
{'type': 'loss', 'content': 0.054480381309986115, 'timestamp': '2025-10-02 01:05:06.938603', 'step': 30106, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:07.003676', 'step': 30106, 'epoch': 3}
{'type': 'loss', 'content': 0.03720913454890251, 'timestamp': '2025-10-02 01:05:07.007131', 'step': 30107, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:07.063194', 'step': 30107, 'epoch': 3}
{'type': 'loss', 'content': 0.016260651871562004, 'timestamp': '2025-10-02 01:05:07.071493', 'step': 30108, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:05:07.127720', 'step': 30108, 'epoch': 3}
{'type': 'loss', 'content': 0.03512483835220337, 'timestamp': '2025-10-02 01:05:07.133271', 'step': 30109, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:07.192171', 'step': 30109, 'epoch': 3}
{'type': 'loss', 'content': 0.017210086807608604, 'timestamp': '2025-10-02 01:05:07.195057', 'step': 30110, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:07.254381', 'step': 30110, 'epoch': 3}
{'type': 'loss', 'content': 0.1418295055627823, 'timestamp': '2025-10-02 01:05:07.257334', 'step': 30111, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:07.321583', 'step': 30111, 'epoch': 3}
{'type': 'loss', 'content': 0.014774754643440247, 'timestamp': '2025-10-02 01:05:07.328402', 'step': 30112, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:07.385473', 'step': 30112, 'epoch': 3}
{'type': 'loss', 'content': 0.0370691679418087, 'timestamp': '2025-10-02 01:05:07.391524', 'step': 30113, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:05:07.456924', 'step': 30113, 'epoch': 3}
{'type': 'loss', 'content': 0.024499209597706795, 'timestamp': '2025-10-02 01:05:07.460424', 'step': 30114, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:07.515687', 'step': 30114, 'epoch': 3}
{'type': 'loss', 'content': 0.10443565249443054, 'timestamp': '2025-10-02 01:05:07.523037', 'step': 30115, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:07.590880', 'step': 30115, 'epoch': 3}
{'type': 'loss', 'content': 0.060209278017282486, 'timestamp': '2025-10-02 01:05:07.597378', 'step': 30116, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:07.655303', 'step': 30116, 'epoch': 3}
{'type': 'loss', 'content': 0.0076209306716918945, 'timestamp': '2025-10-02 01:05:07.665588', 'step': 30117, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:07.727318', 'step': 30117, 'epoch': 3}
{'type': 'loss', 'content': 0.0008054290665313601, 'timestamp': '2025-10-02 01:05:07.736713', 'step': 30118, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:07.806900', 'step': 30118, 'epoch': 3}
{'type': 'loss', 'content': 0.03788844496011734, 'timestamp': '2025-10-02 01:05:07.810247', 'step': 30119, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:07.867247', 'step': 30119, 'epoch': 3}
{'type': 'loss', 'content': 0.06892549991607666, 'timestamp': '2025-10-02 01:05:07.874861', 'step': 30120, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:07.931566', 'step': 30120, 'epoch': 3}
{'type': 'loss', 'content': 0.017145536839962006, 'timestamp': '2025-10-02 01:05:07.941212', 'step': 30121, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:08.006777', 'step': 30121, 'epoch': 3}
{'type': 'loss', 'content': 0.018571224063634872, 'timestamp': '2025-10-02 01:05:08.010120', 'step': 30122, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:05:08.071100', 'step': 30122, 'epoch': 3}
{'type': 'loss', 'content': 0.02212417498230934, 'timestamp': '2025-10-02 01:05:08.073704', 'step': 30123, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:08.130432', 'step': 30123, 'epoch': 3}
{'type': 'loss', 'content': 0.09414634108543396, 'timestamp': '2025-10-02 01:05:08.138247', 'step': 30124, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:08.203226', 'step': 30124, 'epoch': 3}
{'type': 'loss', 'content': 0.02177107334136963, 'timestamp': '2025-10-02 01:05:08.220295', 'step': 30125, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:05:08.312863', 'step': 30125, 'epoch': 3}
{'type': 'loss', 'content': 0.12412624806165695, 'timestamp': '2025-10-02 01:05:08.316755', 'step': 30126, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:08.380033', 'step': 30126, 'epoch': 3}
{'type': 'loss', 'content': 0.0006285347626544535, 'timestamp': '2025-10-02 01:05:08.389590', 'step': 30127, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:08.463390', 'step': 30127, 'epoch': 3}
{'type': 'loss', 'content': 0.0599207878112793, 'timestamp': '2025-10-02 01:05:08.478490', 'step': 30128, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:08.539932', 'step': 30128, 'epoch': 3}
{'type': 'loss', 'content': 0.01545498427003622, 'timestamp': '2025-10-02 01:05:08.548290', 'step': 30129, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:08.622136', 'step': 30129, 'epoch': 3}
{'type': 'loss', 'content': 0.03444842994213104, 'timestamp': '2025-10-02 01:05:08.631572', 'step': 30130, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:08.701684', 'step': 30130, 'epoch': 3}
{'type': 'loss', 'content': 0.047620102763175964, 'timestamp': '2025-10-02 01:05:08.712409', 'step': 30131, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:08.781020', 'step': 30131, 'epoch': 3}
{'type': 'loss', 'content': 0.059474676847457886, 'timestamp': '2025-10-02 01:05:08.794410', 'step': 30132, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:05:08.868080', 'step': 30132, 'epoch': 3}
{'type': 'loss', 'content': 0.06086644157767296, 'timestamp': '2025-10-02 01:05:08.874228', 'step': 30133, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:08.951245', 'step': 30133, 'epoch': 3}
{'type': 'loss', 'content': 0.026587946340441704, 'timestamp': '2025-10-02 01:05:08.960784', 'step': 30134, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:09.023171', 'step': 30134, 'epoch': 3}
{'type': 'loss', 'content': 0.015409098006784916, 'timestamp': '2025-10-02 01:05:09.032721', 'step': 30135, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:09.104448', 'step': 30135, 'epoch': 3}
{'type': 'loss', 'content': 0.05699032172560692, 'timestamp': '2025-10-02 01:05:09.117568', 'step': 30136, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:09.189129', 'step': 30136, 'epoch': 3}
{'type': 'loss', 'content': 0.03932773321866989, 'timestamp': '2025-10-02 01:05:09.191806', 'step': 30137, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:09.257499', 'step': 30137, 'epoch': 3}
{'type': 'loss', 'content': 0.02834404818713665, 'timestamp': '2025-10-02 01:05:09.266883', 'step': 30138, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:09.335744', 'step': 30138, 'epoch': 3}
{'type': 'loss', 'content': 0.06905629485845566, 'timestamp': '2025-10-02 01:05:09.342854', 'step': 30139, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:09.416752', 'step': 30139, 'epoch': 3}
{'type': 'loss', 'content': 0.050475578755140305, 'timestamp': '2025-10-02 01:05:09.432000', 'step': 30140, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:05:09.514193', 'step': 30140, 'epoch': 3}
{'type': 'loss', 'content': 0.09814590960741043, 'timestamp': '2025-10-02 01:05:09.518504', 'step': 30141, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:09.594184', 'step': 30141, 'epoch': 3}
{'type': 'loss', 'content': 0.026393255218863487, 'timestamp': '2025-10-02 01:05:09.596962', 'step': 30142, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:09.671636', 'step': 30142, 'epoch': 3}
{'type': 'loss', 'content': 0.017395393922924995, 'timestamp': '2025-10-02 01:05:09.679773', 'step': 30143, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:09.752042', 'step': 30143, 'epoch': 3}
{'type': 'loss', 'content': 0.1792977899312973, 'timestamp': '2025-10-02 01:05:09.760240', 'step': 30144, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:09.828021', 'step': 30144, 'epoch': 3}
{'type': 'loss', 'content': 0.09287501871585846, 'timestamp': '2025-10-02 01:05:09.836746', 'step': 30145, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:09.904302', 'step': 30145, 'epoch': 3}
{'type': 'loss', 'content': 0.08654902130365372, 'timestamp': '2025-10-02 01:05:09.909617', 'step': 30146, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:05:09.973390', 'step': 30146, 'epoch': 3}
{'type': 'loss', 'content': 0.042298220098018646, 'timestamp': '2025-10-02 01:05:09.978612', 'step': 30147, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:10.046559', 'step': 30147, 'epoch': 3}
{'type': 'loss', 'content': 0.06049380078911781, 'timestamp': '2025-10-02 01:05:10.056501', 'step': 30148, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:10.113792', 'step': 30148, 'epoch': 3}
{'type': 'loss', 'content': 0.0007732946542091668, 'timestamp': '2025-10-02 01:05:10.116339', 'step': 30149, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:10.177504', 'step': 30149, 'epoch': 3}
{'type': 'loss', 'content': 0.02464853599667549, 'timestamp': '2025-10-02 01:05:10.183087', 'step': 30150, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:05:10.247843', 'step': 30150, 'epoch': 3}
{'type': 'loss', 'content': 0.1247304156422615, 'timestamp': '2025-10-02 01:05:10.253164', 'step': 30151, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:10.319340', 'step': 30151, 'epoch': 3}
{'type': 'loss', 'content': 0.005428910721093416, 'timestamp': '2025-10-02 01:05:10.329480', 'step': 30152, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:10.389805', 'step': 30152, 'epoch': 3}
{'type': 'loss', 'content': 0.055859945714473724, 'timestamp': '2025-10-02 01:05:10.392916', 'step': 30153, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:10.459289', 'step': 30153, 'epoch': 3}
{'type': 'loss', 'content': 0.028709521517157555, 'timestamp': '2025-10-02 01:05:10.462642', 'step': 30154, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:05:10.539583', 'step': 30154, 'epoch': 3}
{'type': 'loss', 'content': 0.02501729503273964, 'timestamp': '2025-10-02 01:05:10.550416', 'step': 30155, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:05:10.608515', 'step': 30155, 'epoch': 3}
{'type': 'loss', 'content': 0.07613015919923782, 'timestamp': '2025-10-02 01:05:10.620267', 'step': 30156, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:10.689550', 'step': 30156, 'epoch': 3}
{'type': 'loss', 'content': 0.014834672212600708, 'timestamp': '2025-10-02 01:05:10.696118', 'step': 30157, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:10.765858', 'step': 30157, 'epoch': 3}
{'type': 'loss', 'content': 0.012933151796460152, 'timestamp': '2025-10-02 01:05:10.769774', 'step': 30158, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:10.826216', 'step': 30158, 'epoch': 3}
{'type': 'loss', 'content': 0.09637138247489929, 'timestamp': '2025-10-02 01:05:10.833671', 'step': 30159, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:10.905612', 'step': 30159, 'epoch': 3}
{'type': 'loss', 'content': 0.012202606536448002, 'timestamp': '2025-10-02 01:05:10.921102', 'step': 30160, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:10.990208', 'step': 30160, 'epoch': 3}
{'type': 'loss', 'content': 0.01922822929918766, 'timestamp': '2025-10-02 01:05:10.993693', 'step': 30161, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:05:11.050015', 'step': 30161, 'epoch': 3}
{'type': 'loss', 'content': 0.10409865528345108, 'timestamp': '2025-10-02 01:05:11.053455', 'step': 30162, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:11.125855', 'step': 30162, 'epoch': 3}
{'type': 'loss', 'content': 0.03591414913535118, 'timestamp': '2025-10-02 01:05:11.136223', 'step': 30163, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:11.211673', 'step': 30163, 'epoch': 3}
{'type': 'loss', 'content': 0.09315107762813568, 'timestamp': '2025-10-02 01:05:11.229521', 'step': 30164, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:05:11.304398', 'step': 30164, 'epoch': 3}
{'type': 'loss', 'content': 0.0283705722540617, 'timestamp': '2025-10-02 01:05:11.307163', 'step': 30165, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:11.378703', 'step': 30165, 'epoch': 3}
{'type': 'loss', 'content': 0.04459548369050026, 'timestamp': '2025-10-02 01:05:11.386156', 'step': 30166, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:11.464173', 'step': 30166, 'epoch': 3}
{'type': 'loss', 'content': 0.06118560954928398, 'timestamp': '2025-10-02 01:05:11.474337', 'step': 30167, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:11.545619', 'step': 30167, 'epoch': 3}
{'type': 'loss', 'content': 0.012343542650341988, 'timestamp': '2025-10-02 01:05:11.556405', 'step': 30168, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:11.624782', 'step': 30168, 'epoch': 3}
{'type': 'loss', 'content': 0.051971662789583206, 'timestamp': '2025-10-02 01:05:11.634237', 'step': 30169, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:05:11.709011', 'step': 30169, 'epoch': 3}
{'type': 'loss', 'content': 0.010932325385510921, 'timestamp': '2025-10-02 01:05:11.719545', 'step': 30170, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:11.787806', 'step': 30170, 'epoch': 3}
{'type': 'loss', 'content': 0.0283786840736866, 'timestamp': '2025-10-02 01:05:11.791471', 'step': 30171, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 01:05:11.884940', 'step': 30171, 'epoch': 3}
{'type': 'loss', 'content': 0.036938007920980453, 'timestamp': '2025-10-02 01:05:11.900141', 'step': 30172, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:05:11.980670', 'step': 30172, 'epoch': 3}
{'type': 'loss', 'content': 0.03118632361292839, 'timestamp': '2025-10-02 01:05:11.994087', 'step': 30173, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:12.063367', 'step': 30173, 'epoch': 3}
{'type': 'loss', 'content': 0.05639001354575157, 'timestamp': '2025-10-02 01:05:12.072013', 'step': 30174, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:12.141114', 'step': 30174, 'epoch': 3}
{'type': 'loss', 'content': 0.09841109067201614, 'timestamp': '2025-10-02 01:05:12.148595', 'step': 30175, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:12.220327', 'step': 30175, 'epoch': 3}
{'type': 'loss', 'content': 0.03834574297070503, 'timestamp': '2025-10-02 01:05:12.231052', 'step': 30176, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:05:12.298745', 'step': 30176, 'epoch': 3}
{'type': 'loss', 'content': 0.11401355266571045, 'timestamp': '2025-10-02 01:05:12.301188', 'step': 30177, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:12.355751', 'step': 30177, 'epoch': 3}
{'type': 'loss', 'content': 0.08073022961616516, 'timestamp': '2025-10-02 01:05:12.358290', 'step': 30178, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:12.413440', 'step': 30178, 'epoch': 3}
{'type': 'loss', 'content': 0.14071761071681976, 'timestamp': '2025-10-02 01:05:12.415973', 'step': 30179, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:05:12.488513', 'step': 30179, 'epoch': 3}
{'type': 'loss', 'content': 0.00697492528706789, 'timestamp': '2025-10-02 01:05:12.501599', 'step': 30180, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:12.555988', 'step': 30180, 'epoch': 3}
{'type': 'loss', 'content': 0.04141204059123993, 'timestamp': '2025-10-02 01:05:12.561505', 'step': 30181, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:12.616961', 'step': 30181, 'epoch': 3}
{'type': 'loss', 'content': 0.0531766451895237, 'timestamp': '2025-10-02 01:05:12.619492', 'step': 30182, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:12.675826', 'step': 30182, 'epoch': 3}
{'type': 'loss', 'content': 0.031764671206474304, 'timestamp': '2025-10-02 01:05:12.682211', 'step': 30183, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:12.742518', 'step': 30183, 'epoch': 3}
{'type': 'loss', 'content': 0.05664624273777008, 'timestamp': '2025-10-02 01:05:12.753432', 'step': 30184, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:12.810495', 'step': 30184, 'epoch': 3}
{'type': 'loss', 'content': 0.0009830499766394496, 'timestamp': '2025-10-02 01:05:12.816456', 'step': 30185, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:12.871941', 'step': 30185, 'epoch': 3}
{'type': 'loss', 'content': 0.034727998077869415, 'timestamp': '2025-10-02 01:05:12.874433', 'step': 30186, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:05:12.937323', 'step': 30186, 'epoch': 3}
{'type': 'loss', 'content': 0.04801403731107712, 'timestamp': '2025-10-02 01:05:12.947749', 'step': 30187, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:13.006035', 'step': 30187, 'epoch': 3}
{'type': 'loss', 'content': 0.04840967431664467, 'timestamp': '2025-10-02 01:05:13.014970', 'step': 30188, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:13.072646', 'step': 30188, 'epoch': 3}
{'type': 'loss', 'content': 0.08064602315425873, 'timestamp': '2025-10-02 01:05:13.075871', 'step': 30189, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:05:13.141610', 'step': 30189, 'epoch': 3}
{'type': 'loss', 'content': 0.023777518421411514, 'timestamp': '2025-10-02 01:05:13.152074', 'step': 30190, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:13.218213', 'step': 30190, 'epoch': 3}
{'type': 'loss', 'content': 0.07225999981164932, 'timestamp': '2025-10-02 01:05:13.223631', 'step': 30191, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:13.288350', 'step': 30191, 'epoch': 3}
{'type': 'loss', 'content': 0.06839437037706375, 'timestamp': '2025-10-02 01:05:13.295617', 'step': 30192, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:05:13.352803', 'step': 30192, 'epoch': 3}
{'type': 'loss', 'content': 0.039354026317596436, 'timestamp': '2025-10-02 01:05:13.355335', 'step': 30193, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:13.410893', 'step': 30193, 'epoch': 3}
{'type': 'loss', 'content': 0.16223271191120148, 'timestamp': '2025-10-02 01:05:13.414612', 'step': 30194, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:05:13.480056', 'step': 30194, 'epoch': 3}
{'type': 'loss', 'content': 0.01767350547015667, 'timestamp': '2025-10-02 01:05:13.490565', 'step': 30195, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:05:13.553725', 'step': 30195, 'epoch': 3}
{'type': 'loss', 'content': 0.016411742195487022, 'timestamp': '2025-10-02 01:05:13.564976', 'step': 30196, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:13.619056', 'step': 30196, 'epoch': 3}
{'type': 'loss', 'content': 0.026205088943243027, 'timestamp': '2025-10-02 01:05:13.626467', 'step': 30197, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:13.686513', 'step': 30197, 'epoch': 3}
{'type': 'loss', 'content': 0.016799578443169594, 'timestamp': '2025-10-02 01:05:13.696666', 'step': 30198, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:13.754335', 'step': 30198, 'epoch': 3}
{'type': 'loss', 'content': 0.06721030175685883, 'timestamp': '2025-10-02 01:05:13.757054', 'step': 30199, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:05:13.820223', 'step': 30199, 'epoch': 3}
{'type': 'loss', 'content': 0.004625321365892887, 'timestamp': '2025-10-02 01:05:13.831841', 'step': 30200, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:13.885342', 'step': 30200, 'epoch': 3}
{'type': 'loss', 'content': 0.0278877355158329, 'timestamp': '2025-10-02 01:05:13.888852', 'step': 30201, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:13.944694', 'step': 30201, 'epoch': 3}
{'type': 'loss', 'content': 0.037353746592998505, 'timestamp': '2025-10-02 01:05:13.947081', 'step': 30202, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:05:14.003572', 'step': 30202, 'epoch': 3}
{'type': 'loss', 'content': 0.007599928416311741, 'timestamp': '2025-10-02 01:05:14.006227', 'step': 30203, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:14.061217', 'step': 30203, 'epoch': 3}
{'type': 'loss', 'content': 0.02432936243712902, 'timestamp': '2025-10-02 01:05:14.067249', 'step': 30204, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:14.122080', 'step': 30204, 'epoch': 3}
{'type': 'loss', 'content': 0.055499617010354996, 'timestamp': '2025-10-02 01:05:14.127430', 'step': 30205, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:14.182369', 'step': 30205, 'epoch': 3}
{'type': 'loss', 'content': 0.03307289257645607, 'timestamp': '2025-10-02 01:05:14.185794', 'step': 30206, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:14.242935', 'step': 30206, 'epoch': 3}
{'type': 'loss', 'content': 0.021579179912805557, 'timestamp': '2025-10-02 01:05:14.245402', 'step': 30207, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:14.300460', 'step': 30207, 'epoch': 3}
{'type': 'loss', 'content': 0.014343587681651115, 'timestamp': '2025-10-02 01:05:14.306895', 'step': 30208, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:14.360687', 'step': 30208, 'epoch': 3}
{'type': 'loss', 'content': 0.013498829677700996, 'timestamp': '2025-10-02 01:05:14.367586', 'step': 30209, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:14.426675', 'step': 30209, 'epoch': 3}
{'type': 'loss', 'content': 0.00978170521557331, 'timestamp': '2025-10-02 01:05:14.436849', 'step': 30210, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:14.492145', 'step': 30210, 'epoch': 3}
{'type': 'loss', 'content': 0.03512701764702797, 'timestamp': '2025-10-02 01:05:14.499606', 'step': 30211, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:14.555103', 'step': 30211, 'epoch': 3}
{'type': 'loss', 'content': 0.005971780978143215, 'timestamp': '2025-10-02 01:05:14.561416', 'step': 30212, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:14.615326', 'step': 30212, 'epoch': 3}
{'type': 'loss', 'content': 0.006996760610491037, 'timestamp': '2025-10-02 01:05:14.622572', 'step': 30213, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:05:14.677223', 'step': 30213, 'epoch': 3}
{'type': 'loss', 'content': 0.060906585305929184, 'timestamp': '2025-10-02 01:05:14.680126', 'step': 30214, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:14.734874', 'step': 30214, 'epoch': 3}
{'type': 'loss', 'content': 0.033882200717926025, 'timestamp': '2025-10-02 01:05:14.737348', 'step': 30215, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:14.792603', 'step': 30215, 'epoch': 3}
{'type': 'loss', 'content': 0.0207732655107975, 'timestamp': '2025-10-02 01:05:14.800751', 'step': 30216, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:14.854979', 'step': 30216, 'epoch': 3}
{'type': 'loss', 'content': 0.05389181524515152, 'timestamp': '2025-10-02 01:05:14.857506', 'step': 30217, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:14.911786', 'step': 30217, 'epoch': 3}
{'type': 'loss', 'content': 0.045220233500003815, 'timestamp': '2025-10-02 01:05:14.917621', 'step': 30218, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:14.972734', 'step': 30218, 'epoch': 3}
{'type': 'loss', 'content': 0.04890654236078262, 'timestamp': '2025-10-02 01:05:14.975359', 'step': 30219, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:05:15.037852', 'step': 30219, 'epoch': 3}
{'type': 'loss', 'content': 0.014428065158426762, 'timestamp': '2025-10-02 01:05:15.049316', 'step': 30220, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:15.103342', 'step': 30220, 'epoch': 3}
{'type': 'loss', 'content': 0.05558113381266594, 'timestamp': '2025-10-02 01:05:15.105740', 'step': 30221, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:15.159104', 'step': 30221, 'epoch': 3}
{'type': 'loss', 'content': 0.14781665802001953, 'timestamp': '2025-10-02 01:05:15.162231', 'step': 30222, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:15.216282', 'step': 30222, 'epoch': 3}
{'type': 'loss', 'content': 0.045004457235336304, 'timestamp': '2025-10-02 01:05:15.218509', 'step': 30223, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:15.272547', 'step': 30223, 'epoch': 3}
{'type': 'loss', 'content': 0.019314931705594063, 'timestamp': '2025-10-02 01:05:15.278578', 'step': 30224, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:15.334091', 'step': 30224, 'epoch': 3}
{'type': 'loss', 'content': 0.021710028871893883, 'timestamp': '2025-10-02 01:05:15.336543', 'step': 30225, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:05:15.390783', 'step': 30225, 'epoch': 3}
{'type': 'loss', 'content': 0.05049796402454376, 'timestamp': '2025-10-02 01:05:15.396381', 'step': 30226, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:05:15.465871', 'step': 30226, 'epoch': 3}
{'type': 'loss', 'content': 0.11196399480104446, 'timestamp': '2025-10-02 01:05:15.468618', 'step': 30227, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:15.523678', 'step': 30227, 'epoch': 3}
{'type': 'loss', 'content': 0.02343287505209446, 'timestamp': '2025-10-02 01:05:15.531892', 'step': 30228, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:15.587263', 'step': 30228, 'epoch': 3}
{'type': 'loss', 'content': 0.037774451076984406, 'timestamp': '2025-10-02 01:05:15.589693', 'step': 30229, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:15.643404', 'step': 30229, 'epoch': 3}
{'type': 'loss', 'content': 0.07798369973897934, 'timestamp': '2025-10-02 01:05:15.646013', 'step': 30230, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:15.700528', 'step': 30230, 'epoch': 3}
{'type': 'loss', 'content': 0.08627428859472275, 'timestamp': '2025-10-02 01:05:15.703423', 'step': 30231, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:15.759351', 'step': 30231, 'epoch': 3}
{'type': 'loss', 'content': 0.06969314813613892, 'timestamp': '2025-10-02 01:05:15.765468', 'step': 30232, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:15.819376', 'step': 30232, 'epoch': 3}
{'type': 'loss', 'content': 0.02962842583656311, 'timestamp': '2025-10-02 01:05:15.826973', 'step': 30233, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:05:15.888230', 'step': 30233, 'epoch': 3}
{'type': 'loss', 'content': 0.11401211470365524, 'timestamp': '2025-10-02 01:05:15.898674', 'step': 30234, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:05:15.970386', 'step': 30234, 'epoch': 3}
{'type': 'loss', 'content': 0.05040515586733818, 'timestamp': '2025-10-02 01:05:15.982726', 'step': 30235, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:16.037379', 'step': 30235, 'epoch': 3}
{'type': 'loss', 'content': 0.04560941085219383, 'timestamp': '2025-10-02 01:05:16.043677', 'step': 30236, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:16.097965', 'step': 30236, 'epoch': 3}
{'type': 'loss', 'content': 0.026559973135590553, 'timestamp': '2025-10-02 01:05:16.100364', 'step': 30237, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:16.163831', 'step': 30237, 'epoch': 3}
{'type': 'loss', 'content': 0.0003568786196410656, 'timestamp': '2025-10-02 01:05:16.174052', 'step': 30238, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:16.229494', 'step': 30238, 'epoch': 3}
{'type': 'loss', 'content': 0.015071634203195572, 'timestamp': '2025-10-02 01:05:16.231889', 'step': 30239, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:16.287111', 'step': 30239, 'epoch': 3}
{'type': 'loss', 'content': 0.022759823128581047, 'timestamp': '2025-10-02 01:05:16.294445', 'step': 30240, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:16.354472', 'step': 30240, 'epoch': 3}
{'type': 'loss', 'content': 0.06301495432853699, 'timestamp': '2025-10-02 01:05:16.365463', 'step': 30241, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:16.420531', 'step': 30241, 'epoch': 3}
{'type': 'loss', 'content': 0.019270669668912888, 'timestamp': '2025-10-02 01:05:16.423142', 'step': 30242, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:16.478381', 'step': 30242, 'epoch': 3}
{'type': 'loss', 'content': 0.0586334727704525, 'timestamp': '2025-10-02 01:05:16.481148', 'step': 30243, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:16.536435', 'step': 30243, 'epoch': 3}
{'type': 'loss', 'content': 0.06605549901723862, 'timestamp': '2025-10-02 01:05:16.544339', 'step': 30244, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:16.598700', 'step': 30244, 'epoch': 3}
{'type': 'loss', 'content': 0.01714947260916233, 'timestamp': '2025-10-02 01:05:16.607824', 'step': 30245, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:16.662999', 'step': 30245, 'epoch': 3}
{'type': 'loss', 'content': 0.10355247557163239, 'timestamp': '2025-10-02 01:05:16.665326', 'step': 30246, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:16.720336', 'step': 30246, 'epoch': 3}
{'type': 'loss', 'content': 0.036354485899209976, 'timestamp': '2025-10-02 01:05:16.722996', 'step': 30247, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:16.777744', 'step': 30247, 'epoch': 3}
{'type': 'loss', 'content': 0.008941209875047207, 'timestamp': '2025-10-02 01:05:16.783382', 'step': 30248, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:16.838089', 'step': 30248, 'epoch': 3}
{'type': 'loss', 'content': 0.03669282793998718, 'timestamp': '2025-10-02 01:05:16.848369', 'step': 30249, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:05:16.917503', 'step': 30249, 'epoch': 3}
{'type': 'loss', 'content': 0.007204507011920214, 'timestamp': '2025-10-02 01:05:16.929488', 'step': 30250, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:16.985645', 'step': 30250, 'epoch': 3}
{'type': 'loss', 'content': 0.06892003864049911, 'timestamp': '2025-10-02 01:05:16.988250', 'step': 30251, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:17.042361', 'step': 30251, 'epoch': 3}
{'type': 'loss', 'content': 0.1068778857588768, 'timestamp': '2025-10-02 01:05:17.048685', 'step': 30252, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:17.102464', 'step': 30252, 'epoch': 3}
{'type': 'loss', 'content': 0.05336738005280495, 'timestamp': '2025-10-02 01:05:17.109900', 'step': 30253, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:17.163576', 'step': 30253, 'epoch': 3}
{'type': 'loss', 'content': 0.05276617780327797, 'timestamp': '2025-10-02 01:05:17.166280', 'step': 30254, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:05:17.227993', 'step': 30254, 'epoch': 3}
{'type': 'loss', 'content': 0.04566674306988716, 'timestamp': '2025-10-02 01:05:17.238466', 'step': 30255, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:05:17.293509', 'step': 30255, 'epoch': 3}
{'type': 'loss', 'content': 0.02805524691939354, 'timestamp': '2025-10-02 01:05:17.299645', 'step': 30256, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:17.353793', 'step': 30256, 'epoch': 3}
{'type': 'loss', 'content': 0.029959499835968018, 'timestamp': '2025-10-02 01:05:17.364054', 'step': 30257, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:17.445918', 'step': 30257, 'epoch': 3}
{'type': 'loss', 'content': 0.06300054490566254, 'timestamp': '2025-10-02 01:05:17.454979', 'step': 30258, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:17.533905', 'step': 30258, 'epoch': 3}
{'type': 'loss', 'content': 0.025903256610035896, 'timestamp': '2025-10-02 01:05:17.545686', 'step': 30259, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:17.626096', 'step': 30259, 'epoch': 3}
{'type': 'loss', 'content': 0.02096925489604473, 'timestamp': '2025-10-02 01:05:17.641498', 'step': 30260, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:17.718488', 'step': 30260, 'epoch': 3}
{'type': 'loss', 'content': 0.07357779145240784, 'timestamp': '2025-10-02 01:05:17.731493', 'step': 30261, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:17.805086', 'step': 30261, 'epoch': 3}
{'type': 'loss', 'content': 0.007818326354026794, 'timestamp': '2025-10-02 01:05:17.820803', 'step': 30262, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:17.907694', 'step': 30262, 'epoch': 3}
{'type': 'loss', 'content': 0.06037350371479988, 'timestamp': '2025-10-02 01:05:17.914388', 'step': 30263, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:17.998787', 'step': 30263, 'epoch': 3}
{'type': 'loss', 'content': 0.028833238407969475, 'timestamp': '2025-10-02 01:05:18.008546', 'step': 30264, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:18.085058', 'step': 30264, 'epoch': 3}
{'type': 'loss', 'content': 0.0022529042325913906, 'timestamp': '2025-10-02 01:05:18.092787', 'step': 30265, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:18.177529', 'step': 30265, 'epoch': 3}
{'type': 'loss', 'content': 0.032753366976976395, 'timestamp': '2025-10-02 01:05:18.184801', 'step': 30266, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:05:18.251275', 'step': 30266, 'epoch': 3}
{'type': 'loss', 'content': 0.1024727001786232, 'timestamp': '2025-10-02 01:05:18.259121', 'step': 30267, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:18.331664', 'step': 30267, 'epoch': 3}
{'type': 'loss', 'content': 0.05539242923259735, 'timestamp': '2025-10-02 01:05:18.343061', 'step': 30268, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:18.429863', 'step': 30268, 'epoch': 3}
{'type': 'loss', 'content': 0.058470822870731354, 'timestamp': '2025-10-02 01:05:18.437556', 'step': 30269, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:18.508826', 'step': 30269, 'epoch': 3}
{'type': 'loss', 'content': 0.013033603318035603, 'timestamp': '2025-10-02 01:05:18.524961', 'step': 30270, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:18.594190', 'step': 30270, 'epoch': 3}
{'type': 'loss', 'content': 0.1261696070432663, 'timestamp': '2025-10-02 01:05:18.596748', 'step': 30271, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 544], 'flops': 10880066115712.0}, 'timestamp': '2025-10-02 01:05:18.678884', 'step': 30271, 'epoch': 3}
{'type': 'loss', 'content': 0.03434247523546219, 'timestamp': '2025-10-02 01:05:18.694487', 'step': 30272, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:18.748669', 'step': 30272, 'epoch': 3}
{'type': 'loss', 'content': 0.03892600163817406, 'timestamp': '2025-10-02 01:05:18.751341', 'step': 30273, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:05:18.805786', 'step': 30273, 'epoch': 3}
{'type': 'loss', 'content': 0.0926389992237091, 'timestamp': '2025-10-02 01:05:18.808380', 'step': 30274, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:05:18.870109', 'step': 30274, 'epoch': 3}
{'type': 'loss', 'content': 0.0039379489608109, 'timestamp': '2025-10-02 01:05:18.880557', 'step': 30275, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:18.936326', 'step': 30275, 'epoch': 3}
{'type': 'loss', 'content': 0.0339188352227211, 'timestamp': '2025-10-02 01:05:18.946613', 'step': 30276, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:19.003726', 'step': 30276, 'epoch': 3}
{'type': 'loss', 'content': 0.025998590514063835, 'timestamp': '2025-10-02 01:05:19.006001', 'step': 30277, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:19.061150', 'step': 30277, 'epoch': 3}
{'type': 'loss', 'content': 0.0809306874871254, 'timestamp': '2025-10-02 01:05:19.063519', 'step': 30278, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:19.118537', 'step': 30278, 'epoch': 3}
{'type': 'loss', 'content': 0.029519813135266304, 'timestamp': '2025-10-02 01:05:19.121114', 'step': 30279, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:05:19.178057', 'step': 30279, 'epoch': 3}
{'type': 'loss', 'content': 0.012318914756178856, 'timestamp': '2025-10-02 01:05:19.185183', 'step': 30280, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:19.239962', 'step': 30280, 'epoch': 3}
{'type': 'loss', 'content': 0.06626784801483154, 'timestamp': '2025-10-02 01:05:19.245291', 'step': 30281, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:19.300668', 'step': 30281, 'epoch': 3}
{'type': 'loss', 'content': 0.06753074377775192, 'timestamp': '2025-10-02 01:05:19.303352', 'step': 30282, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:19.363810', 'step': 30282, 'epoch': 3}
{'type': 'loss', 'content': 0.02445104531943798, 'timestamp': '2025-10-02 01:05:19.373861', 'step': 30283, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:05:19.429322', 'step': 30283, 'epoch': 3}
{'type': 'loss', 'content': 0.0898551270365715, 'timestamp': '2025-10-02 01:05:19.435999', 'step': 30284, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:19.489730', 'step': 30284, 'epoch': 3}
{'type': 'loss', 'content': 0.05968344211578369, 'timestamp': '2025-10-02 01:05:19.492450', 'step': 30285, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:19.547169', 'step': 30285, 'epoch': 3}
{'type': 'loss', 'content': 0.046961184591054916, 'timestamp': '2025-10-02 01:05:19.552740', 'step': 30286, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:19.608884', 'step': 30286, 'epoch': 3}
{'type': 'loss', 'content': 0.014075130224227905, 'timestamp': '2025-10-02 01:05:19.615731', 'step': 30287, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:19.670691', 'step': 30287, 'epoch': 3}
{'type': 'loss', 'content': 0.05184068903326988, 'timestamp': '2025-10-02 01:05:19.680475', 'step': 30288, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:19.734661', 'step': 30288, 'epoch': 3}
{'type': 'loss', 'content': 0.06971912086009979, 'timestamp': '2025-10-02 01:05:19.736937', 'step': 30289, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:19.792370', 'step': 30289, 'epoch': 3}
{'type': 'loss', 'content': 0.030626492574810982, 'timestamp': '2025-10-02 01:05:19.796510', 'step': 30290, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:19.852200', 'step': 30290, 'epoch': 3}
{'type': 'loss', 'content': 0.017428921535611153, 'timestamp': '2025-10-02 01:05:19.857573', 'step': 30291, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:19.913595', 'step': 30291, 'epoch': 3}
{'type': 'loss', 'content': 0.05272925645112991, 'timestamp': '2025-10-02 01:05:19.919519', 'step': 30292, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:19.973217', 'step': 30292, 'epoch': 3}
{'type': 'loss', 'content': 0.12635715305805206, 'timestamp': '2025-10-02 01:05:19.976648', 'step': 30293, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:05:20.030726', 'step': 30293, 'epoch': 3}
{'type': 'loss', 'content': 0.044676411896944046, 'timestamp': '2025-10-02 01:05:20.033349', 'step': 30294, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:20.089532', 'step': 30294, 'epoch': 3}
{'type': 'loss', 'content': 0.0009944941848516464, 'timestamp': '2025-10-02 01:05:20.099094', 'step': 30295, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:20.155767', 'step': 30295, 'epoch': 3}
{'type': 'loss', 'content': 0.033167000859975815, 'timestamp': '2025-10-02 01:05:20.161882', 'step': 30296, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:05:20.216157', 'step': 30296, 'epoch': 3}
{'type': 'loss', 'content': 0.03989130258560181, 'timestamp': '2025-10-02 01:05:20.218603', 'step': 30297, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:20.272906', 'step': 30297, 'epoch': 3}
{'type': 'loss', 'content': 0.09699737280607224, 'timestamp': '2025-10-02 01:05:20.275383', 'step': 30298, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:20.330549', 'step': 30298, 'epoch': 3}
{'type': 'loss', 'content': 0.03184228017926216, 'timestamp': '2025-10-02 01:05:20.332717', 'step': 30299, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:20.388444', 'step': 30299, 'epoch': 3}
{'type': 'loss', 'content': 0.02266787365078926, 'timestamp': '2025-10-02 01:05:20.394769', 'step': 30300, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:05:20.462171', 'step': 30300, 'epoch': 3}
{'type': 'loss', 'content': 0.023894788697361946, 'timestamp': '2025-10-02 01:05:20.475109', 'step': 30301, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:20.530118', 'step': 30301, 'epoch': 3}
{'type': 'loss', 'content': 0.01949567161500454, 'timestamp': '2025-10-02 01:05:20.536880', 'step': 30302, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:20.592347', 'step': 30302, 'epoch': 3}
{'type': 'loss', 'content': 0.045690931379795074, 'timestamp': '2025-10-02 01:05:20.598040', 'step': 30303, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:20.654075', 'step': 30303, 'epoch': 3}
{'type': 'loss', 'content': 0.020308179780840874, 'timestamp': '2025-10-02 01:05:20.663978', 'step': 30304, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:05:20.717937', 'step': 30304, 'epoch': 3}
{'type': 'loss', 'content': 0.026652880012989044, 'timestamp': '2025-10-02 01:05:20.720511', 'step': 30305, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:05:20.775683', 'step': 30305, 'epoch': 3}
{'type': 'loss', 'content': 0.03576769307255745, 'timestamp': '2025-10-02 01:05:20.778049', 'step': 30306, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:20.833411', 'step': 30306, 'epoch': 3}
{'type': 'loss', 'content': 0.02392573282122612, 'timestamp': '2025-10-02 01:05:20.835942', 'step': 30307, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:20.890318', 'step': 30307, 'epoch': 3}
{'type': 'loss', 'content': 0.005015886854380369, 'timestamp': '2025-10-02 01:05:20.896191', 'step': 30308, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:20.949332', 'step': 30308, 'epoch': 3}
{'type': 'loss', 'content': 0.051121678203344345, 'timestamp': '2025-10-02 01:05:20.951679', 'step': 30309, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:21.005885', 'step': 30309, 'epoch': 3}
{'type': 'loss', 'content': 0.0467144176363945, 'timestamp': '2025-10-02 01:05:21.015254', 'step': 30310, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:21.069644', 'step': 30310, 'epoch': 3}
{'type': 'loss', 'content': 0.009851156733930111, 'timestamp': '2025-10-02 01:05:21.075251', 'step': 30311, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:21.129374', 'step': 30311, 'epoch': 3}
{'type': 'loss', 'content': 0.0008766756509430707, 'timestamp': '2025-10-02 01:05:21.135551', 'step': 30312, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:21.190822', 'step': 30312, 'epoch': 3}
{'type': 'loss', 'content': 0.02012067846953869, 'timestamp': '2025-10-02 01:05:21.198247', 'step': 30313, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:21.252790', 'step': 30313, 'epoch': 3}
{'type': 'loss', 'content': 0.03230217471718788, 'timestamp': '2025-10-02 01:05:21.255434', 'step': 30314, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:21.309655', 'step': 30314, 'epoch': 3}
{'type': 'loss', 'content': 0.042731184512376785, 'timestamp': '2025-10-02 01:05:21.312241', 'step': 30315, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:21.366953', 'step': 30315, 'epoch': 3}
{'type': 'loss', 'content': 0.01862463168799877, 'timestamp': '2025-10-02 01:05:21.372853', 'step': 30316, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:21.427311', 'step': 30316, 'epoch': 3}
{'type': 'loss', 'content': 0.025200866162776947, 'timestamp': '2025-10-02 01:05:21.436579', 'step': 30317, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:21.491631', 'step': 30317, 'epoch': 3}
{'type': 'loss', 'content': 0.03317508101463318, 'timestamp': '2025-10-02 01:05:21.497210', 'step': 30318, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 01:05:21.570992', 'step': 30318, 'epoch': 3}
{'type': 'loss', 'content': 0.012332797981798649, 'timestamp': '2025-10-02 01:05:21.584155', 'step': 30319, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:21.638035', 'step': 30319, 'epoch': 3}
{'type': 'loss', 'content': 0.04667873680591583, 'timestamp': '2025-10-02 01:05:21.644303', 'step': 30320, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:21.700053', 'step': 30320, 'epoch': 3}
{'type': 'loss', 'content': 0.004225894808769226, 'timestamp': '2025-10-02 01:05:21.702784', 'step': 30321, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:05:21.765511', 'step': 30321, 'epoch': 3}
{'type': 'loss', 'content': 0.020779835060238838, 'timestamp': '2025-10-02 01:05:21.776350', 'step': 30322, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:21.833348', 'step': 30322, 'epoch': 3}
{'type': 'loss', 'content': 0.02838471531867981, 'timestamp': '2025-10-02 01:05:21.840597', 'step': 30323, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:21.897067', 'step': 30323, 'epoch': 3}
{'type': 'loss', 'content': 0.027759842574596405, 'timestamp': '2025-10-02 01:05:21.904568', 'step': 30324, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:21.960249', 'step': 30324, 'epoch': 3}
{'type': 'loss', 'content': 0.04183661565184593, 'timestamp': '2025-10-02 01:05:21.963237', 'step': 30325, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:22.019322', 'step': 30325, 'epoch': 3}
{'type': 'loss', 'content': 0.1395878791809082, 'timestamp': '2025-10-02 01:05:22.022572', 'step': 30326, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:22.079980', 'step': 30326, 'epoch': 3}
{'type': 'loss', 'content': 0.0591314472258091, 'timestamp': '2025-10-02 01:05:22.083349', 'step': 30327, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:22.139278', 'step': 30327, 'epoch': 3}
{'type': 'loss', 'content': 0.037274062633514404, 'timestamp': '2025-10-02 01:05:22.146339', 'step': 30328, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:22.203721', 'step': 30328, 'epoch': 3}
{'type': 'loss', 'content': 0.08889506012201309, 'timestamp': '2025-10-02 01:05:22.206478', 'step': 30329, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:22.263224', 'step': 30329, 'epoch': 3}
{'type': 'loss', 'content': 0.03194482997059822, 'timestamp': '2025-10-02 01:05:22.266887', 'step': 30330, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:22.328519', 'step': 30330, 'epoch': 3}
{'type': 'loss', 'content': 0.03663629665970802, 'timestamp': '2025-10-02 01:05:22.338696', 'step': 30331, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:05:22.408521', 'step': 30331, 'epoch': 3}
{'type': 'loss', 'content': 0.033122338354587555, 'timestamp': '2025-10-02 01:05:22.421237', 'step': 30332, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:22.481608', 'step': 30332, 'epoch': 3}
{'type': 'loss', 'content': 0.0350036546587944, 'timestamp': '2025-10-02 01:05:22.492533', 'step': 30333, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:05:22.557812', 'step': 30333, 'epoch': 3}
{'type': 'loss', 'content': 0.007633761968463659, 'timestamp': '2025-10-02 01:05:22.568295', 'step': 30334, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:22.624966', 'step': 30334, 'epoch': 3}
{'type': 'loss', 'content': 0.01279045082628727, 'timestamp': '2025-10-02 01:05:22.628298', 'step': 30335, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:22.684590', 'step': 30335, 'epoch': 3}
{'type': 'loss', 'content': 0.03610522672533989, 'timestamp': '2025-10-02 01:05:22.691186', 'step': 30336, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:22.747609', 'step': 30336, 'epoch': 3}
{'type': 'loss', 'content': 0.07256869971752167, 'timestamp': '2025-10-02 01:05:22.750687', 'step': 30337, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:22.807683', 'step': 30337, 'epoch': 3}
{'type': 'loss', 'content': 0.043381623923778534, 'timestamp': '2025-10-02 01:05:22.810559', 'step': 30338, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:22.866850', 'step': 30338, 'epoch': 3}
{'type': 'loss', 'content': 0.006656531244516373, 'timestamp': '2025-10-02 01:05:22.870172', 'step': 30339, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:22.925886', 'step': 30339, 'epoch': 3}
{'type': 'loss', 'content': 0.02273399755358696, 'timestamp': '2025-10-02 01:05:22.932806', 'step': 30340, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:22.989664', 'step': 30340, 'epoch': 3}
{'type': 'loss', 'content': 0.010986574925482273, 'timestamp': '2025-10-02 01:05:22.995289', 'step': 30341, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:23.053177', 'step': 30341, 'epoch': 3}
{'type': 'loss', 'content': 0.02702213078737259, 'timestamp': '2025-10-02 01:05:23.057391', 'step': 30342, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:23.114400', 'step': 30342, 'epoch': 3}
{'type': 'loss', 'content': 0.08681561797857285, 'timestamp': '2025-10-02 01:05:23.117907', 'step': 30343, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:23.185814', 'step': 30343, 'epoch': 3}
{'type': 'loss', 'content': 0.0378817580640316, 'timestamp': '2025-10-02 01:05:23.196182', 'step': 30344, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:23.252290', 'step': 30344, 'epoch': 3}
{'type': 'loss', 'content': 0.038191139698028564, 'timestamp': '2025-10-02 01:05:23.262502', 'step': 30345, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 592], 'flops': 11840071943488.0}, 'timestamp': '2025-10-02 01:05:23.353011', 'step': 30345, 'epoch': 3}
{'type': 'loss', 'content': 0.00418747728690505, 'timestamp': '2025-10-02 01:05:23.369439', 'step': 30346, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 01:05:23.446075', 'step': 30346, 'epoch': 3}
{'type': 'loss', 'content': 0.013369505293667316, 'timestamp': '2025-10-02 01:05:23.459320', 'step': 30347, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:05:23.524569', 'step': 30347, 'epoch': 3}
{'type': 'loss', 'content': 0.03896322101354599, 'timestamp': '2025-10-02 01:05:23.535754', 'step': 30348, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:23.591472', 'step': 30348, 'epoch': 3}
{'type': 'loss', 'content': 0.009961603209376335, 'timestamp': '2025-10-02 01:05:23.594356', 'step': 30349, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:23.650304', 'step': 30349, 'epoch': 3}
{'type': 'loss', 'content': 0.010217989794909954, 'timestamp': '2025-10-02 01:05:23.653478', 'step': 30350, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:23.709702', 'step': 30350, 'epoch': 3}
{'type': 'loss', 'content': 0.03371458500623703, 'timestamp': '2025-10-02 01:05:23.713329', 'step': 30351, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:23.771246', 'step': 30351, 'epoch': 3}
{'type': 'loss', 'content': 0.00961106177419424, 'timestamp': '2025-10-02 01:05:23.778166', 'step': 30352, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:23.832827', 'step': 30352, 'epoch': 3}
{'type': 'loss', 'content': 0.0025565826799720526, 'timestamp': '2025-10-02 01:05:23.838690', 'step': 30353, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:23.895139', 'step': 30353, 'epoch': 3}
{'type': 'loss', 'content': 0.020409096032381058, 'timestamp': '2025-10-02 01:05:23.898313', 'step': 30354, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:23.955035', 'step': 30354, 'epoch': 3}
{'type': 'loss', 'content': 0.0002806142729241401, 'timestamp': '2025-10-02 01:05:23.957896', 'step': 30355, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:24.012862', 'step': 30355, 'epoch': 3}
{'type': 'loss', 'content': 0.05807835981249809, 'timestamp': '2025-10-02 01:05:24.019023', 'step': 30356, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:24.072626', 'step': 30356, 'epoch': 3}
{'type': 'loss', 'content': 0.06742581725120544, 'timestamp': '2025-10-02 01:05:24.075407', 'step': 30357, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:05:24.139240', 'step': 30357, 'epoch': 3}
{'type': 'loss', 'content': 0.02363349124789238, 'timestamp': '2025-10-02 01:05:24.149833', 'step': 30358, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:05:24.218633', 'step': 30358, 'epoch': 3}
{'type': 'loss', 'content': 0.02749640680849552, 'timestamp': '2025-10-02 01:05:24.230922', 'step': 30359, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:05:24.285542', 'step': 30359, 'epoch': 3}
{'type': 'loss', 'content': 0.042733293026685715, 'timestamp': '2025-10-02 01:05:24.291594', 'step': 30360, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:05:24.351240', 'step': 30360, 'epoch': 3}
{'type': 'loss', 'content': 0.029029028490185738, 'timestamp': '2025-10-02 01:05:24.362518', 'step': 30361, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:24.417324', 'step': 30361, 'epoch': 3}
{'type': 'loss', 'content': 0.06973526626825333, 'timestamp': '2025-10-02 01:05:24.420425', 'step': 30362, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:24.479302', 'step': 30362, 'epoch': 3}
{'type': 'loss', 'content': 0.01856417767703533, 'timestamp': '2025-10-02 01:05:24.489450', 'step': 30363, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:24.544566', 'step': 30363, 'epoch': 3}
{'type': 'loss', 'content': 0.02277412638068199, 'timestamp': '2025-10-02 01:05:24.550873', 'step': 30364, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:24.604631', 'step': 30364, 'epoch': 3}
{'type': 'loss', 'content': 0.08367772400379181, 'timestamp': '2025-10-02 01:05:24.607549', 'step': 30365, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:05:24.662292', 'step': 30365, 'epoch': 3}
{'type': 'loss', 'content': 0.036632902920246124, 'timestamp': '2025-10-02 01:05:24.665122', 'step': 30366, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:24.719486', 'step': 30366, 'epoch': 3}
{'type': 'loss', 'content': 0.038647692650556564, 'timestamp': '2025-10-02 01:05:24.722848', 'step': 30367, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:05:24.776863', 'step': 30367, 'epoch': 3}
{'type': 'loss', 'content': 0.020001960918307304, 'timestamp': '2025-10-02 01:05:24.783231', 'step': 30368, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:24.836860', 'step': 30368, 'epoch': 3}
{'type': 'loss', 'content': 0.024202516302466393, 'timestamp': '2025-10-02 01:05:24.839425', 'step': 30369, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:05:24.893626', 'step': 30369, 'epoch': 3}
{'type': 'loss', 'content': 0.04501763731241226, 'timestamp': '2025-10-02 01:05:24.896375', 'step': 30370, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:24.951528', 'step': 30370, 'epoch': 3}
{'type': 'loss', 'content': 0.007896502502262592, 'timestamp': '2025-10-02 01:05:24.954051', 'step': 30371, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:25.009896', 'step': 30371, 'epoch': 3}
{'type': 'loss', 'content': 0.023746144026517868, 'timestamp': '2025-10-02 01:05:25.016087', 'step': 30372, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:05:25.071174', 'step': 30372, 'epoch': 3}
{'type': 'loss', 'content': 0.07355896383523941, 'timestamp': '2025-10-02 01:05:25.073600', 'step': 30373, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:25.133360', 'step': 30373, 'epoch': 3}
{'type': 'loss', 'content': 0.033825360238552094, 'timestamp': '2025-10-02 01:05:25.143524', 'step': 30374, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:25.200120', 'step': 30374, 'epoch': 3}
{'type': 'loss', 'content': 0.015148011036217213, 'timestamp': '2025-10-02 01:05:25.209665', 'step': 30375, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:25.265284', 'step': 30375, 'epoch': 3}
{'type': 'loss', 'content': 0.022439375519752502, 'timestamp': '2025-10-02 01:05:25.271515', 'step': 30376, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:25.325059', 'step': 30376, 'epoch': 3}
{'type': 'loss', 'content': 0.08507771044969559, 'timestamp': '2025-10-02 01:05:25.327522', 'step': 30377, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:25.381978', 'step': 30377, 'epoch': 3}
{'type': 'loss', 'content': 0.02260635606944561, 'timestamp': '2025-10-02 01:05:25.387706', 'step': 30378, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:25.443206', 'step': 30378, 'epoch': 3}
{'type': 'loss', 'content': 0.011466368101537228, 'timestamp': '2025-10-02 01:05:25.448792', 'step': 30379, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:25.503876', 'step': 30379, 'epoch': 3}
{'type': 'loss', 'content': 0.02338365465402603, 'timestamp': '2025-10-02 01:05:25.510212', 'step': 30380, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:25.564635', 'step': 30380, 'epoch': 3}
{'type': 'loss', 'content': 0.023287439718842506, 'timestamp': '2025-10-02 01:05:25.567041', 'step': 30381, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:05:25.621791', 'step': 30381, 'epoch': 3}
{'type': 'loss', 'content': 0.015790291130542755, 'timestamp': '2025-10-02 01:05:25.624231', 'step': 30382, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:25.679707', 'step': 30382, 'epoch': 3}
{'type': 'loss', 'content': 0.07669678330421448, 'timestamp': '2025-10-02 01:05:25.682110', 'step': 30383, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:25.737671', 'step': 30383, 'epoch': 3}
{'type': 'loss', 'content': 0.11546593904495239, 'timestamp': '2025-10-02 01:05:25.743868', 'step': 30384, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:05:25.797657', 'step': 30384, 'epoch': 3}
{'type': 'loss', 'content': 0.03139711543917656, 'timestamp': '2025-10-02 01:05:25.800255', 'step': 30385, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:25.854135', 'step': 30385, 'epoch': 3}
{'type': 'loss', 'content': 0.09829193353652954, 'timestamp': '2025-10-02 01:05:25.856855', 'step': 30386, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:25.911924', 'step': 30386, 'epoch': 3}
{'type': 'loss', 'content': 0.023838048800826073, 'timestamp': '2025-10-02 01:05:25.914338', 'step': 30387, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:25.968252', 'step': 30387, 'epoch': 3}
{'type': 'loss', 'content': 0.010446848347783089, 'timestamp': '2025-10-02 01:05:25.974340', 'step': 30388, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:26.027983', 'step': 30388, 'epoch': 3}
{'type': 'loss', 'content': 0.033610131591558456, 'timestamp': '2025-10-02 01:05:26.030401', 'step': 30389, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:26.084183', 'step': 30389, 'epoch': 3}
{'type': 'loss', 'content': 0.06386439502239227, 'timestamp': '2025-10-02 01:05:26.086989', 'step': 30390, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:26.142485', 'step': 30390, 'epoch': 3}
{'type': 'loss', 'content': 0.03127092495560646, 'timestamp': '2025-10-02 01:05:26.149719', 'step': 30391, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:26.204380', 'step': 30391, 'epoch': 3}
{'type': 'loss', 'content': 0.07291554659605026, 'timestamp': '2025-10-02 01:05:26.210338', 'step': 30392, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:26.264196', 'step': 30392, 'epoch': 3}
{'type': 'loss', 'content': 0.05422838404774666, 'timestamp': '2025-10-02 01:05:26.273551', 'step': 30393, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:26.328626', 'step': 30393, 'epoch': 3}
{'type': 'loss', 'content': 0.027828915044665337, 'timestamp': '2025-10-02 01:05:26.334290', 'step': 30394, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:05:26.388492', 'step': 30394, 'epoch': 3}
{'type': 'loss', 'content': 0.06738892942667007, 'timestamp': '2025-10-02 01:05:26.390888', 'step': 30395, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:05:26.445512', 'step': 30395, 'epoch': 3}
{'type': 'loss', 'content': 0.09208899736404419, 'timestamp': '2025-10-02 01:05:26.451857', 'step': 30396, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:26.505390', 'step': 30396, 'epoch': 3}
{'type': 'loss', 'content': 0.021578632295131683, 'timestamp': '2025-10-02 01:05:26.508413', 'step': 30397, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:26.565175', 'step': 30397, 'epoch': 3}
{'type': 'loss', 'content': 0.002256244421005249, 'timestamp': '2025-10-02 01:05:26.567568', 'step': 30398, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:26.623703', 'step': 30398, 'epoch': 3}
{'type': 'loss', 'content': 0.028491538017988205, 'timestamp': '2025-10-02 01:05:26.626528', 'step': 30399, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:26.681797', 'step': 30399, 'epoch': 3}
{'type': 'loss', 'content': 0.055337224155664444, 'timestamp': '2025-10-02 01:05:26.687934', 'step': 30400, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:26.742643', 'step': 30400, 'epoch': 3}
{'type': 'loss', 'content': 0.0033741204533725977, 'timestamp': '2025-10-02 01:05:26.752876', 'step': 30401, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:26.808770', 'step': 30401, 'epoch': 3}
{'type': 'loss', 'content': 0.014273139648139477, 'timestamp': '2025-10-02 01:05:26.814315', 'step': 30402, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:26.871642', 'step': 30402, 'epoch': 3}
{'type': 'loss', 'content': 0.024361489340662956, 'timestamp': '2025-10-02 01:05:26.881134', 'step': 30403, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:05:26.942297', 'step': 30403, 'epoch': 3}
{'type': 'loss', 'content': 0.025155123323202133, 'timestamp': '2025-10-02 01:05:26.953445', 'step': 30404, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:27.006983', 'step': 30404, 'epoch': 3}
{'type': 'loss', 'content': 0.0106973797082901, 'timestamp': '2025-10-02 01:05:27.014384', 'step': 30405, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:27.069624', 'step': 30405, 'epoch': 3}
{'type': 'loss', 'content': 0.022046171128749847, 'timestamp': '2025-10-02 01:05:27.075210', 'step': 30406, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:05:27.137461', 'step': 30406, 'epoch': 3}
{'type': 'loss', 'content': 0.0014422121457755566, 'timestamp': '2025-10-02 01:05:27.148123', 'step': 30407, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:27.202917', 'step': 30407, 'epoch': 3}
{'type': 'loss', 'content': 0.0010205537546426058, 'timestamp': '2025-10-02 01:05:27.208958', 'step': 30408, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:27.263054', 'step': 30408, 'epoch': 3}
{'type': 'loss', 'content': 0.00807406846433878, 'timestamp': '2025-10-02 01:05:27.269484', 'step': 30409, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:27.331898', 'step': 30409, 'epoch': 3}
{'type': 'loss', 'content': 0.013825074769556522, 'timestamp': '2025-10-02 01:05:27.339315', 'step': 30410, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:27.393656', 'step': 30410, 'epoch': 3}
{'type': 'loss', 'content': 0.01929451897740364, 'timestamp': '2025-10-02 01:05:27.400930', 'step': 30411, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:27.456028', 'step': 30411, 'epoch': 3}
{'type': 'loss', 'content': 0.048981036990880966, 'timestamp': '2025-10-02 01:05:27.462924', 'step': 30412, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:27.516856', 'step': 30412, 'epoch': 3}
{'type': 'loss', 'content': 0.018121322616934776, 'timestamp': '2025-10-02 01:05:27.519779', 'step': 30413, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:27.574317', 'step': 30413, 'epoch': 3}
{'type': 'loss', 'content': 0.06042315065860748, 'timestamp': '2025-10-02 01:05:27.576793', 'step': 30414, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:27.631360', 'step': 30414, 'epoch': 3}
{'type': 'loss', 'content': 0.01831449568271637, 'timestamp': '2025-10-02 01:05:27.633909', 'step': 30415, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:05:27.688397', 'step': 30415, 'epoch': 3}
{'type': 'loss', 'content': 0.06135539710521698, 'timestamp': '2025-10-02 01:05:27.694387', 'step': 30416, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:27.748785', 'step': 30416, 'epoch': 3}
{'type': 'loss', 'content': 0.06551507115364075, 'timestamp': '2025-10-02 01:05:27.751787', 'step': 30417, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:27.806677', 'step': 30417, 'epoch': 3}
{'type': 'loss', 'content': 0.007665218785405159, 'timestamp': '2025-10-02 01:05:27.809216', 'step': 30418, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:05:27.872160', 'step': 30418, 'epoch': 3}
{'type': 'loss', 'content': 0.057528574019670486, 'timestamp': '2025-10-02 01:05:27.882801', 'step': 30419, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:27.941685', 'step': 30419, 'epoch': 3}
{'type': 'loss', 'content': 0.0407068096101284, 'timestamp': '2025-10-02 01:05:27.949632', 'step': 30420, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:28.003295', 'step': 30420, 'epoch': 3}
{'type': 'loss', 'content': 0.0573960542678833, 'timestamp': '2025-10-02 01:05:28.005931', 'step': 30421, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:28.061146', 'step': 30421, 'epoch': 3}
{'type': 'loss', 'content': 0.019461698830127716, 'timestamp': '2025-10-02 01:05:28.066879', 'step': 30422, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:28.126524', 'step': 30422, 'epoch': 3}
{'type': 'loss', 'content': 0.06226462870836258, 'timestamp': '2025-10-02 01:05:28.136694', 'step': 30423, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:28.192499', 'step': 30423, 'epoch': 3}
{'type': 'loss', 'content': 0.053766652941703796, 'timestamp': '2025-10-02 01:05:28.198724', 'step': 30424, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:28.253176', 'step': 30424, 'epoch': 3}
{'type': 'loss', 'content': 0.0004913565353490412, 'timestamp': '2025-10-02 01:05:28.262516', 'step': 30425, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:28.318827', 'step': 30425, 'epoch': 3}
{'type': 'loss', 'content': 0.07970788329839706, 'timestamp': '2025-10-02 01:05:28.321346', 'step': 30426, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:28.376550', 'step': 30426, 'epoch': 3}
{'type': 'loss', 'content': 0.017091158777475357, 'timestamp': '2025-10-02 01:05:28.383992', 'step': 30427, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:28.438794', 'step': 30427, 'epoch': 3}
{'type': 'loss', 'content': 0.028555752709507942, 'timestamp': '2025-10-02 01:05:28.445243', 'step': 30428, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:28.500760', 'step': 30428, 'epoch': 3}
{'type': 'loss', 'content': 0.021457919850945473, 'timestamp': '2025-10-02 01:05:28.503837', 'step': 30429, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:05:28.558934', 'step': 30429, 'epoch': 3}
{'type': 'loss', 'content': 0.027310810983181, 'timestamp': '2025-10-02 01:05:28.561372', 'step': 30430, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:28.625219', 'step': 30430, 'epoch': 3}
{'type': 'loss', 'content': 0.026970984414219856, 'timestamp': '2025-10-02 01:05:28.635399', 'step': 30431, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:05:28.689791', 'step': 30431, 'epoch': 3}
{'type': 'loss', 'content': 0.06441233307123184, 'timestamp': '2025-10-02 01:05:28.695986', 'step': 30432, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:28.751983', 'step': 30432, 'epoch': 3}
{'type': 'loss', 'content': 0.02325982041656971, 'timestamp': '2025-10-02 01:05:28.754522', 'step': 30433, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:28.809299', 'step': 30433, 'epoch': 3}
{'type': 'loss', 'content': 0.023689033463597298, 'timestamp': '2025-10-02 01:05:28.818576', 'step': 30434, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:28.880883', 'step': 30434, 'epoch': 3}
{'type': 'loss', 'content': 0.04587684944272041, 'timestamp': '2025-10-02 01:05:28.890382', 'step': 30435, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:28.946082', 'step': 30435, 'epoch': 3}
{'type': 'loss', 'content': 0.018332716077566147, 'timestamp': '2025-10-02 01:05:28.956407', 'step': 30436, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:29.012103', 'step': 30436, 'epoch': 3}
{'type': 'loss', 'content': 0.016685187816619873, 'timestamp': '2025-10-02 01:05:29.015488', 'step': 30437, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:29.070297', 'step': 30437, 'epoch': 3}
{'type': 'loss', 'content': 0.07387559860944748, 'timestamp': '2025-10-02 01:05:29.077766', 'step': 30438, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:05:29.140624', 'step': 30438, 'epoch': 3}
{'type': 'loss', 'content': 0.0023446097038686275, 'timestamp': '2025-10-02 01:05:29.151288', 'step': 30439, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:29.207243', 'step': 30439, 'epoch': 3}
{'type': 'loss', 'content': 0.03406151011586189, 'timestamp': '2025-10-02 01:05:29.213655', 'step': 30440, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:29.267843', 'step': 30440, 'epoch': 3}
{'type': 'loss', 'content': 0.04114428535103798, 'timestamp': '2025-10-02 01:05:29.270223', 'step': 30441, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 01:05:29.340513', 'step': 30441, 'epoch': 3}
{'type': 'loss', 'content': 0.026517879217863083, 'timestamp': '2025-10-02 01:05:29.352965', 'step': 30442, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:05:29.423758', 'step': 30442, 'epoch': 3}
{'type': 'loss', 'content': 0.03199295327067375, 'timestamp': '2025-10-02 01:05:29.436069', 'step': 30443, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:29.490181', 'step': 30443, 'epoch': 3}
{'type': 'loss', 'content': 0.11553606390953064, 'timestamp': '2025-10-02 01:05:29.496210', 'step': 30444, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:29.550879', 'step': 30444, 'epoch': 3}
{'type': 'loss', 'content': 0.03783164173364639, 'timestamp': '2025-10-02 01:05:29.553409', 'step': 30445, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:29.607659', 'step': 30445, 'epoch': 3}
{'type': 'loss', 'content': 0.06639189273118973, 'timestamp': '2025-10-02 01:05:29.610233', 'step': 30446, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:29.665090', 'step': 30446, 'epoch': 3}
{'type': 'loss', 'content': 0.08930225670337677, 'timestamp': '2025-10-02 01:05:29.667498', 'step': 30447, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:29.722898', 'step': 30447, 'epoch': 3}
{'type': 'loss', 'content': 0.01424728985875845, 'timestamp': '2025-10-02 01:05:29.731025', 'step': 30448, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:29.785083', 'step': 30448, 'epoch': 3}
{'type': 'loss', 'content': 0.08361970633268356, 'timestamp': '2025-10-02 01:05:29.787505', 'step': 30449, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:29.841799', 'step': 30449, 'epoch': 3}
{'type': 'loss', 'content': 0.02308960072696209, 'timestamp': '2025-10-02 01:05:29.844214', 'step': 30450, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:05:29.898762', 'step': 30450, 'epoch': 3}
{'type': 'loss', 'content': 0.0847623273730278, 'timestamp': '2025-10-02 01:05:29.901431', 'step': 30451, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:05:29.956009', 'step': 30451, 'epoch': 3}
{'type': 'loss', 'content': 0.053336963057518005, 'timestamp': '2025-10-02 01:05:29.962085', 'step': 30452, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:30.016755', 'step': 30452, 'epoch': 3}
{'type': 'loss', 'content': 0.06748350709676743, 'timestamp': '2025-10-02 01:05:30.019486', 'step': 30453, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:30.078884', 'step': 30453, 'epoch': 3}
{'type': 'loss', 'content': 0.007588846143335104, 'timestamp': '2025-10-02 01:05:30.084600', 'step': 30454, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:30.140390', 'step': 30454, 'epoch': 3}
{'type': 'loss', 'content': 0.00409004557877779, 'timestamp': '2025-10-02 01:05:30.146066', 'step': 30455, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:30.201408', 'step': 30455, 'epoch': 3}
{'type': 'loss', 'content': 0.1256968230009079, 'timestamp': '2025-10-02 01:05:30.207953', 'step': 30456, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:30.262770', 'step': 30456, 'epoch': 3}
{'type': 'loss', 'content': 0.04695916175842285, 'timestamp': '2025-10-02 01:05:30.265394', 'step': 30457, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:30.322202', 'step': 30457, 'epoch': 3}
{'type': 'loss', 'content': 0.017362307757139206, 'timestamp': '2025-10-02 01:05:30.328016', 'step': 30458, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:30.383628', 'step': 30458, 'epoch': 3}
{'type': 'loss', 'content': 0.013083364814519882, 'timestamp': '2025-10-02 01:05:30.386501', 'step': 30459, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:30.442526', 'step': 30459, 'epoch': 3}
{'type': 'loss', 'content': 0.052378129214048386, 'timestamp': '2025-10-02 01:05:30.449091', 'step': 30460, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:05:30.502624', 'step': 30460, 'epoch': 3}
{'type': 'loss', 'content': 0.06289050728082657, 'timestamp': '2025-10-02 01:05:30.505368', 'step': 30461, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:30.559761', 'step': 30461, 'epoch': 3}
{'type': 'loss', 'content': 0.006987314671278, 'timestamp': '2025-10-02 01:05:30.565592', 'step': 30462, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:05:30.627993', 'step': 30462, 'epoch': 3}
{'type': 'loss', 'content': 0.013079862110316753, 'timestamp': '2025-10-02 01:05:30.638600', 'step': 30463, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:30.693751', 'step': 30463, 'epoch': 3}
{'type': 'loss', 'content': 0.01725812442600727, 'timestamp': '2025-10-02 01:05:30.699876', 'step': 30464, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:30.753760', 'step': 30464, 'epoch': 3}
{'type': 'loss', 'content': 0.022086376324295998, 'timestamp': '2025-10-02 01:05:30.756294', 'step': 30465, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:30.810878', 'step': 30465, 'epoch': 3}
{'type': 'loss', 'content': 0.015017095021903515, 'timestamp': '2025-10-02 01:05:30.813629', 'step': 30466, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:30.868109', 'step': 30466, 'epoch': 3}
{'type': 'loss', 'content': 0.0379643514752388, 'timestamp': '2025-10-02 01:05:30.873734', 'step': 30467, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:05:30.932474', 'step': 30467, 'epoch': 3}
{'type': 'loss', 'content': 0.054766517132520676, 'timestamp': '2025-10-02 01:05:30.939437', 'step': 30468, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:30.996086', 'step': 30468, 'epoch': 3}
{'type': 'loss', 'content': 0.03542564809322357, 'timestamp': '2025-10-02 01:05:30.999138', 'step': 30469, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:31.056160', 'step': 30469, 'epoch': 3}
{'type': 'loss', 'content': 0.05655021220445633, 'timestamp': '2025-10-02 01:05:31.059652', 'step': 30470, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:31.115658', 'step': 30470, 'epoch': 3}
{'type': 'loss', 'content': 0.026959314942359924, 'timestamp': '2025-10-02 01:05:31.123036', 'step': 30471, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:31.180448', 'step': 30471, 'epoch': 3}
{'type': 'loss', 'content': 0.04492409527301788, 'timestamp': '2025-10-02 01:05:31.186168', 'step': 30472, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:31.242104', 'step': 30472, 'epoch': 3}
{'type': 'loss', 'content': 0.003982479218393564, 'timestamp': '2025-10-02 01:05:31.244771', 'step': 30473, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:31.302050', 'step': 30473, 'epoch': 3}
{'type': 'loss', 'content': 0.07060243934392929, 'timestamp': '2025-10-02 01:05:31.305097', 'step': 30474, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:31.361779', 'step': 30474, 'epoch': 3}
{'type': 'loss', 'content': 0.05204629898071289, 'timestamp': '2025-10-02 01:05:31.365086', 'step': 30475, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:31.422124', 'step': 30475, 'epoch': 3}
{'type': 'loss', 'content': 0.03265600651502609, 'timestamp': '2025-10-02 01:05:31.432243', 'step': 30476, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:31.489835', 'step': 30476, 'epoch': 3}
{'type': 'loss', 'content': 0.05558990687131882, 'timestamp': '2025-10-02 01:05:31.492636', 'step': 30477, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:31.549403', 'step': 30477, 'epoch': 3}
{'type': 'loss', 'content': 0.041853953152894974, 'timestamp': '2025-10-02 01:05:31.552339', 'step': 30478, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:31.613338', 'step': 30478, 'epoch': 3}
{'type': 'loss', 'content': 0.04243411123752594, 'timestamp': '2025-10-02 01:05:31.623474', 'step': 30479, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:31.679605', 'step': 30479, 'epoch': 3}
{'type': 'loss', 'content': 0.04333953186869621, 'timestamp': '2025-10-02 01:05:31.686568', 'step': 30480, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:31.743867', 'step': 30480, 'epoch': 3}
{'type': 'loss', 'content': 0.05361855775117874, 'timestamp': '2025-10-02 01:05:31.751080', 'step': 30481, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:05:31.807699', 'step': 30481, 'epoch': 3}
{'type': 'loss', 'content': 0.008446821011602879, 'timestamp': '2025-10-02 01:05:31.810932', 'step': 30482, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:05:31.876107', 'step': 30482, 'epoch': 3}
{'type': 'loss', 'content': 0.02126086689531803, 'timestamp': '2025-10-02 01:05:31.886532', 'step': 30483, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:31.941410', 'step': 30483, 'epoch': 3}
{'type': 'loss', 'content': 0.013289041817188263, 'timestamp': '2025-10-02 01:05:31.947514', 'step': 30484, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:32.002239', 'step': 30484, 'epoch': 3}
{'type': 'loss', 'content': 0.05245441198348999, 'timestamp': '2025-10-02 01:05:32.004974', 'step': 30485, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:32.060442', 'step': 30485, 'epoch': 3}
{'type': 'loss', 'content': 0.03051295503973961, 'timestamp': '2025-10-02 01:05:32.067855', 'step': 30486, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:32.123764', 'step': 30486, 'epoch': 3}
{'type': 'loss', 'content': 0.027473794296383858, 'timestamp': '2025-10-02 01:05:32.126207', 'step': 30487, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:32.180178', 'step': 30487, 'epoch': 3}
{'type': 'loss', 'content': 0.05290409177541733, 'timestamp': '2025-10-02 01:05:32.186448', 'step': 30488, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:32.240713', 'step': 30488, 'epoch': 3}
{'type': 'loss', 'content': 0.013445820659399033, 'timestamp': '2025-10-02 01:05:32.243320', 'step': 30489, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:32.299068', 'step': 30489, 'epoch': 3}
{'type': 'loss', 'content': 0.004236994311213493, 'timestamp': '2025-10-02 01:05:32.301665', 'step': 30490, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:32.357029', 'step': 30490, 'epoch': 3}
{'type': 'loss', 'content': 0.06893777847290039, 'timestamp': '2025-10-02 01:05:32.359607', 'step': 30491, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:32.414602', 'step': 30491, 'epoch': 3}
{'type': 'loss', 'content': 0.005255711730569601, 'timestamp': '2025-10-02 01:05:32.420590', 'step': 30492, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:32.474303', 'step': 30492, 'epoch': 3}
{'type': 'loss', 'content': 0.012801721692085266, 'timestamp': '2025-10-02 01:05:32.476728', 'step': 30493, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:05:32.531596', 'step': 30493, 'epoch': 3}
{'type': 'loss', 'content': 0.08165749162435532, 'timestamp': '2025-10-02 01:05:32.534143', 'step': 30494, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 01:05:32.609379', 'step': 30494, 'epoch': 3}
{'type': 'loss', 'content': 0.002936597913503647, 'timestamp': '2025-10-02 01:05:32.622597', 'step': 30495, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:32.678134', 'step': 30495, 'epoch': 3}
{'type': 'loss', 'content': 0.08905431628227234, 'timestamp': '2025-10-02 01:05:32.684371', 'step': 30496, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:32.738822', 'step': 30496, 'epoch': 3}
{'type': 'loss', 'content': 0.0588063970208168, 'timestamp': '2025-10-02 01:05:32.740976', 'step': 30497, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:32.794954', 'step': 30497, 'epoch': 3}
{'type': 'loss', 'content': 0.03257555887103081, 'timestamp': '2025-10-02 01:05:32.802487', 'step': 30498, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:32.857309', 'step': 30498, 'epoch': 3}
{'type': 'loss', 'content': 0.0617646798491478, 'timestamp': '2025-10-02 01:05:32.859889', 'step': 30499, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:32.915799', 'step': 30499, 'epoch': 3}
{'type': 'loss', 'content': 0.1375819891691208, 'timestamp': '2025-10-02 01:05:32.926099', 'step': 30500, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 30500', 'timestamp': '2025-10-02 01:05:33.346722', 'step': 30500, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:05:33.412617', 'step': 30500, 'epoch': 3}
{'type': 'loss', 'content': 0.04887394607067108, 'timestamp': '2025-10-02 01:05:33.425943', 'step': 30501, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:33.482399', 'step': 30501, 'epoch': 3}
{'type': 'loss', 'content': 0.037166155874729156, 'timestamp': '2025-10-02 01:05:33.484943', 'step': 30502, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:33.540013', 'step': 30502, 'epoch': 3}
{'type': 'loss', 'content': 0.060892872512340546, 'timestamp': '2025-10-02 01:05:33.542777', 'step': 30503, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:05:33.611958', 'step': 30503, 'epoch': 3}
{'type': 'loss', 'content': 0.011120988056063652, 'timestamp': '2025-10-02 01:05:33.624974', 'step': 30504, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:05:33.679881', 'step': 30504, 'epoch': 3}
{'type': 'loss', 'content': 0.06607703864574432, 'timestamp': '2025-10-02 01:05:33.682397', 'step': 30505, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:33.742680', 'step': 30505, 'epoch': 3}
{'type': 'loss', 'content': 0.00023493314802180976, 'timestamp': '2025-10-02 01:05:33.752814', 'step': 30506, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:33.807938', 'step': 30506, 'epoch': 3}
{'type': 'loss', 'content': 0.019416971132159233, 'timestamp': '2025-10-02 01:05:33.810425', 'step': 30507, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:33.864671', 'step': 30507, 'epoch': 3}
{'type': 'loss', 'content': 0.09126748144626617, 'timestamp': '2025-10-02 01:05:33.870831', 'step': 30508, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:05:33.924916', 'step': 30508, 'epoch': 3}
{'type': 'loss', 'content': 0.12318915873765945, 'timestamp': '2025-10-02 01:05:33.927418', 'step': 30509, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:33.982646', 'step': 30509, 'epoch': 3}
{'type': 'loss', 'content': 0.04273347929120064, 'timestamp': '2025-10-02 01:05:33.985249', 'step': 30510, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:34.040001', 'step': 30510, 'epoch': 3}
{'type': 'loss', 'content': 0.06303257495164871, 'timestamp': '2025-10-02 01:05:34.042994', 'step': 30511, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:34.098472', 'step': 30511, 'epoch': 3}
{'type': 'loss', 'content': 0.052701450884342194, 'timestamp': '2025-10-02 01:05:34.106488', 'step': 30512, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:34.161813', 'step': 30512, 'epoch': 3}
{'type': 'loss', 'content': 0.028354723006486893, 'timestamp': '2025-10-02 01:05:34.164476', 'step': 30513, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:34.219432', 'step': 30513, 'epoch': 3}
{'type': 'loss', 'content': 0.02635946124792099, 'timestamp': '2025-10-02 01:05:34.221881', 'step': 30514, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:34.276211', 'step': 30514, 'epoch': 3}
{'type': 'loss', 'content': 0.047889843583106995, 'timestamp': '2025-10-02 01:05:34.278584', 'step': 30515, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:05:34.340944', 'step': 30515, 'epoch': 3}
{'type': 'loss', 'content': 0.005284573882818222, 'timestamp': '2025-10-02 01:05:34.352205', 'step': 30516, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:34.406689', 'step': 30516, 'epoch': 3}
{'type': 'loss', 'content': 0.003930169623345137, 'timestamp': '2025-10-02 01:05:34.415989', 'step': 30517, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:34.471929', 'step': 30517, 'epoch': 3}
{'type': 'loss', 'content': 0.05759346857666969, 'timestamp': '2025-10-02 01:05:34.475043', 'step': 30518, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:34.530795', 'step': 30518, 'epoch': 3}
{'type': 'loss', 'content': 0.05812899023294449, 'timestamp': '2025-10-02 01:05:34.533298', 'step': 30519, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:34.587562', 'step': 30519, 'epoch': 3}
{'type': 'loss', 'content': 0.021581970155239105, 'timestamp': '2025-10-02 01:05:34.593917', 'step': 30520, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:34.648082', 'step': 30520, 'epoch': 3}
{'type': 'loss', 'content': 0.03173510730266571, 'timestamp': '2025-10-02 01:05:34.651540', 'step': 30521, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:34.705774', 'step': 30521, 'epoch': 3}
{'type': 'loss', 'content': 0.07217351347208023, 'timestamp': '2025-10-02 01:05:34.708396', 'step': 30522, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:34.762954', 'step': 30522, 'epoch': 3}
{'type': 'loss', 'content': 0.05707329139113426, 'timestamp': '2025-10-02 01:05:34.765338', 'step': 30523, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:34.824681', 'step': 30523, 'epoch': 3}
{'type': 'loss', 'content': 0.031507156789302826, 'timestamp': '2025-10-02 01:05:34.835636', 'step': 30524, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:34.890632', 'step': 30524, 'epoch': 3}
{'type': 'loss', 'content': 0.06978431344032288, 'timestamp': '2025-10-02 01:05:34.893387', 'step': 30525, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:34.947686', 'step': 30525, 'epoch': 3}
{'type': 'loss', 'content': 0.14199841022491455, 'timestamp': '2025-10-02 01:05:34.950255', 'step': 30526, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:35.007206', 'step': 30526, 'epoch': 3}
{'type': 'loss', 'content': 0.021446645259857178, 'timestamp': '2025-10-02 01:05:35.016677', 'step': 30527, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:35.071245', 'step': 30527, 'epoch': 3}
{'type': 'loss', 'content': 0.07239674776792526, 'timestamp': '2025-10-02 01:05:35.078158', 'step': 30528, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:35.132521', 'step': 30528, 'epoch': 3}
{'type': 'loss', 'content': 0.04210076481103897, 'timestamp': '2025-10-02 01:05:35.142684', 'step': 30529, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:35.197677', 'step': 30529, 'epoch': 3}
{'type': 'loss', 'content': 0.04985219985246658, 'timestamp': '2025-10-02 01:05:35.200005', 'step': 30530, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:05:35.261828', 'step': 30530, 'epoch': 3}
{'type': 'loss', 'content': 0.008328113704919815, 'timestamp': '2025-10-02 01:05:35.272314', 'step': 30531, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:35.339191', 'step': 30531, 'epoch': 3}
{'type': 'loss', 'content': 0.05780847370624542, 'timestamp': '2025-10-02 01:05:35.346202', 'step': 30532, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:05:35.407620', 'step': 30532, 'epoch': 3}
{'type': 'loss', 'content': 0.004497756715863943, 'timestamp': '2025-10-02 01:05:35.419332', 'step': 30533, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:35.474076', 'step': 30533, 'epoch': 3}
{'type': 'loss', 'content': 0.05981520563364029, 'timestamp': '2025-10-02 01:05:35.481346', 'step': 30534, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:35.541196', 'step': 30534, 'epoch': 3}
{'type': 'loss', 'content': 0.017458241432905197, 'timestamp': '2025-10-02 01:05:35.551391', 'step': 30535, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:05:35.613402', 'step': 30535, 'epoch': 3}
{'type': 'loss', 'content': 0.00978165678679943, 'timestamp': '2025-10-02 01:05:35.624614', 'step': 30536, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:35.680047', 'step': 30536, 'epoch': 3}
{'type': 'loss', 'content': 0.03704186528921127, 'timestamp': '2025-10-02 01:05:35.682332', 'step': 30537, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:35.737042', 'step': 30537, 'epoch': 3}
{'type': 'loss', 'content': 0.030382942408323288, 'timestamp': '2025-10-02 01:05:35.739540', 'step': 30538, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:35.794725', 'step': 30538, 'epoch': 3}
{'type': 'loss', 'content': 0.1355186402797699, 'timestamp': '2025-10-02 01:05:35.797434', 'step': 30539, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:35.851996', 'step': 30539, 'epoch': 3}
{'type': 'loss', 'content': 0.02563772350549698, 'timestamp': '2025-10-02 01:05:35.858088', 'step': 30540, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:35.913310', 'step': 30540, 'epoch': 3}
{'type': 'loss', 'content': 0.06437130272388458, 'timestamp': '2025-10-02 01:05:35.915825', 'step': 30541, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:35.970157', 'step': 30541, 'epoch': 3}
{'type': 'loss', 'content': 0.037271980196237564, 'timestamp': '2025-10-02 01:05:35.972738', 'step': 30542, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:36.028201', 'step': 30542, 'epoch': 3}
{'type': 'loss', 'content': 0.0037346009630709887, 'timestamp': '2025-10-02 01:05:36.037544', 'step': 30543, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:36.093027', 'step': 30543, 'epoch': 3}
{'type': 'loss', 'content': 0.1091434434056282, 'timestamp': '2025-10-02 01:05:36.100131', 'step': 30544, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:36.157473', 'step': 30544, 'epoch': 3}
{'type': 'loss', 'content': 0.0054678041487932205, 'timestamp': '2025-10-02 01:05:36.164792', 'step': 30545, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:36.225000', 'step': 30545, 'epoch': 3}
{'type': 'loss', 'content': 0.09386615455150604, 'timestamp': '2025-10-02 01:05:36.235142', 'step': 30546, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:36.294405', 'step': 30546, 'epoch': 3}
{'type': 'loss', 'content': 0.0376981683075428, 'timestamp': '2025-10-02 01:05:36.304583', 'step': 30547, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:36.360455', 'step': 30547, 'epoch': 3}
{'type': 'loss', 'content': 0.06939887255430222, 'timestamp': '2025-10-02 01:05:36.366957', 'step': 30548, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:36.422729', 'step': 30548, 'epoch': 3}
{'type': 'loss', 'content': 0.03650197759270668, 'timestamp': '2025-10-02 01:05:36.432134', 'step': 30549, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:36.486816', 'step': 30549, 'epoch': 3}
{'type': 'loss', 'content': 0.08487295359373093, 'timestamp': '2025-10-02 01:05:36.489355', 'step': 30550, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:36.544808', 'step': 30550, 'epoch': 3}
{'type': 'loss', 'content': 0.03021242655813694, 'timestamp': '2025-10-02 01:05:36.547282', 'step': 30551, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:36.603518', 'step': 30551, 'epoch': 3}
{'type': 'loss', 'content': 0.04142484441399574, 'timestamp': '2025-10-02 01:05:36.609724', 'step': 30552, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:36.664113', 'step': 30552, 'epoch': 3}
{'type': 'loss', 'content': 0.038558389991521835, 'timestamp': '2025-10-02 01:05:36.674315', 'step': 30553, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:36.729829', 'step': 30553, 'epoch': 3}
{'type': 'loss', 'content': 0.07510004937648773, 'timestamp': '2025-10-02 01:05:36.732296', 'step': 30554, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:36.787474', 'step': 30554, 'epoch': 3}
{'type': 'loss', 'content': 0.09375111758708954, 'timestamp': '2025-10-02 01:05:36.789928', 'step': 30555, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:36.845167', 'step': 30555, 'epoch': 3}
{'type': 'loss', 'content': 0.01186045166105032, 'timestamp': '2025-10-02 01:05:36.855179', 'step': 30556, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:36.909084', 'step': 30556, 'epoch': 3}
{'type': 'loss', 'content': 0.09429574757814407, 'timestamp': '2025-10-02 01:05:36.914694', 'step': 30557, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:36.969905', 'step': 30557, 'epoch': 3}
{'type': 'loss', 'content': 0.07593459635972977, 'timestamp': '2025-10-02 01:05:36.973769', 'step': 30558, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:05:37.032917', 'step': 30558, 'epoch': 3}
{'type': 'loss', 'content': 0.03886588290333748, 'timestamp': '2025-10-02 01:05:37.043073', 'step': 30559, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:05:37.097663', 'step': 30559, 'epoch': 3}
{'type': 'loss', 'content': 0.11010053008794785, 'timestamp': '2025-10-02 01:05:37.103626', 'step': 30560, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:37.157509', 'step': 30560, 'epoch': 3}
{'type': 'loss', 'content': 0.030640721321105957, 'timestamp': '2025-10-02 01:05:37.167718', 'step': 30561, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:37.224291', 'step': 30561, 'epoch': 3}
{'type': 'loss', 'content': 0.009969625622034073, 'timestamp': '2025-10-02 01:05:37.233785', 'step': 30562, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:05:37.288727', 'step': 30562, 'epoch': 3}
{'type': 'loss', 'content': 0.07978525757789612, 'timestamp': '2025-10-02 01:05:37.291221', 'step': 30563, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:37.345783', 'step': 30563, 'epoch': 3}
{'type': 'loss', 'content': 0.1307368129491806, 'timestamp': '2025-10-02 01:05:37.351844', 'step': 30564, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:37.406112', 'step': 30564, 'epoch': 3}
{'type': 'loss', 'content': 0.06351090967655182, 'timestamp': '2025-10-02 01:05:37.413476', 'step': 30565, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:05:37.481261', 'step': 30565, 'epoch': 3}
{'type': 'loss', 'content': 0.016608668491244316, 'timestamp': '2025-10-02 01:05:37.493218', 'step': 30566, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:05:37.556330', 'step': 30566, 'epoch': 3}
{'type': 'loss', 'content': 0.01545305922627449, 'timestamp': '2025-10-02 01:05:37.566918', 'step': 30567, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:05:37.630408', 'step': 30567, 'epoch': 3}
{'type': 'loss', 'content': 0.0075423880480229855, 'timestamp': '2025-10-02 01:05:37.642010', 'step': 30568, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:37.696981', 'step': 30568, 'epoch': 3}
{'type': 'loss', 'content': 0.04324966296553612, 'timestamp': '2025-10-02 01:05:37.699456', 'step': 30569, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:37.754686', 'step': 30569, 'epoch': 3}
{'type': 'loss', 'content': 0.035251185297966, 'timestamp': '2025-10-02 01:05:37.757377', 'step': 30570, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 544], 'flops': 10880066115712.0}, 'timestamp': '2025-10-02 01:05:37.839226', 'step': 30570, 'epoch': 3}
{'type': 'loss', 'content': 0.023931201547384262, 'timestamp': '2025-10-02 01:05:37.854065', 'step': 30571, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:37.912176', 'step': 30571, 'epoch': 3}
{'type': 'loss', 'content': 0.0108882375061512, 'timestamp': '2025-10-02 01:05:37.922432', 'step': 30572, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:37.976676', 'step': 30572, 'epoch': 3}
{'type': 'loss', 'content': 0.01561706978827715, 'timestamp': '2025-10-02 01:05:37.983997', 'step': 30573, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:05:38.038437', 'step': 30573, 'epoch': 3}
{'type': 'loss', 'content': 0.055574677884578705, 'timestamp': '2025-10-02 01:05:38.040690', 'step': 30574, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:38.095361', 'step': 30574, 'epoch': 3}
{'type': 'loss', 'content': 0.028338801115751266, 'timestamp': '2025-10-02 01:05:38.100993', 'step': 30575, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:38.155896', 'step': 30575, 'epoch': 3}
{'type': 'loss', 'content': 0.10520530492067337, 'timestamp': '2025-10-02 01:05:38.161810', 'step': 30576, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:05:38.221962', 'step': 30576, 'epoch': 3}
{'type': 'loss', 'content': 0.04179574176669121, 'timestamp': '2025-10-02 01:05:38.233298', 'step': 30577, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:05:38.288430', 'step': 30577, 'epoch': 3}
{'type': 'loss', 'content': 0.016523098573088646, 'timestamp': '2025-10-02 01:05:38.291209', 'step': 30578, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:38.345771', 'step': 30578, 'epoch': 3}
{'type': 'loss', 'content': 0.09849296510219574, 'timestamp': '2025-10-02 01:05:38.348277', 'step': 30579, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:05:38.403222', 'step': 30579, 'epoch': 3}
{'type': 'loss', 'content': 0.05622149258852005, 'timestamp': '2025-10-02 01:05:38.409466', 'step': 30580, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:38.463503', 'step': 30580, 'epoch': 3}
{'type': 'loss', 'content': 0.07607310265302658, 'timestamp': '2025-10-02 01:05:38.470713', 'step': 30581, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:38.525948', 'step': 30581, 'epoch': 3}
{'type': 'loss', 'content': 0.024126049131155014, 'timestamp': '2025-10-02 01:05:38.535012', 'step': 30582, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:38.591164', 'step': 30582, 'epoch': 3}
{'type': 'loss', 'content': 0.06447333097457886, 'timestamp': '2025-10-02 01:05:38.593510', 'step': 30583, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:05:38.658379', 'step': 30583, 'epoch': 3}
{'type': 'loss', 'content': 0.017223481088876724, 'timestamp': '2025-10-02 01:05:38.669798', 'step': 30584, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:38.728503', 'step': 30584, 'epoch': 3}
{'type': 'loss', 'content': 0.03618774190545082, 'timestamp': '2025-10-02 01:05:38.731562', 'step': 30585, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:38.788324', 'step': 30585, 'epoch': 3}
{'type': 'loss', 'content': 0.02667107991874218, 'timestamp': '2025-10-02 01:05:38.791325', 'step': 30586, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:38.848447', 'step': 30586, 'epoch': 3}
{'type': 'loss', 'content': 0.036468710750341415, 'timestamp': '2025-10-02 01:05:38.854076', 'step': 30587, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:05:38.918680', 'step': 30587, 'epoch': 3}
{'type': 'loss', 'content': 0.022073613479733467, 'timestamp': '2025-10-02 01:05:38.929929', 'step': 30588, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:38.986147', 'step': 30588, 'epoch': 3}
{'type': 'loss', 'content': 0.10274389386177063, 'timestamp': '2025-10-02 01:05:38.989257', 'step': 30589, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:39.048196', 'step': 30589, 'epoch': 3}
{'type': 'loss', 'content': 0.02707410603761673, 'timestamp': '2025-10-02 01:05:39.053890', 'step': 30590, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:39.110371', 'step': 30590, 'epoch': 3}
{'type': 'loss', 'content': 0.02738516591489315, 'timestamp': '2025-10-02 01:05:39.113779', 'step': 30591, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:39.170192', 'step': 30591, 'epoch': 3}
{'type': 'loss', 'content': 0.09967570006847382, 'timestamp': '2025-10-02 01:05:39.176817', 'step': 30592, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:39.232762', 'step': 30592, 'epoch': 3}
{'type': 'loss', 'content': 0.01872948370873928, 'timestamp': '2025-10-02 01:05:39.236041', 'step': 30593, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:39.292850', 'step': 30593, 'epoch': 3}
{'type': 'loss', 'content': 0.06092735379934311, 'timestamp': '2025-10-02 01:05:39.295613', 'step': 30594, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:39.351731', 'step': 30594, 'epoch': 3}
{'type': 'loss', 'content': 0.08615285158157349, 'timestamp': '2025-10-02 01:05:39.354832', 'step': 30595, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:39.411388', 'step': 30595, 'epoch': 3}
{'type': 'loss', 'content': 0.07535701990127563, 'timestamp': '2025-10-02 01:05:39.418477', 'step': 30596, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:39.477213', 'step': 30596, 'epoch': 3}
{'type': 'loss', 'content': 0.04520799219608307, 'timestamp': '2025-10-02 01:05:39.484394', 'step': 30597, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:39.541438', 'step': 30597, 'epoch': 3}
{'type': 'loss', 'content': 0.03452115133404732, 'timestamp': '2025-10-02 01:05:39.550945', 'step': 30598, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:39.607721', 'step': 30598, 'epoch': 3}
{'type': 'loss', 'content': 0.032726891338825226, 'timestamp': '2025-10-02 01:05:39.611298', 'step': 30599, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:39.668796', 'step': 30599, 'epoch': 3}
{'type': 'loss', 'content': 0.025151444599032402, 'timestamp': '2025-10-02 01:05:39.678925', 'step': 30600, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:39.735269', 'step': 30600, 'epoch': 3}
{'type': 'loss', 'content': 0.025476744398474693, 'timestamp': '2025-10-02 01:05:39.740895', 'step': 30601, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:05:39.797732', 'step': 30601, 'epoch': 3}
{'type': 'loss', 'content': 0.0013242702698335052, 'timestamp': '2025-10-02 01:05:39.805203', 'step': 30602, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:05:39.860556', 'step': 30602, 'epoch': 3}
{'type': 'loss', 'content': 0.058942776173353195, 'timestamp': '2025-10-02 01:05:39.866786', 'step': 30603, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:05:39.926845', 'step': 30603, 'epoch': 3}
{'type': 'loss', 'content': 0.01603846810758114, 'timestamp': '2025-10-02 01:05:39.936911', 'step': 30604, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:05:39.993228', 'step': 30604, 'epoch': 3}
{'type': 'loss', 'content': 0.01923483982682228, 'timestamp': '2025-10-02 01:05:39.996283', 'step': 30605, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:40.053898', 'step': 30605, 'epoch': 3}
{'type': 'loss', 'content': 0.013193880207836628, 'timestamp': '2025-10-02 01:05:40.056794', 'step': 30606, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:05:40.115451', 'step': 30606, 'epoch': 3}
{'type': 'loss', 'content': 0.07490084320306778, 'timestamp': '2025-10-02 01:05:40.118805', 'step': 30607, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:05:40.177705', 'step': 30607, 'epoch': 3}
{'type': 'loss', 'content': 0.05899534001946449, 'timestamp': '2025-10-02 01:05:40.183767', 'step': 30608, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:40.238642', 'step': 30608, 'epoch': 3}
{'type': 'loss', 'content': 0.002514175372198224, 'timestamp': '2025-10-02 01:05:40.248892', 'step': 30609, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:05:40.303392', 'step': 30609, 'epoch': 3}
{'type': 'loss', 'content': 0.13520216941833496, 'timestamp': '2025-10-02 01:05:40.305738', 'step': 30610, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:05:40.368319', 'step': 30610, 'epoch': 3}
{'type': 'loss', 'content': 0.00995941087603569, 'timestamp': '2025-10-02 01:05:40.378918', 'step': 30611, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:40.435197', 'step': 30611, 'epoch': 3}
{'type': 'loss', 'content': 0.0806736871600151, 'timestamp': '2025-10-02 01:05:40.441295', 'step': 30612, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:40.495328', 'step': 30612, 'epoch': 3}
{'type': 'loss', 'content': 0.06272617727518082, 'timestamp': '2025-10-02 01:05:40.500926', 'step': 30613, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:05:40.557684', 'step': 30613, 'epoch': 3}
{'type': 'loss', 'content': 0.013232806697487831, 'timestamp': '2025-10-02 01:05:40.567188', 'step': 30614, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:40.622005', 'step': 30614, 'epoch': 3}
{'type': 'loss', 'content': 0.07915712893009186, 'timestamp': '2025-10-02 01:05:40.624830', 'step': 30615, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:05:40.688122', 'step': 30615, 'epoch': 3}
{'type': 'loss', 'content': 0.037686705589294434, 'timestamp': '2025-10-02 01:05:40.699376', 'step': 30616, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:40.754213', 'step': 30616, 'epoch': 3}
{'type': 'loss', 'content': 0.0006475222762674093, 'timestamp': '2025-10-02 01:05:40.756678', 'step': 30617, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:40.811278', 'step': 30617, 'epoch': 3}
{'type': 'loss', 'content': 0.01464038621634245, 'timestamp': '2025-10-02 01:05:40.813898', 'step': 30618, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:05:40.876516', 'step': 30618, 'epoch': 3}
{'type': 'loss', 'content': 0.012878520414233208, 'timestamp': '2025-10-02 01:05:40.889382', 'step': 30619, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:05:40.955398', 'step': 30619, 'epoch': 3}
{'type': 'loss', 'content': 0.006870049983263016, 'timestamp': '2025-10-02 01:05:40.967636', 'step': 30620, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:05:41.026486', 'step': 30620, 'epoch': 3}
{'type': 'loss', 'content': 0.07451166957616806, 'timestamp': '2025-10-02 01:05:41.033281', 'step': 30621, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:05:41.090453', 'step': 30621, 'epoch': 3}
{'type': 'loss', 'content': 0.04236813634634018, 'timestamp': '2025-10-02 01:05:41.101977', 'step': 30622, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:05:41.158735', 'step': 30622, 'epoch': 3}
{'type': 'loss', 'content': 0.029589086771011353, 'timestamp': '2025-10-02 01:05:41.161487', 'step': 30623, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:05:41.229159', 'step': 30623, 'epoch': 3}
{'type': 'loss', 'content': 0.06904484331607819, 'timestamp': '2025-10-02 01:05:41.235840', 'step': 30624, 'epoch': 3}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 01:06:08.554240', 'step': 30624, 'epoch': 3}
{'type': 'pplx', 'content': 93.96296877339563, 'timestamp': '2025-10-02 01:06:08.560298', 'step': 30624, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:08.628583', 'step': 30624, 'epoch': 3}
{'type': 'loss', 'content': 0.0905957892537117, 'timestamp': '2025-10-02 01:06:08.637425', 'step': 30625, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:08.707742', 'step': 30625, 'epoch': 3}
{'type': 'loss', 'content': 0.0006384723819792271, 'timestamp': '2025-10-02 01:06:08.717729', 'step': 30626, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:08.782516', 'step': 30626, 'epoch': 3}
{'type': 'loss', 'content': 0.10402143001556396, 'timestamp': '2025-10-02 01:06:08.787569', 'step': 30627, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:08.860659', 'step': 30627, 'epoch': 3}
{'type': 'loss', 'content': 0.07015503942966461, 'timestamp': '2025-10-02 01:06:08.867955', 'step': 30628, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:08.929775', 'step': 30628, 'epoch': 3}
{'type': 'loss', 'content': 0.016971468925476074, 'timestamp': '2025-10-02 01:06:08.942549', 'step': 30629, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:09.028742', 'step': 30629, 'epoch': 3}
{'type': 'loss', 'content': 0.01329765934497118, 'timestamp': '2025-10-02 01:06:09.036296', 'step': 30630, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:09.103039', 'step': 30630, 'epoch': 3}
{'type': 'loss', 'content': 0.008185155689716339, 'timestamp': '2025-10-02 01:06:09.106418', 'step': 30631, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:09.183935', 'step': 30631, 'epoch': 3}
{'type': 'loss', 'content': 0.030994009226560593, 'timestamp': '2025-10-02 01:06:09.198838', 'step': 30632, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:09.283842', 'step': 30632, 'epoch': 3}
{'type': 'loss', 'content': 0.04216168820858002, 'timestamp': '2025-10-02 01:06:09.293006', 'step': 30633, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:09.383506', 'step': 30633, 'epoch': 3}
{'type': 'loss', 'content': 0.020539620891213417, 'timestamp': '2025-10-02 01:06:09.392811', 'step': 30634, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:09.485591', 'step': 30634, 'epoch': 3}
{'type': 'loss', 'content': 0.11291293799877167, 'timestamp': '2025-10-02 01:06:09.503000', 'step': 30635, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:09.565700', 'step': 30635, 'epoch': 3}
{'type': 'loss', 'content': 0.1354888528585434, 'timestamp': '2025-10-02 01:06:09.576289', 'step': 30636, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:09.648937', 'step': 30636, 'epoch': 3}
{'type': 'loss', 'content': 0.032547108829021454, 'timestamp': '2025-10-02 01:06:09.657478', 'step': 30637, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:06:09.738706', 'step': 30637, 'epoch': 3}
{'type': 'loss', 'content': 0.014829229563474655, 'timestamp': '2025-10-02 01:06:09.749502', 'step': 30638, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:09.817092', 'step': 30638, 'epoch': 3}
{'type': 'loss', 'content': 0.05088857561349869, 'timestamp': '2025-10-02 01:06:09.824140', 'step': 30639, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:06:09.899638', 'step': 30639, 'epoch': 3}
{'type': 'loss', 'content': 0.022569211199879646, 'timestamp': '2025-10-02 01:06:09.912647', 'step': 30640, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:09.981123', 'step': 30640, 'epoch': 3}
{'type': 'loss', 'content': 0.026632577180862427, 'timestamp': '2025-10-02 01:06:09.990481', 'step': 30641, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:10.065532', 'step': 30641, 'epoch': 3}
{'type': 'loss', 'content': 0.04630265384912491, 'timestamp': '2025-10-02 01:06:10.068508', 'step': 30642, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:10.141880', 'step': 30642, 'epoch': 3}
{'type': 'loss', 'content': 0.03956213966012001, 'timestamp': '2025-10-02 01:06:10.150619', 'step': 30643, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:10.215684', 'step': 30643, 'epoch': 3}
{'type': 'loss', 'content': 0.010553962551057339, 'timestamp': '2025-10-02 01:06:10.225909', 'step': 30644, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:10.290981', 'step': 30644, 'epoch': 3}
{'type': 'loss', 'content': 0.05129186064004898, 'timestamp': '2025-10-02 01:06:10.293830', 'step': 30645, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:10.356604', 'step': 30645, 'epoch': 3}
{'type': 'loss', 'content': 0.039170268923044205, 'timestamp': '2025-10-02 01:06:10.361942', 'step': 30646, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:10.421424', 'step': 30646, 'epoch': 3}
{'type': 'loss', 'content': 0.051004558801651, 'timestamp': '2025-10-02 01:06:10.430746', 'step': 30647, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:10.501800', 'step': 30647, 'epoch': 3}
{'type': 'loss', 'content': 0.0578186959028244, 'timestamp': '2025-10-02 01:06:10.513351', 'step': 30648, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:10.579230', 'step': 30648, 'epoch': 3}
{'type': 'loss', 'content': 0.025012295693159103, 'timestamp': '2025-10-02 01:06:10.582035', 'step': 30649, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:10.649414', 'step': 30649, 'epoch': 3}
{'type': 'loss', 'content': 0.02807779051363468, 'timestamp': '2025-10-02 01:06:10.652768', 'step': 30650, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:10.710637', 'step': 30650, 'epoch': 3}
{'type': 'loss', 'content': 0.012661821208894253, 'timestamp': '2025-10-02 01:06:10.719727', 'step': 30651, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:10.791884', 'step': 30651, 'epoch': 3}
{'type': 'loss', 'content': 0.042264603078365326, 'timestamp': '2025-10-02 01:06:10.802902', 'step': 30652, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:10.867400', 'step': 30652, 'epoch': 3}
{'type': 'loss', 'content': 0.03669464588165283, 'timestamp': '2025-10-02 01:06:10.875444', 'step': 30653, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:10.945353', 'step': 30653, 'epoch': 3}
{'type': 'loss', 'content': 0.05242839455604553, 'timestamp': '2025-10-02 01:06:10.953845', 'step': 30654, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:11.019048', 'step': 30654, 'epoch': 3}
{'type': 'loss', 'content': 0.16344380378723145, 'timestamp': '2025-10-02 01:06:11.022598', 'step': 30655, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:11.083694', 'step': 30655, 'epoch': 3}
{'type': 'loss', 'content': 0.06488911807537079, 'timestamp': '2025-10-02 01:06:11.093327', 'step': 30656, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:11.168482', 'step': 30656, 'epoch': 3}
{'type': 'loss', 'content': 0.026540976017713547, 'timestamp': '2025-10-02 01:06:11.178551', 'step': 30657, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:11.254751', 'step': 30657, 'epoch': 3}
{'type': 'loss', 'content': 0.05687253177165985, 'timestamp': '2025-10-02 01:06:11.258434', 'step': 30658, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:11.319438', 'step': 30658, 'epoch': 3}
{'type': 'loss', 'content': 0.029200099408626556, 'timestamp': '2025-10-02 01:06:11.322876', 'step': 30659, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:11.386553', 'step': 30659, 'epoch': 3}
{'type': 'loss', 'content': 0.004553025122731924, 'timestamp': '2025-10-02 01:06:11.397255', 'step': 30660, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:11.475048', 'step': 30660, 'epoch': 3}
{'type': 'loss', 'content': 0.01048191450536251, 'timestamp': '2025-10-02 01:06:11.477770', 'step': 30661, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:11.538373', 'step': 30661, 'epoch': 3}
{'type': 'loss', 'content': 0.031987279653549194, 'timestamp': '2025-10-02 01:06:11.546098', 'step': 30662, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:11.611773', 'step': 30662, 'epoch': 3}
{'type': 'loss', 'content': 0.09892801940441132, 'timestamp': '2025-10-02 01:06:11.615117', 'step': 30663, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:11.691960', 'step': 30663, 'epoch': 3}
{'type': 'loss', 'content': 0.027645207941532135, 'timestamp': '2025-10-02 01:06:11.698014', 'step': 30664, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:11.753356', 'step': 30664, 'epoch': 3}
{'type': 'loss', 'content': 0.06267007440328598, 'timestamp': '2025-10-02 01:06:11.756729', 'step': 30665, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:11.814015', 'step': 30665, 'epoch': 3}
{'type': 'loss', 'content': 0.05125808343291283, 'timestamp': '2025-10-02 01:06:11.823556', 'step': 30666, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:11.880880', 'step': 30666, 'epoch': 3}
{'type': 'loss', 'content': 0.034994423389434814, 'timestamp': '2025-10-02 01:06:11.888365', 'step': 30667, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:11.950849', 'step': 30667, 'epoch': 3}
{'type': 'loss', 'content': 0.017917964607477188, 'timestamp': '2025-10-02 01:06:11.959264', 'step': 30668, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 01:06:12.036607', 'step': 30668, 'epoch': 3}
{'type': 'loss', 'content': 0.06356721371412277, 'timestamp': '2025-10-02 01:06:12.050381', 'step': 30669, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:06:12.125544', 'step': 30669, 'epoch': 3}
{'type': 'loss', 'content': 0.010087321512401104, 'timestamp': '2025-10-02 01:06:12.136217', 'step': 30670, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:12.197038', 'step': 30670, 'epoch': 3}
{'type': 'loss', 'content': 0.110069639980793, 'timestamp': '2025-10-02 01:06:12.206352', 'step': 30671, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:12.269348', 'step': 30671, 'epoch': 3}
{'type': 'loss', 'content': 0.07738346606492996, 'timestamp': '2025-10-02 01:06:12.282479', 'step': 30672, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:12.357253', 'step': 30672, 'epoch': 3}
{'type': 'loss', 'content': 0.006996533367782831, 'timestamp': '2025-10-02 01:06:12.367552', 'step': 30673, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:12.430066', 'step': 30673, 'epoch': 3}
{'type': 'loss', 'content': 0.04233025759458542, 'timestamp': '2025-10-02 01:06:12.440254', 'step': 30674, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:12.501873', 'step': 30674, 'epoch': 3}
{'type': 'loss', 'content': 0.06367813795804977, 'timestamp': '2025-10-02 01:06:12.504750', 'step': 30675, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:12.559072', 'step': 30675, 'epoch': 3}
{'type': 'loss', 'content': 0.11176571995019913, 'timestamp': '2025-10-02 01:06:12.565530', 'step': 30676, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:12.620481', 'step': 30676, 'epoch': 3}
{'type': 'loss', 'content': 0.016913624480366707, 'timestamp': '2025-10-02 01:06:12.622605', 'step': 30677, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:12.678764', 'step': 30677, 'epoch': 3}
{'type': 'loss', 'content': 0.025785597041249275, 'timestamp': '2025-10-02 01:06:12.686460', 'step': 30678, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:12.741110', 'step': 30678, 'epoch': 3}
{'type': 'loss', 'content': 0.09376975893974304, 'timestamp': '2025-10-02 01:06:12.743569', 'step': 30679, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:12.798194', 'step': 30679, 'epoch': 3}
{'type': 'loss', 'content': 0.016017017886042595, 'timestamp': '2025-10-02 01:06:12.804446', 'step': 30680, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:12.859119', 'step': 30680, 'epoch': 3}
{'type': 'loss', 'content': 0.03963834047317505, 'timestamp': '2025-10-02 01:06:12.861383', 'step': 30681, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 01:06:12.932746', 'step': 30681, 'epoch': 3}
{'type': 'loss', 'content': 0.020024891942739487, 'timestamp': '2025-10-02 01:06:12.945202', 'step': 30682, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:13.000896', 'step': 30682, 'epoch': 3}
{'type': 'loss', 'content': 0.02741401270031929, 'timestamp': '2025-10-02 01:06:13.008317', 'step': 30683, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:13.065508', 'step': 30683, 'epoch': 3}
{'type': 'loss', 'content': 0.016080420464277267, 'timestamp': '2025-10-02 01:06:13.071662', 'step': 30684, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:13.125358', 'step': 30684, 'epoch': 3}
{'type': 'loss', 'content': 0.03869355842471123, 'timestamp': '2025-10-02 01:06:13.127731', 'step': 30685, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:13.184799', 'step': 30685, 'epoch': 3}
{'type': 'loss', 'content': 0.034084245562553406, 'timestamp': '2025-10-02 01:06:13.193998', 'step': 30686, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:13.250485', 'step': 30686, 'epoch': 3}
{'type': 'loss', 'content': 0.024163151159882545, 'timestamp': '2025-10-02 01:06:13.253181', 'step': 30687, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:13.308310', 'step': 30687, 'epoch': 3}
{'type': 'loss', 'content': 0.0737527459859848, 'timestamp': '2025-10-02 01:06:13.316310', 'step': 30688, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:13.376636', 'step': 30688, 'epoch': 3}
{'type': 'loss', 'content': 0.03800530359148979, 'timestamp': '2025-10-02 01:06:13.379290', 'step': 30689, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:13.433552', 'step': 30689, 'epoch': 3}
{'type': 'loss', 'content': 0.13112540543079376, 'timestamp': '2025-10-02 01:06:13.435974', 'step': 30690, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:13.492779', 'step': 30690, 'epoch': 3}
{'type': 'loss', 'content': 0.023502757772803307, 'timestamp': '2025-10-02 01:06:13.495388', 'step': 30691, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:13.550507', 'step': 30691, 'epoch': 3}
{'type': 'loss', 'content': 0.05174478143453598, 'timestamp': '2025-10-02 01:06:13.557731', 'step': 30692, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:13.612409', 'step': 30692, 'epoch': 3}
{'type': 'loss', 'content': 0.05341798812150955, 'timestamp': '2025-10-02 01:06:13.614825', 'step': 30693, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:13.669903', 'step': 30693, 'epoch': 3}
{'type': 'loss', 'content': 0.017082620412111282, 'timestamp': '2025-10-02 01:06:13.672087', 'step': 30694, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:13.727193', 'step': 30694, 'epoch': 3}
{'type': 'loss', 'content': 0.04054481163620949, 'timestamp': '2025-10-02 01:06:13.729752', 'step': 30695, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:13.784792', 'step': 30695, 'epoch': 3}
{'type': 'loss', 'content': 0.004005911760032177, 'timestamp': '2025-10-02 01:06:13.790677', 'step': 30696, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:13.845500', 'step': 30696, 'epoch': 3}
{'type': 'loss', 'content': 0.05746179074048996, 'timestamp': '2025-10-02 01:06:13.847953', 'step': 30697, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:13.902376', 'step': 30697, 'epoch': 3}
{'type': 'loss', 'content': 0.05190681293606758, 'timestamp': '2025-10-02 01:06:13.905358', 'step': 30698, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:13.959848', 'step': 30698, 'epoch': 3}
{'type': 'loss', 'content': 0.048254791647195816, 'timestamp': '2025-10-02 01:06:13.962610', 'step': 30699, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:06:14.030900', 'step': 30699, 'epoch': 3}
{'type': 'loss', 'content': 0.008426378481090069, 'timestamp': '2025-10-02 01:06:14.043633', 'step': 30700, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:14.098141', 'step': 30700, 'epoch': 3}
{'type': 'loss', 'content': 0.0616898313164711, 'timestamp': '2025-10-02 01:06:14.100427', 'step': 30701, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:06:14.170729', 'step': 30701, 'epoch': 3}
{'type': 'loss', 'content': 0.004454998765140772, 'timestamp': '2025-10-02 01:06:14.183065', 'step': 30702, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:06:14.247903', 'step': 30702, 'epoch': 3}
{'type': 'loss', 'content': 0.029566805809736252, 'timestamp': '2025-10-02 01:06:14.258696', 'step': 30703, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:14.317554', 'step': 30703, 'epoch': 3}
{'type': 'loss', 'content': 0.08835956454277039, 'timestamp': '2025-10-02 01:06:14.327878', 'step': 30704, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:14.382087', 'step': 30704, 'epoch': 3}
{'type': 'loss', 'content': 0.0585891529917717, 'timestamp': '2025-10-02 01:06:14.389601', 'step': 30705, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:14.443724', 'step': 30705, 'epoch': 3}
{'type': 'loss', 'content': 0.07963501662015915, 'timestamp': '2025-10-02 01:06:14.449539', 'step': 30706, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:14.504617', 'step': 30706, 'epoch': 3}
{'type': 'loss', 'content': 0.03761140629649162, 'timestamp': '2025-10-02 01:06:14.507543', 'step': 30707, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:14.562155', 'step': 30707, 'epoch': 3}
{'type': 'loss', 'content': 0.021082768216729164, 'timestamp': '2025-10-02 01:06:14.568290', 'step': 30708, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:14.622490', 'step': 30708, 'epoch': 3}
{'type': 'loss', 'content': 0.021156737580895424, 'timestamp': '2025-10-02 01:06:14.626117', 'step': 30709, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:14.682608', 'step': 30709, 'epoch': 3}
{'type': 'loss', 'content': 0.12735936045646667, 'timestamp': '2025-10-02 01:06:14.685505', 'step': 30710, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:06:14.739828', 'step': 30710, 'epoch': 3}
{'type': 'loss', 'content': 0.0889807939529419, 'timestamp': '2025-10-02 01:06:14.742502', 'step': 30711, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:14.797612', 'step': 30711, 'epoch': 3}
{'type': 'loss', 'content': 0.09517760574817657, 'timestamp': '2025-10-02 01:06:14.804523', 'step': 30712, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:14.858840', 'step': 30712, 'epoch': 3}
{'type': 'loss', 'content': 0.08148118108510971, 'timestamp': '2025-10-02 01:06:14.861249', 'step': 30713, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:14.915997', 'step': 30713, 'epoch': 3}
{'type': 'loss', 'content': 0.026397505775094032, 'timestamp': '2025-10-02 01:06:14.918645', 'step': 30714, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:14.973509', 'step': 30714, 'epoch': 3}
{'type': 'loss', 'content': 0.01871456392109394, 'timestamp': '2025-10-02 01:06:14.975729', 'step': 30715, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:15.030656', 'step': 30715, 'epoch': 3}
{'type': 'loss', 'content': 0.043832506984472275, 'timestamp': '2025-10-02 01:06:15.036694', 'step': 30716, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:15.090841', 'step': 30716, 'epoch': 3}
{'type': 'loss', 'content': 0.02931031957268715, 'timestamp': '2025-10-02 01:06:15.096877', 'step': 30717, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:15.157405', 'step': 30717, 'epoch': 3}
{'type': 'loss', 'content': 0.006597458850592375, 'timestamp': '2025-10-02 01:06:15.167582', 'step': 30718, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:15.222500', 'step': 30718, 'epoch': 3}
{'type': 'loss', 'content': 0.003392719430848956, 'timestamp': '2025-10-02 01:06:15.225370', 'step': 30719, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 640], 'flops': 12800077771264.0}, 'timestamp': '2025-10-02 01:06:15.320810', 'step': 30719, 'epoch': 3}
{'type': 'loss', 'content': 0.017619971185922623, 'timestamp': '2025-10-02 01:06:15.338766', 'step': 30720, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:15.393335', 'step': 30720, 'epoch': 3}
{'type': 'loss', 'content': 0.005984077695757151, 'timestamp': '2025-10-02 01:06:15.395488', 'step': 30721, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:15.450193', 'step': 30721, 'epoch': 3}
{'type': 'loss', 'content': 0.09261853247880936, 'timestamp': '2025-10-02 01:06:15.452640', 'step': 30722, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:15.507423', 'step': 30722, 'epoch': 3}
{'type': 'loss', 'content': 0.022285258397459984, 'timestamp': '2025-10-02 01:06:15.510661', 'step': 30723, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:06:15.582357', 'step': 30723, 'epoch': 3}
{'type': 'loss', 'content': 0.0009214375168085098, 'timestamp': '2025-10-02 01:06:15.593951', 'step': 30724, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:06:15.653835', 'step': 30724, 'epoch': 3}
{'type': 'loss', 'content': 0.013417392037808895, 'timestamp': '2025-10-02 01:06:15.665146', 'step': 30725, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:15.720072', 'step': 30725, 'epoch': 3}
{'type': 'loss', 'content': 0.09613360464572906, 'timestamp': '2025-10-02 01:06:15.723272', 'step': 30726, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-10-02 01:06:15.800723', 'step': 30726, 'epoch': 3}
{'type': 'loss', 'content': 0.00515363086014986, 'timestamp': '2025-10-02 01:06:15.814466', 'step': 30727, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:15.870727', 'step': 30727, 'epoch': 3}
{'type': 'loss', 'content': 0.07278501242399216, 'timestamp': '2025-10-02 01:06:15.876649', 'step': 30728, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:15.939727', 'step': 30728, 'epoch': 3}
{'type': 'loss', 'content': 0.05912507697939873, 'timestamp': '2025-10-02 01:06:15.942614', 'step': 30729, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:15.997876', 'step': 30729, 'epoch': 3}
{'type': 'loss', 'content': 0.021075814962387085, 'timestamp': '2025-10-02 01:06:16.001770', 'step': 30730, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:16.059451', 'step': 30730, 'epoch': 3}
{'type': 'loss', 'content': 0.06617528945207596, 'timestamp': '2025-10-02 01:06:16.063002', 'step': 30731, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:16.119627', 'step': 30731, 'epoch': 3}
{'type': 'loss', 'content': 0.012266228906810284, 'timestamp': '2025-10-02 01:06:16.126819', 'step': 30732, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:16.185322', 'step': 30732, 'epoch': 3}
{'type': 'loss', 'content': 0.06321104615926743, 'timestamp': '2025-10-02 01:06:16.197126', 'step': 30733, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:16.253190', 'step': 30733, 'epoch': 3}
{'type': 'loss', 'content': 0.06364891678094864, 'timestamp': '2025-10-02 01:06:16.259141', 'step': 30734, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:16.315818', 'step': 30734, 'epoch': 3}
{'type': 'loss', 'content': 0.006340238731354475, 'timestamp': '2025-10-02 01:06:16.318301', 'step': 30735, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:16.376143', 'step': 30735, 'epoch': 3}
{'type': 'loss', 'content': 0.001662906724959612, 'timestamp': '2025-10-02 01:06:16.384525', 'step': 30736, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:16.441394', 'step': 30736, 'epoch': 3}
{'type': 'loss', 'content': 0.01983269676566124, 'timestamp': '2025-10-02 01:06:16.456199', 'step': 30737, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:16.511262', 'step': 30737, 'epoch': 3}
{'type': 'loss', 'content': 0.03607796132564545, 'timestamp': '2025-10-02 01:06:16.514420', 'step': 30738, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:16.580231', 'step': 30738, 'epoch': 3}
{'type': 'loss', 'content': 0.007287968881428242, 'timestamp': '2025-10-02 01:06:16.585583', 'step': 30739, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:06:16.649951', 'step': 30739, 'epoch': 3}
{'type': 'loss', 'content': 0.01692960411310196, 'timestamp': '2025-10-02 01:06:16.661237', 'step': 30740, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:16.718937', 'step': 30740, 'epoch': 3}
{'type': 'loss', 'content': 0.03660343587398529, 'timestamp': '2025-10-02 01:06:16.729223', 'step': 30741, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:16.787162', 'step': 30741, 'epoch': 3}
{'type': 'loss', 'content': 0.05125724524259567, 'timestamp': '2025-10-02 01:06:16.791765', 'step': 30742, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:16.854612', 'step': 30742, 'epoch': 3}
{'type': 'loss', 'content': 0.033979177474975586, 'timestamp': '2025-10-02 01:06:16.858381', 'step': 30743, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:16.919521', 'step': 30743, 'epoch': 3}
{'type': 'loss', 'content': 0.010472425259649754, 'timestamp': '2025-10-02 01:06:16.926389', 'step': 30744, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:16.984587', 'step': 30744, 'epoch': 3}
{'type': 'loss', 'content': 0.04309928044676781, 'timestamp': '2025-10-02 01:06:16.988745', 'step': 30745, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:17.049456', 'step': 30745, 'epoch': 3}
{'type': 'loss', 'content': 0.039561815559864044, 'timestamp': '2025-10-02 01:06:17.064993', 'step': 30746, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:17.122260', 'step': 30746, 'epoch': 3}
{'type': 'loss', 'content': 0.013401240110397339, 'timestamp': '2025-10-02 01:06:17.129904', 'step': 30747, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:17.197255', 'step': 30747, 'epoch': 3}
{'type': 'loss', 'content': 0.020966501906514168, 'timestamp': '2025-10-02 01:06:17.203813', 'step': 30748, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:17.272089', 'step': 30748, 'epoch': 3}
{'type': 'loss', 'content': 0.04642150551080704, 'timestamp': '2025-10-02 01:06:17.281861', 'step': 30749, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:17.340944', 'step': 30749, 'epoch': 3}
{'type': 'loss', 'content': 0.06159588694572449, 'timestamp': '2025-10-02 01:06:17.348446', 'step': 30750, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:17.406308', 'step': 30750, 'epoch': 3}
{'type': 'loss', 'content': 0.04551819711923599, 'timestamp': '2025-10-02 01:06:17.408676', 'step': 30751, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:17.465608', 'step': 30751, 'epoch': 3}
{'type': 'loss', 'content': 0.10402435064315796, 'timestamp': '2025-10-02 01:06:17.472377', 'step': 30752, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:17.528751', 'step': 30752, 'epoch': 3}
{'type': 'loss', 'content': 0.03374507650732994, 'timestamp': '2025-10-02 01:06:17.536273', 'step': 30753, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:17.593911', 'step': 30753, 'epoch': 3}
{'type': 'loss', 'content': 0.043368782848119736, 'timestamp': '2025-10-02 01:06:17.596832', 'step': 30754, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:17.652953', 'step': 30754, 'epoch': 3}
{'type': 'loss', 'content': 0.09246161580085754, 'timestamp': '2025-10-02 01:06:17.656024', 'step': 30755, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:17.713133', 'step': 30755, 'epoch': 3}
{'type': 'loss', 'content': 0.06640855967998505, 'timestamp': '2025-10-02 01:06:17.721734', 'step': 30756, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:17.778026', 'step': 30756, 'epoch': 3}
{'type': 'loss', 'content': 0.08291777968406677, 'timestamp': '2025-10-02 01:06:17.780694', 'step': 30757, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:17.837082', 'step': 30757, 'epoch': 3}
{'type': 'loss', 'content': 0.052930328994989395, 'timestamp': '2025-10-02 01:06:17.840522', 'step': 30758, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:17.900389', 'step': 30758, 'epoch': 3}
{'type': 'loss', 'content': 0.13352110981941223, 'timestamp': '2025-10-02 01:06:17.903821', 'step': 30759, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:17.961507', 'step': 30759, 'epoch': 3}
{'type': 'loss', 'content': 0.012266837060451508, 'timestamp': '2025-10-02 01:06:17.971848', 'step': 30760, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:18.026595', 'step': 30760, 'epoch': 3}
{'type': 'loss', 'content': 0.06187058240175247, 'timestamp': '2025-10-02 01:06:18.028999', 'step': 30761, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:06:18.105026', 'step': 30761, 'epoch': 3}
{'type': 'loss', 'content': 0.0032211088109761477, 'timestamp': '2025-10-02 01:06:18.116989', 'step': 30762, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:06:18.186512', 'step': 30762, 'epoch': 3}
{'type': 'loss', 'content': 0.00561372097581625, 'timestamp': '2025-10-02 01:06:18.198529', 'step': 30763, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:18.256619', 'step': 30763, 'epoch': 3}
{'type': 'loss', 'content': 0.03741550073027611, 'timestamp': '2025-10-02 01:06:18.263333', 'step': 30764, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:18.319210', 'step': 30764, 'epoch': 3}
{'type': 'loss', 'content': 0.005688842386007309, 'timestamp': '2025-10-02 01:06:18.329459', 'step': 30765, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:18.386611', 'step': 30765, 'epoch': 3}
{'type': 'loss', 'content': 0.010609719902276993, 'timestamp': '2025-10-02 01:06:18.389667', 'step': 30766, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:06:18.456643', 'step': 30766, 'epoch': 3}
{'type': 'loss', 'content': 0.010986996814608574, 'timestamp': '2025-10-02 01:06:18.467503', 'step': 30767, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:18.521928', 'step': 30767, 'epoch': 3}
{'type': 'loss', 'content': 0.023784568533301353, 'timestamp': '2025-10-02 01:06:18.528000', 'step': 30768, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:18.582548', 'step': 30768, 'epoch': 3}
{'type': 'loss', 'content': 0.0007092968444339931, 'timestamp': '2025-10-02 01:06:18.585283', 'step': 30769, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:18.639842', 'step': 30769, 'epoch': 3}
{'type': 'loss', 'content': 0.10485892742872238, 'timestamp': '2025-10-02 01:06:18.642530', 'step': 30770, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:18.698157', 'step': 30770, 'epoch': 3}
{'type': 'loss', 'content': 0.07405976951122284, 'timestamp': '2025-10-02 01:06:18.701179', 'step': 30771, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:18.755058', 'step': 30771, 'epoch': 3}
{'type': 'loss', 'content': 0.09282908588647842, 'timestamp': '2025-10-02 01:06:18.761370', 'step': 30772, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:18.815515', 'step': 30772, 'epoch': 3}
{'type': 'loss', 'content': 0.024415945634245872, 'timestamp': '2025-10-02 01:06:18.817933', 'step': 30773, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:18.872446', 'step': 30773, 'epoch': 3}
{'type': 'loss', 'content': 0.07222160696983337, 'timestamp': '2025-10-02 01:06:18.881737', 'step': 30774, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:18.936237', 'step': 30774, 'epoch': 3}
{'type': 'loss', 'content': 0.054693982005119324, 'timestamp': '2025-10-02 01:06:18.938776', 'step': 30775, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:18.999033', 'step': 30775, 'epoch': 3}
{'type': 'loss', 'content': 0.08366718143224716, 'timestamp': '2025-10-02 01:06:19.007382', 'step': 30776, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:19.061582', 'step': 30776, 'epoch': 3}
{'type': 'loss', 'content': 0.011890086345374584, 'timestamp': '2025-10-02 01:06:19.063857', 'step': 30777, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:19.118807', 'step': 30777, 'epoch': 3}
{'type': 'loss', 'content': 0.06440961360931396, 'timestamp': '2025-10-02 01:06:19.120947', 'step': 30778, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:19.176832', 'step': 30778, 'epoch': 3}
{'type': 'loss', 'content': 0.04318803548812866, 'timestamp': '2025-10-02 01:06:19.180755', 'step': 30779, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:19.237659', 'step': 30779, 'epoch': 3}
{'type': 'loss', 'content': 0.034860365092754364, 'timestamp': '2025-10-02 01:06:19.243925', 'step': 30780, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:19.297754', 'step': 30780, 'epoch': 3}
{'type': 'loss', 'content': 0.05622772127389908, 'timestamp': '2025-10-02 01:06:19.300399', 'step': 30781, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:19.355205', 'step': 30781, 'epoch': 3}
{'type': 'loss', 'content': 0.024454277008771896, 'timestamp': '2025-10-02 01:06:19.357586', 'step': 30782, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:19.412076', 'step': 30782, 'epoch': 3}
{'type': 'loss', 'content': 0.03951062634587288, 'timestamp': '2025-10-02 01:06:19.417878', 'step': 30783, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:19.472519', 'step': 30783, 'epoch': 3}
{'type': 'loss', 'content': 0.015616361983120441, 'timestamp': '2025-10-02 01:06:19.478585', 'step': 30784, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:19.533960', 'step': 30784, 'epoch': 3}
{'type': 'loss', 'content': 0.017019441351294518, 'timestamp': '2025-10-02 01:06:19.536593', 'step': 30785, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:19.590620', 'step': 30785, 'epoch': 3}
{'type': 'loss', 'content': 0.05143313854932785, 'timestamp': '2025-10-02 01:06:19.593134', 'step': 30786, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:19.648629', 'step': 30786, 'epoch': 3}
{'type': 'loss', 'content': 0.052171170711517334, 'timestamp': '2025-10-02 01:06:19.651236', 'step': 30787, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:19.705139', 'step': 30787, 'epoch': 3}
{'type': 'loss', 'content': 0.05120657756924629, 'timestamp': '2025-10-02 01:06:19.711143', 'step': 30788, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:19.764737', 'step': 30788, 'epoch': 3}
{'type': 'loss', 'content': 0.05908031016588211, 'timestamp': '2025-10-02 01:06:19.767262', 'step': 30789, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:19.829504', 'step': 30789, 'epoch': 3}
{'type': 'loss', 'content': 0.05201541259884834, 'timestamp': '2025-10-02 01:06:19.839661', 'step': 30790, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:19.893507', 'step': 30790, 'epoch': 3}
{'type': 'loss', 'content': 0.03343404829502106, 'timestamp': '2025-10-02 01:06:19.896573', 'step': 30791, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:19.952017', 'step': 30791, 'epoch': 3}
{'type': 'loss', 'content': 0.0028964506927877665, 'timestamp': '2025-10-02 01:06:19.958068', 'step': 30792, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:20.015329', 'step': 30792, 'epoch': 3}
{'type': 'loss', 'content': 0.03627733513712883, 'timestamp': '2025-10-02 01:06:20.020068', 'step': 30793, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:20.084901', 'step': 30793, 'epoch': 3}
{'type': 'loss', 'content': 0.031146755442023277, 'timestamp': '2025-10-02 01:06:20.087368', 'step': 30794, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:06:20.142416', 'step': 30794, 'epoch': 3}
{'type': 'loss', 'content': 0.08471935987472534, 'timestamp': '2025-10-02 01:06:20.144802', 'step': 30795, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:20.198892', 'step': 30795, 'epoch': 3}
{'type': 'loss', 'content': 0.025859732180833817, 'timestamp': '2025-10-02 01:06:20.205625', 'step': 30796, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:20.259109', 'step': 30796, 'epoch': 3}
{'type': 'loss', 'content': 0.019912991672754288, 'timestamp': '2025-10-02 01:06:20.265147', 'step': 30797, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:20.320442', 'step': 30797, 'epoch': 3}
{'type': 'loss', 'content': 0.03395381197333336, 'timestamp': '2025-10-02 01:06:20.323170', 'step': 30798, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:20.379642', 'step': 30798, 'epoch': 3}
{'type': 'loss', 'content': 0.02663753181695938, 'timestamp': '2025-10-02 01:06:20.385833', 'step': 30799, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:20.448497', 'step': 30799, 'epoch': 3}
{'type': 'loss', 'content': 0.01374666579067707, 'timestamp': '2025-10-02 01:06:20.456402', 'step': 30800, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:20.511085', 'step': 30800, 'epoch': 3}
{'type': 'loss', 'content': 0.052237849682569504, 'timestamp': '2025-10-02 01:06:20.513634', 'step': 30801, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:20.568684', 'step': 30801, 'epoch': 3}
{'type': 'loss', 'content': 0.030027661472558975, 'timestamp': '2025-10-02 01:06:20.571226', 'step': 30802, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:06:20.626016', 'step': 30802, 'epoch': 3}
{'type': 'loss', 'content': 0.059895507991313934, 'timestamp': '2025-10-02 01:06:20.629823', 'step': 30803, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:20.686305', 'step': 30803, 'epoch': 3}
{'type': 'loss', 'content': 0.053297750651836395, 'timestamp': '2025-10-02 01:06:20.694293', 'step': 30804, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:20.749870', 'step': 30804, 'epoch': 3}
{'type': 'loss', 'content': 0.022433796897530556, 'timestamp': '2025-10-02 01:06:20.752834', 'step': 30805, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:20.812939', 'step': 30805, 'epoch': 3}
{'type': 'loss', 'content': 0.01814449392259121, 'timestamp': '2025-10-02 01:06:20.823095', 'step': 30806, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 01:06:20.894945', 'step': 30806, 'epoch': 3}
{'type': 'loss', 'content': 0.024498358368873596, 'timestamp': '2025-10-02 01:06:20.907576', 'step': 30807, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:20.966038', 'step': 30807, 'epoch': 3}
{'type': 'loss', 'content': 0.03706638514995575, 'timestamp': '2025-10-02 01:06:20.973152', 'step': 30808, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:21.028048', 'step': 30808, 'epoch': 3}
{'type': 'loss', 'content': 0.0009054882684722543, 'timestamp': '2025-10-02 01:06:21.033586', 'step': 30809, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:21.089153', 'step': 30809, 'epoch': 3}
{'type': 'loss', 'content': 0.04048512503504753, 'timestamp': '2025-10-02 01:06:21.094756', 'step': 30810, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:21.152721', 'step': 30810, 'epoch': 3}
{'type': 'loss', 'content': 0.04264736548066139, 'timestamp': '2025-10-02 01:06:21.155746', 'step': 30811, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:06:21.211714', 'step': 30811, 'epoch': 3}
{'type': 'loss', 'content': 0.12164107710123062, 'timestamp': '2025-10-02 01:06:21.218186', 'step': 30812, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:21.273831', 'step': 30812, 'epoch': 3}
{'type': 'loss', 'content': 0.047848209738731384, 'timestamp': '2025-10-02 01:06:21.276640', 'step': 30813, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:21.341376', 'step': 30813, 'epoch': 3}
{'type': 'loss', 'content': 0.03452654927968979, 'timestamp': '2025-10-02 01:06:21.351501', 'step': 30814, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:21.406684', 'step': 30814, 'epoch': 3}
{'type': 'loss', 'content': 0.03477245196700096, 'timestamp': '2025-10-02 01:06:21.408798', 'step': 30815, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:21.464786', 'step': 30815, 'epoch': 3}
{'type': 'loss', 'content': 0.1284950077533722, 'timestamp': '2025-10-02 01:06:21.470848', 'step': 30816, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:21.525008', 'step': 30816, 'epoch': 3}
{'type': 'loss', 'content': 0.026286499574780464, 'timestamp': '2025-10-02 01:06:21.527776', 'step': 30817, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:21.584264', 'step': 30817, 'epoch': 3}
{'type': 'loss', 'content': 0.004772447049617767, 'timestamp': '2025-10-02 01:06:21.586861', 'step': 30818, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:21.641358', 'step': 30818, 'epoch': 3}
{'type': 'loss', 'content': 0.09607982635498047, 'timestamp': '2025-10-02 01:06:21.643873', 'step': 30819, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:21.700504', 'step': 30819, 'epoch': 3}
{'type': 'loss', 'content': 0.005425687879323959, 'timestamp': '2025-10-02 01:06:21.707321', 'step': 30820, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:21.761761', 'step': 30820, 'epoch': 3}
{'type': 'loss', 'content': 0.026463288813829422, 'timestamp': '2025-10-02 01:06:21.769057', 'step': 30821, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:21.824311', 'step': 30821, 'epoch': 3}
{'type': 'loss', 'content': 0.07950608432292938, 'timestamp': '2025-10-02 01:06:21.826779', 'step': 30822, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:06:21.881673', 'step': 30822, 'epoch': 3}
{'type': 'loss', 'content': 0.06659766286611557, 'timestamp': '2025-10-02 01:06:21.884256', 'step': 30823, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:21.938063', 'step': 30823, 'epoch': 3}
{'type': 'loss', 'content': 0.06482534855604172, 'timestamp': '2025-10-02 01:06:21.944331', 'step': 30824, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:21.998340', 'step': 30824, 'epoch': 3}
{'type': 'loss', 'content': 0.00040595035534352064, 'timestamp': '2025-10-02 01:06:22.005984', 'step': 30825, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:22.060494', 'step': 30825, 'epoch': 3}
{'type': 'loss', 'content': 0.1665322184562683, 'timestamp': '2025-10-02 01:06:22.063143', 'step': 30826, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:22.119177', 'step': 30826, 'epoch': 3}
{'type': 'loss', 'content': 0.05023568868637085, 'timestamp': '2025-10-02 01:06:22.128531', 'step': 30827, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:22.184715', 'step': 30827, 'epoch': 3}
{'type': 'loss', 'content': 0.003376950277015567, 'timestamp': '2025-10-02 01:06:22.191767', 'step': 30828, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:22.246976', 'step': 30828, 'epoch': 3}
{'type': 'loss', 'content': 0.0031956653110682964, 'timestamp': '2025-10-02 01:06:22.249556', 'step': 30829, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:22.305902', 'step': 30829, 'epoch': 3}
{'type': 'loss', 'content': 0.01762443408370018, 'timestamp': '2025-10-02 01:06:22.308376', 'step': 30830, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:22.363626', 'step': 30830, 'epoch': 3}
{'type': 'loss', 'content': 0.05800876393914223, 'timestamp': '2025-10-02 01:06:22.366358', 'step': 30831, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:22.421764', 'step': 30831, 'epoch': 3}
{'type': 'loss', 'content': 0.052020855247974396, 'timestamp': '2025-10-02 01:06:22.428120', 'step': 30832, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:22.483848', 'step': 30832, 'epoch': 3}
{'type': 'loss', 'content': 0.05239851027727127, 'timestamp': '2025-10-02 01:06:22.489266', 'step': 30833, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:22.545602', 'step': 30833, 'epoch': 3}
{'type': 'loss', 'content': 0.03445783630013466, 'timestamp': '2025-10-02 01:06:22.553289', 'step': 30834, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:22.614088', 'step': 30834, 'epoch': 3}
{'type': 'loss', 'content': 0.000248083146288991, 'timestamp': '2025-10-02 01:06:22.624246', 'step': 30835, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:22.682695', 'step': 30835, 'epoch': 3}
{'type': 'loss', 'content': 0.027161361649632454, 'timestamp': '2025-10-02 01:06:22.690252', 'step': 30836, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:22.774608', 'step': 30836, 'epoch': 3}
{'type': 'loss', 'content': 0.06716165691614151, 'timestamp': '2025-10-02 01:06:22.778412', 'step': 30837, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:22.837448', 'step': 30837, 'epoch': 3}
{'type': 'loss', 'content': 0.026787718757987022, 'timestamp': '2025-10-02 01:06:22.854777', 'step': 30838, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:22.954981', 'step': 30838, 'epoch': 3}
{'type': 'loss', 'content': 0.07093338668346405, 'timestamp': '2025-10-02 01:06:22.958261', 'step': 30839, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:23.029177', 'step': 30839, 'epoch': 3}
{'type': 'loss', 'content': 0.036990538239479065, 'timestamp': '2025-10-02 01:06:23.040167', 'step': 30840, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:06:23.099487', 'step': 30840, 'epoch': 3}
{'type': 'loss', 'content': 0.06036749482154846, 'timestamp': '2025-10-02 01:06:23.102887', 'step': 30841, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:23.173495', 'step': 30841, 'epoch': 3}
{'type': 'loss', 'content': 0.05386153981089592, 'timestamp': '2025-10-02 01:06:23.183053', 'step': 30842, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:23.245071', 'step': 30842, 'epoch': 3}
{'type': 'loss', 'content': 0.035049837082624435, 'timestamp': '2025-10-02 01:06:23.254400', 'step': 30843, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:23.318799', 'step': 30843, 'epoch': 3}
{'type': 'loss', 'content': 0.06498873978853226, 'timestamp': '2025-10-02 01:06:23.327401', 'step': 30844, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:23.403047', 'step': 30844, 'epoch': 3}
{'type': 'loss', 'content': 0.04662876948714256, 'timestamp': '2025-10-02 01:06:23.411444', 'step': 30845, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:23.479102', 'step': 30845, 'epoch': 3}
{'type': 'loss', 'content': 0.10476581007242203, 'timestamp': '2025-10-02 01:06:23.483417', 'step': 30846, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:23.545067', 'step': 30846, 'epoch': 3}
{'type': 'loss', 'content': 0.02300724945962429, 'timestamp': '2025-10-02 01:06:23.555242', 'step': 30847, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:23.627941', 'step': 30847, 'epoch': 3}
{'type': 'loss', 'content': 0.011755667626857758, 'timestamp': '2025-10-02 01:06:23.634067', 'step': 30848, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:23.691838', 'step': 30848, 'epoch': 3}
{'type': 'loss', 'content': 0.0004085722321178764, 'timestamp': '2025-10-02 01:06:23.697895', 'step': 30849, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:23.762206', 'step': 30849, 'epoch': 3}
{'type': 'loss', 'content': 0.0017461879178881645, 'timestamp': '2025-10-02 01:06:23.770268', 'step': 30850, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:23.834333', 'step': 30850, 'epoch': 3}
{'type': 'loss', 'content': 0.03896817937493324, 'timestamp': '2025-10-02 01:06:23.840346', 'step': 30851, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:23.907636', 'step': 30851, 'epoch': 3}
{'type': 'loss', 'content': 0.07366745173931122, 'timestamp': '2025-10-02 01:06:23.914773', 'step': 30852, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:23.983721', 'step': 30852, 'epoch': 3}
{'type': 'loss', 'content': 0.056486018002033234, 'timestamp': '2025-10-02 01:06:23.986835', 'step': 30853, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:24.052028', 'step': 30853, 'epoch': 3}
{'type': 'loss', 'content': 0.012212551198899746, 'timestamp': '2025-10-02 01:06:24.055148', 'step': 30854, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:24.126056', 'step': 30854, 'epoch': 3}
{'type': 'loss', 'content': 0.04196375608444214, 'timestamp': '2025-10-02 01:06:24.131146', 'step': 30855, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:24.197849', 'step': 30855, 'epoch': 3}
{'type': 'loss', 'content': 0.02818300575017929, 'timestamp': '2025-10-02 01:06:24.206905', 'step': 30856, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:06:24.272453', 'step': 30856, 'epoch': 3}
{'type': 'loss', 'content': 0.05955063924193382, 'timestamp': '2025-10-02 01:06:24.282724', 'step': 30857, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:24.346665', 'step': 30857, 'epoch': 3}
{'type': 'loss', 'content': 0.016655584797263145, 'timestamp': '2025-10-02 01:06:24.354421', 'step': 30858, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:24.418605', 'step': 30858, 'epoch': 3}
{'type': 'loss', 'content': 0.05023404583334923, 'timestamp': '2025-10-02 01:06:24.427406', 'step': 30859, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:06:24.501727', 'step': 30859, 'epoch': 3}
{'type': 'loss', 'content': 0.014307019300758839, 'timestamp': '2025-10-02 01:06:24.514043', 'step': 30860, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:24.570845', 'step': 30860, 'epoch': 3}
{'type': 'loss', 'content': 0.025845112279057503, 'timestamp': '2025-10-02 01:06:24.580541', 'step': 30861, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:24.645004', 'step': 30861, 'epoch': 3}
{'type': 'loss', 'content': 0.02530844695866108, 'timestamp': '2025-10-02 01:06:24.654601', 'step': 30862, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:24.722417', 'step': 30862, 'epoch': 3}
{'type': 'loss', 'content': 0.04705771058797836, 'timestamp': '2025-10-02 01:06:24.732104', 'step': 30863, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:24.800610', 'step': 30863, 'epoch': 3}
{'type': 'loss', 'content': 0.008010456338524818, 'timestamp': '2025-10-02 01:06:24.815039', 'step': 30864, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:06:24.889144', 'step': 30864, 'epoch': 3}
{'type': 'loss', 'content': 0.01956389658153057, 'timestamp': '2025-10-02 01:06:24.902564', 'step': 30865, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:24.970236', 'step': 30865, 'epoch': 3}
{'type': 'loss', 'content': 0.05388994514942169, 'timestamp': '2025-10-02 01:06:24.977707', 'step': 30866, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:25.049703', 'step': 30866, 'epoch': 3}
{'type': 'loss', 'content': 0.05718675255775452, 'timestamp': '2025-10-02 01:06:25.053693', 'step': 30867, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:25.116674', 'step': 30867, 'epoch': 3}
{'type': 'loss', 'content': 0.06573013216257095, 'timestamp': '2025-10-02 01:06:25.129184', 'step': 30868, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:25.207488', 'step': 30868, 'epoch': 3}
{'type': 'loss', 'content': 0.032134875655174255, 'timestamp': '2025-10-02 01:06:25.218413', 'step': 30869, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 01:06:25.316843', 'step': 30869, 'epoch': 3}
{'type': 'loss', 'content': 0.0023922354448586702, 'timestamp': '2025-10-02 01:06:25.329527', 'step': 30870, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:06:25.404395', 'step': 30870, 'epoch': 3}
{'type': 'loss', 'content': 0.016967369243502617, 'timestamp': '2025-10-02 01:06:25.414910', 'step': 30871, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:25.486926', 'step': 30871, 'epoch': 3}
{'type': 'loss', 'content': 0.08660046011209488, 'timestamp': '2025-10-02 01:06:25.493925', 'step': 30872, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:25.564874', 'step': 30872, 'epoch': 3}
{'type': 'loss', 'content': 0.03924886882305145, 'timestamp': '2025-10-02 01:06:25.567223', 'step': 30873, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:25.636621', 'step': 30873, 'epoch': 3}
{'type': 'loss', 'content': 0.11913156509399414, 'timestamp': '2025-10-02 01:06:25.644026', 'step': 30874, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:25.719945', 'step': 30874, 'epoch': 3}
{'type': 'loss', 'content': 0.048393748700618744, 'timestamp': '2025-10-02 01:06:25.727918', 'step': 30875, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:25.797826', 'step': 30875, 'epoch': 3}
{'type': 'loss', 'content': 0.10651565343141556, 'timestamp': '2025-10-02 01:06:25.804515', 'step': 30876, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:25.880516', 'step': 30876, 'epoch': 3}
{'type': 'loss', 'content': 0.08571908622980118, 'timestamp': '2025-10-02 01:06:25.885809', 'step': 30877, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:25.957489', 'step': 30877, 'epoch': 3}
{'type': 'loss', 'content': 0.0276604313403368, 'timestamp': '2025-10-02 01:06:25.967313', 'step': 30878, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:26.036052', 'step': 30878, 'epoch': 3}
{'type': 'loss', 'content': 0.09033097326755524, 'timestamp': '2025-10-02 01:06:26.044517', 'step': 30879, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:26.116157', 'step': 30879, 'epoch': 3}
{'type': 'loss', 'content': 0.025850102305412292, 'timestamp': '2025-10-02 01:06:26.129349', 'step': 30880, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:26.210494', 'step': 30880, 'epoch': 3}
{'type': 'loss', 'content': 0.03618992120027542, 'timestamp': '2025-10-02 01:06:26.222523', 'step': 30881, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:26.308515', 'step': 30881, 'epoch': 3}
{'type': 'loss', 'content': 0.0017938836244866252, 'timestamp': '2025-10-02 01:06:26.316111', 'step': 30882, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:26.391752', 'step': 30882, 'epoch': 3}
{'type': 'loss', 'content': 0.08129950612783432, 'timestamp': '2025-10-02 01:06:26.401505', 'step': 30883, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:26.476219', 'step': 30883, 'epoch': 3}
{'type': 'loss', 'content': 0.04637039825320244, 'timestamp': '2025-10-02 01:06:26.483617', 'step': 30884, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:26.551784', 'step': 30884, 'epoch': 3}
{'type': 'loss', 'content': 0.028007324784994125, 'timestamp': '2025-10-02 01:06:26.561589', 'step': 30885, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:26.629047', 'step': 30885, 'epoch': 3}
{'type': 'loss', 'content': 0.028923332691192627, 'timestamp': '2025-10-02 01:06:26.638603', 'step': 30886, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:26.711804', 'step': 30886, 'epoch': 3}
{'type': 'loss', 'content': 0.06415048986673355, 'timestamp': '2025-10-02 01:06:26.722558', 'step': 30887, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:26.795617', 'step': 30887, 'epoch': 3}
{'type': 'loss', 'content': 0.04353700205683708, 'timestamp': '2025-10-02 01:06:26.808868', 'step': 30888, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:26.882098', 'step': 30888, 'epoch': 3}
{'type': 'loss', 'content': 0.09784926474094391, 'timestamp': '2025-10-02 01:06:26.884699', 'step': 30889, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:26.950603', 'step': 30889, 'epoch': 3}
{'type': 'loss', 'content': 0.01831158623099327, 'timestamp': '2025-10-02 01:06:26.959456', 'step': 30890, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:27.032238', 'step': 30890, 'epoch': 3}
{'type': 'loss', 'content': 0.050446100533008575, 'timestamp': '2025-10-02 01:06:27.043619', 'step': 30891, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 01:06:27.131864', 'step': 30891, 'epoch': 3}
{'type': 'loss', 'content': 0.006425753235816956, 'timestamp': '2025-10-02 01:06:27.145262', 'step': 30892, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:06:27.218574', 'step': 30892, 'epoch': 3}
{'type': 'loss', 'content': 0.04707219451665878, 'timestamp': '2025-10-02 01:06:27.228278', 'step': 30893, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:27.311914', 'step': 30893, 'epoch': 3}
{'type': 'loss', 'content': 0.029530838131904602, 'timestamp': '2025-10-02 01:06:27.317790', 'step': 30894, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:06:27.397375', 'step': 30894, 'epoch': 3}
{'type': 'loss', 'content': 0.03757861256599426, 'timestamp': '2025-10-02 01:06:27.407989', 'step': 30895, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:27.480867', 'step': 30895, 'epoch': 3}
{'type': 'loss', 'content': 0.009259871207177639, 'timestamp': '2025-10-02 01:06:27.497330', 'step': 30896, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:27.559953', 'step': 30896, 'epoch': 3}
{'type': 'loss', 'content': 0.08881998062133789, 'timestamp': '2025-10-02 01:06:27.568127', 'step': 30897, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:27.629509', 'step': 30897, 'epoch': 3}
{'type': 'loss', 'content': 0.21622176468372345, 'timestamp': '2025-10-02 01:06:27.638093', 'step': 30898, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:27.713993', 'step': 30898, 'epoch': 3}
{'type': 'loss', 'content': 0.04435478523373604, 'timestamp': '2025-10-02 01:06:27.724110', 'step': 30899, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:27.797059', 'step': 30899, 'epoch': 3}
{'type': 'loss', 'content': 0.024890614673495293, 'timestamp': '2025-10-02 01:06:27.809412', 'step': 30900, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:27.885876', 'step': 30900, 'epoch': 3}
{'type': 'loss', 'content': 0.020530106499791145, 'timestamp': '2025-10-02 01:06:27.901876', 'step': 30901, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:27.994123', 'step': 30901, 'epoch': 3}
{'type': 'loss', 'content': 0.06481965631246567, 'timestamp': '2025-10-02 01:06:28.003995', 'step': 30902, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:28.074260', 'step': 30902, 'epoch': 3}
{'type': 'loss', 'content': 0.02246190793812275, 'timestamp': '2025-10-02 01:06:28.085116', 'step': 30903, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:28.159429', 'step': 30903, 'epoch': 3}
{'type': 'loss', 'content': 0.0349559523165226, 'timestamp': '2025-10-02 01:06:28.176611', 'step': 30904, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:28.243020', 'step': 30904, 'epoch': 3}
{'type': 'loss', 'content': 0.015041559934616089, 'timestamp': '2025-10-02 01:06:28.248323', 'step': 30905, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:28.321662', 'step': 30905, 'epoch': 3}
{'type': 'loss', 'content': 0.029559051617980003, 'timestamp': '2025-10-02 01:06:28.325273', 'step': 30906, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:28.400207', 'step': 30906, 'epoch': 3}
{'type': 'loss', 'content': 0.030169930309057236, 'timestamp': '2025-10-02 01:06:28.411924', 'step': 30907, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:28.498744', 'step': 30907, 'epoch': 3}
{'type': 'loss', 'content': 0.1558263599872589, 'timestamp': '2025-10-02 01:06:28.512754', 'step': 30908, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:28.577939', 'step': 30908, 'epoch': 3}
{'type': 'loss', 'content': 0.014148164540529251, 'timestamp': '2025-10-02 01:06:28.591121', 'step': 30909, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:28.668452', 'step': 30909, 'epoch': 3}
{'type': 'loss', 'content': 0.11292096972465515, 'timestamp': '2025-10-02 01:06:28.679074', 'step': 30910, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:06:28.743084', 'step': 30910, 'epoch': 3}
{'type': 'loss', 'content': 0.02210093103349209, 'timestamp': '2025-10-02 01:06:28.747256', 'step': 30911, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:28.819638', 'step': 30911, 'epoch': 3}
{'type': 'loss', 'content': 0.058987367898225784, 'timestamp': '2025-10-02 01:06:28.829324', 'step': 30912, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:28.933636', 'step': 30912, 'epoch': 3}
{'type': 'loss', 'content': 0.03778838738799095, 'timestamp': '2025-10-02 01:06:28.941791', 'step': 30913, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:29.041735', 'step': 30913, 'epoch': 3}
{'type': 'loss', 'content': 0.027696991339325905, 'timestamp': '2025-10-02 01:06:29.053930', 'step': 30914, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:29.130036', 'step': 30914, 'epoch': 3}
{'type': 'loss', 'content': 0.059634771198034286, 'timestamp': '2025-10-02 01:06:29.135824', 'step': 30915, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:29.205353', 'step': 30915, 'epoch': 3}
{'type': 'loss', 'content': 0.04563921317458153, 'timestamp': '2025-10-02 01:06:29.211816', 'step': 30916, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:29.270841', 'step': 30916, 'epoch': 3}
{'type': 'loss', 'content': 0.02609512023627758, 'timestamp': '2025-10-02 01:06:29.274727', 'step': 30917, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:29.338614', 'step': 30917, 'epoch': 3}
{'type': 'loss', 'content': 0.09207671135663986, 'timestamp': '2025-10-02 01:06:29.341528', 'step': 30918, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:29.397126', 'step': 30918, 'epoch': 3}
{'type': 'loss', 'content': 0.09154403209686279, 'timestamp': '2025-10-02 01:06:29.400448', 'step': 30919, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:29.456138', 'step': 30919, 'epoch': 3}
{'type': 'loss', 'content': 0.08481355756521225, 'timestamp': '2025-10-02 01:06:29.462241', 'step': 30920, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:29.516887', 'step': 30920, 'epoch': 3}
{'type': 'loss', 'content': 0.02436208538711071, 'timestamp': '2025-10-02 01:06:29.522918', 'step': 30921, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:29.580330', 'step': 30921, 'epoch': 3}
{'type': 'loss', 'content': 0.03594642132520676, 'timestamp': '2025-10-02 01:06:29.588660', 'step': 30922, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:29.666139', 'step': 30922, 'epoch': 3}
{'type': 'loss', 'content': 0.06288021057844162, 'timestamp': '2025-10-02 01:06:29.669439', 'step': 30923, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:06:29.739332', 'step': 30923, 'epoch': 3}
{'type': 'loss', 'content': 0.019118763506412506, 'timestamp': '2025-10-02 01:06:29.756822', 'step': 30924, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:29.846253', 'step': 30924, 'epoch': 3}
{'type': 'loss', 'content': 0.028924744576215744, 'timestamp': '2025-10-02 01:06:29.853953', 'step': 30925, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:29.941295', 'step': 30925, 'epoch': 3}
{'type': 'loss', 'content': 0.007288957480341196, 'timestamp': '2025-10-02 01:06:29.952940', 'step': 30926, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:30.038073', 'step': 30926, 'epoch': 3}
{'type': 'loss', 'content': 0.12626443803310394, 'timestamp': '2025-10-02 01:06:30.052737', 'step': 30927, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:30.112688', 'step': 30927, 'epoch': 3}
{'type': 'loss', 'content': 0.005296842195093632, 'timestamp': '2025-10-02 01:06:30.121347', 'step': 30928, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:30.230931', 'step': 30928, 'epoch': 3}
{'type': 'loss', 'content': 0.0804000124335289, 'timestamp': '2025-10-02 01:06:30.242766', 'step': 30929, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:30.325416', 'step': 30929, 'epoch': 3}
{'type': 'loss', 'content': 0.02621965855360031, 'timestamp': '2025-10-02 01:06:30.334693', 'step': 30930, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:30.422710', 'step': 30930, 'epoch': 3}
{'type': 'loss', 'content': 0.04564753919839859, 'timestamp': '2025-10-02 01:06:30.425658', 'step': 30931, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:30.484058', 'step': 30931, 'epoch': 3}
{'type': 'loss', 'content': 0.048572249710559845, 'timestamp': '2025-10-02 01:06:30.492742', 'step': 30932, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:30.554609', 'step': 30932, 'epoch': 3}
{'type': 'loss', 'content': 0.035846419632434845, 'timestamp': '2025-10-02 01:06:30.558535', 'step': 30933, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:30.631249', 'step': 30933, 'epoch': 3}
{'type': 'loss', 'content': 0.038071222603321075, 'timestamp': '2025-10-02 01:06:30.641544', 'step': 30934, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:30.737866', 'step': 30934, 'epoch': 3}
{'type': 'loss', 'content': 0.05327047407627106, 'timestamp': '2025-10-02 01:06:30.747211', 'step': 30935, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:30.828480', 'step': 30935, 'epoch': 3}
{'type': 'loss', 'content': 0.05281349644064903, 'timestamp': '2025-10-02 01:06:30.835044', 'step': 30936, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:30.892598', 'step': 30936, 'epoch': 3}
{'type': 'loss', 'content': 0.015960808843374252, 'timestamp': '2025-10-02 01:06:30.895922', 'step': 30937, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:06:30.951870', 'step': 30937, 'epoch': 3}
{'type': 'loss', 'content': 0.05158793181180954, 'timestamp': '2025-10-02 01:06:30.962610', 'step': 30938, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:31.039907', 'step': 30938, 'epoch': 3}
{'type': 'loss', 'content': 0.1227232813835144, 'timestamp': '2025-10-02 01:06:31.053462', 'step': 30939, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:31.130092', 'step': 30939, 'epoch': 3}
{'type': 'loss', 'content': 0.03877328708767891, 'timestamp': '2025-10-02 01:06:31.144291', 'step': 30940, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:31.222480', 'step': 30940, 'epoch': 3}
{'type': 'loss', 'content': 0.047276031225919724, 'timestamp': '2025-10-02 01:06:31.233407', 'step': 30941, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:31.300126', 'step': 30941, 'epoch': 3}
{'type': 'loss', 'content': 0.07525292783975601, 'timestamp': '2025-10-02 01:06:31.306700', 'step': 30942, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:31.383093', 'step': 30942, 'epoch': 3}
{'type': 'loss', 'content': 0.08824417740106583, 'timestamp': '2025-10-02 01:06:31.393343', 'step': 30943, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:31.474253', 'step': 30943, 'epoch': 3}
{'type': 'loss', 'content': 0.0669841468334198, 'timestamp': '2025-10-02 01:06:31.484341', 'step': 30944, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:31.548179', 'step': 30944, 'epoch': 3}
{'type': 'loss', 'content': 0.019768191501498222, 'timestamp': '2025-10-02 01:06:31.551642', 'step': 30945, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:31.617283', 'step': 30945, 'epoch': 3}
{'type': 'loss', 'content': 0.004563526716083288, 'timestamp': '2025-10-02 01:06:31.624815', 'step': 30946, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:31.712257', 'step': 30946, 'epoch': 3}
{'type': 'loss', 'content': 0.03466648608446121, 'timestamp': '2025-10-02 01:06:31.722917', 'step': 30947, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:31.796194', 'step': 30947, 'epoch': 3}
{'type': 'loss', 'content': 0.03448740765452385, 'timestamp': '2025-10-02 01:06:31.804989', 'step': 30948, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:31.889007', 'step': 30948, 'epoch': 3}
{'type': 'loss', 'content': 0.04114772006869316, 'timestamp': '2025-10-02 01:06:31.900026', 'step': 30949, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:31.975940', 'step': 30949, 'epoch': 3}
{'type': 'loss', 'content': 0.05035814642906189, 'timestamp': '2025-10-02 01:06:31.988237', 'step': 30950, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:32.055800', 'step': 30950, 'epoch': 3}
{'type': 'loss', 'content': 0.008773243986070156, 'timestamp': '2025-10-02 01:06:32.059622', 'step': 30951, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:32.137183', 'step': 30951, 'epoch': 3}
{'type': 'loss', 'content': 0.03246583417057991, 'timestamp': '2025-10-02 01:06:32.153090', 'step': 30952, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:32.236256', 'step': 30952, 'epoch': 3}
{'type': 'loss', 'content': 0.021760843694210052, 'timestamp': '2025-10-02 01:06:32.247264', 'step': 30953, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:32.323041', 'step': 30953, 'epoch': 3}
{'type': 'loss', 'content': 0.06945519894361496, 'timestamp': '2025-10-02 01:06:32.335894', 'step': 30954, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:32.427817', 'step': 30954, 'epoch': 3}
{'type': 'loss', 'content': 0.026075124740600586, 'timestamp': '2025-10-02 01:06:32.443704', 'step': 30955, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:32.528740', 'step': 30955, 'epoch': 3}
{'type': 'loss', 'content': 0.08142001926898956, 'timestamp': '2025-10-02 01:06:32.537371', 'step': 30956, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:32.606906', 'step': 30956, 'epoch': 3}
{'type': 'loss', 'content': 0.02763965353369713, 'timestamp': '2025-10-02 01:06:32.619386', 'step': 30957, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:32.696345', 'step': 30957, 'epoch': 3}
{'type': 'loss', 'content': 0.015225211158394814, 'timestamp': '2025-10-02 01:06:32.700024', 'step': 30958, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:06:32.775506', 'step': 30958, 'epoch': 3}
{'type': 'loss', 'content': 0.011236163787543774, 'timestamp': '2025-10-02 01:06:32.787245', 'step': 30959, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:32.873042', 'step': 30959, 'epoch': 3}
{'type': 'loss', 'content': 0.0006824122974649072, 'timestamp': '2025-10-02 01:06:32.889807', 'step': 30960, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:32.966609', 'step': 30960, 'epoch': 3}
{'type': 'loss', 'content': 0.057380013167858124, 'timestamp': '2025-10-02 01:06:32.969250', 'step': 30961, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:33.029204', 'step': 30961, 'epoch': 3}
{'type': 'loss', 'content': 0.007783137261867523, 'timestamp': '2025-10-02 01:06:33.036186', 'step': 30962, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:33.128285', 'step': 30962, 'epoch': 3}
{'type': 'loss', 'content': 0.05561898648738861, 'timestamp': '2025-10-02 01:06:33.141074', 'step': 30963, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:33.228207', 'step': 30963, 'epoch': 3}
{'type': 'loss', 'content': 0.05034754425287247, 'timestamp': '2025-10-02 01:06:33.248005', 'step': 30964, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:33.312898', 'step': 30964, 'epoch': 3}
{'type': 'loss', 'content': 0.12027512490749359, 'timestamp': '2025-10-02 01:06:33.325460', 'step': 30965, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:33.396915', 'step': 30965, 'epoch': 3}
{'type': 'loss', 'content': 0.0905216857790947, 'timestamp': '2025-10-02 01:06:33.401446', 'step': 30966, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:33.459465', 'step': 30966, 'epoch': 3}
{'type': 'loss', 'content': 0.009839634411036968, 'timestamp': '2025-10-02 01:06:33.462470', 'step': 30967, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:33.550103', 'step': 30967, 'epoch': 3}
{'type': 'loss', 'content': 0.058256424963474274, 'timestamp': '2025-10-02 01:06:33.571157', 'step': 30968, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:33.637376', 'step': 30968, 'epoch': 3}
{'type': 'loss', 'content': 0.040778998285532, 'timestamp': '2025-10-02 01:06:33.644507', 'step': 30969, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:33.717322', 'step': 30969, 'epoch': 3}
{'type': 'loss', 'content': 0.012908347882330418, 'timestamp': '2025-10-02 01:06:33.720855', 'step': 30970, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:33.787020', 'step': 30970, 'epoch': 3}
{'type': 'loss', 'content': 0.04956454783678055, 'timestamp': '2025-10-02 01:06:33.791902', 'step': 30971, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:33.854450', 'step': 30971, 'epoch': 3}
{'type': 'loss', 'content': 0.014401701278984547, 'timestamp': '2025-10-02 01:06:33.864386', 'step': 30972, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:33.925604', 'step': 30972, 'epoch': 3}
{'type': 'loss', 'content': 0.0010008567478507757, 'timestamp': '2025-10-02 01:06:33.935267', 'step': 30973, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:33.998105', 'step': 30973, 'epoch': 3}
{'type': 'loss', 'content': 0.08894286304712296, 'timestamp': '2025-10-02 01:06:34.000744', 'step': 30974, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:34.060176', 'step': 30974, 'epoch': 3}
{'type': 'loss', 'content': 0.028011862188577652, 'timestamp': '2025-10-02 01:06:34.067386', 'step': 30975, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:34.128495', 'step': 30975, 'epoch': 3}
{'type': 'loss', 'content': 0.06291872262954712, 'timestamp': '2025-10-02 01:06:34.138398', 'step': 30976, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:34.197466', 'step': 30976, 'epoch': 3}
{'type': 'loss', 'content': 0.0030747989658266306, 'timestamp': '2025-10-02 01:06:34.205230', 'step': 30977, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:34.268208', 'step': 30977, 'epoch': 3}
{'type': 'loss', 'content': 0.030040593817830086, 'timestamp': '2025-10-02 01:06:34.274764', 'step': 30978, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:34.336842', 'step': 30978, 'epoch': 3}
{'type': 'loss', 'content': 0.03524666652083397, 'timestamp': '2025-10-02 01:06:34.343284', 'step': 30979, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 01:06:34.428688', 'step': 30979, 'epoch': 3}
{'type': 'loss', 'content': 0.026284750550985336, 'timestamp': '2025-10-02 01:06:34.444281', 'step': 30980, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:34.525670', 'step': 30980, 'epoch': 3}
{'type': 'loss', 'content': 0.10560110956430435, 'timestamp': '2025-10-02 01:06:34.532903', 'step': 30981, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:34.598711', 'step': 30981, 'epoch': 3}
{'type': 'loss', 'content': 0.0581289567053318, 'timestamp': '2025-10-02 01:06:34.605160', 'step': 30982, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:34.667808', 'step': 30982, 'epoch': 3}
{'type': 'loss', 'content': 0.08996419608592987, 'timestamp': '2025-10-02 01:06:34.673927', 'step': 30983, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:34.739424', 'step': 30983, 'epoch': 3}
{'type': 'loss', 'content': 0.01634460873901844, 'timestamp': '2025-10-02 01:06:34.752571', 'step': 30984, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:34.812329', 'step': 30984, 'epoch': 3}
{'type': 'loss', 'content': 0.06968880444765091, 'timestamp': '2025-10-02 01:06:34.817850', 'step': 30985, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:34.882848', 'step': 30985, 'epoch': 3}
{'type': 'loss', 'content': 0.04504234343767166, 'timestamp': '2025-10-02 01:06:34.890285', 'step': 30986, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:34.951756', 'step': 30986, 'epoch': 3}
{'type': 'loss', 'content': 0.08135978132486343, 'timestamp': '2025-10-02 01:06:34.957631', 'step': 30987, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:35.022647', 'step': 30987, 'epoch': 3}
{'type': 'loss', 'content': 0.04324411228299141, 'timestamp': '2025-10-02 01:06:35.031340', 'step': 30988, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:35.094159', 'step': 30988, 'epoch': 3}
{'type': 'loss', 'content': 0.09473346918821335, 'timestamp': '2025-10-02 01:06:35.099240', 'step': 30989, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:35.160268', 'step': 30989, 'epoch': 3}
{'type': 'loss', 'content': 0.11732982844114304, 'timestamp': '2025-10-02 01:06:35.166227', 'step': 30990, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:35.225880', 'step': 30990, 'epoch': 3}
{'type': 'loss', 'content': 0.08117322623729706, 'timestamp': '2025-10-02 01:06:35.232134', 'step': 30991, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:06:35.297356', 'step': 30991, 'epoch': 3}
{'type': 'loss', 'content': 0.06995197385549545, 'timestamp': '2025-10-02 01:06:35.307399', 'step': 30992, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:35.370333', 'step': 30992, 'epoch': 3}
{'type': 'loss', 'content': 0.07951732724905014, 'timestamp': '2025-10-02 01:06:35.376059', 'step': 30993, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:35.434295', 'step': 30993, 'epoch': 3}
{'type': 'loss', 'content': 0.09438696503639221, 'timestamp': '2025-10-02 01:06:35.439232', 'step': 30994, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:35.503633', 'step': 30994, 'epoch': 3}
{'type': 'loss', 'content': 0.09267204254865646, 'timestamp': '2025-10-02 01:06:35.506471', 'step': 30995, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:35.571939', 'step': 30995, 'epoch': 3}
{'type': 'loss', 'content': 0.13626454770565033, 'timestamp': '2025-10-02 01:06:35.580713', 'step': 30996, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:35.642240', 'step': 30996, 'epoch': 3}
{'type': 'loss', 'content': 0.04426516592502594, 'timestamp': '2025-10-02 01:06:35.647353', 'step': 30997, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:35.711214', 'step': 30997, 'epoch': 3}
{'type': 'loss', 'content': 0.06528875231742859, 'timestamp': '2025-10-02 01:06:35.716572', 'step': 30998, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:35.776950', 'step': 30998, 'epoch': 3}
{'type': 'loss', 'content': 0.0172270555049181, 'timestamp': '2025-10-02 01:06:35.782541', 'step': 30999, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:35.843635', 'step': 30999, 'epoch': 3}
{'type': 'loss', 'content': 0.09841625392436981, 'timestamp': '2025-10-02 01:06:35.851822', 'step': 31000, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 31000', 'timestamp': '2025-10-02 01:06:36.288815', 'step': 31000, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:36.350506', 'step': 31000, 'epoch': 3}
{'type': 'loss', 'content': 0.027619624510407448, 'timestamp': '2025-10-02 01:06:36.357865', 'step': 31001, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:36.424654', 'step': 31001, 'epoch': 3}
{'type': 'loss', 'content': 0.11560527980327606, 'timestamp': '2025-10-02 01:06:36.430731', 'step': 31002, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:36.494090', 'step': 31002, 'epoch': 3}
{'type': 'loss', 'content': 0.043688978999853134, 'timestamp': '2025-10-02 01:06:36.500480', 'step': 31003, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:36.574646', 'step': 31003, 'epoch': 3}
{'type': 'loss', 'content': 0.0755014643073082, 'timestamp': '2025-10-02 01:06:36.582453', 'step': 31004, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:36.643242', 'step': 31004, 'epoch': 3}
{'type': 'loss', 'content': 0.02536841668188572, 'timestamp': '2025-10-02 01:06:36.649430', 'step': 31005, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:36.710069', 'step': 31005, 'epoch': 3}
{'type': 'loss', 'content': 0.06058059632778168, 'timestamp': '2025-10-02 01:06:36.715113', 'step': 31006, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:36.775017', 'step': 31006, 'epoch': 3}
{'type': 'loss', 'content': 0.050355371087789536, 'timestamp': '2025-10-02 01:06:36.780591', 'step': 31007, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:36.839260', 'step': 31007, 'epoch': 3}
{'type': 'loss', 'content': 0.005216083489358425, 'timestamp': '2025-10-02 01:06:36.848963', 'step': 31008, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:36.909705', 'step': 31008, 'epoch': 3}
{'type': 'loss', 'content': 0.03862880915403366, 'timestamp': '2025-10-02 01:06:36.919944', 'step': 31009, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:36.979134', 'step': 31009, 'epoch': 3}
{'type': 'loss', 'content': 0.04814264178276062, 'timestamp': '2025-10-02 01:06:36.986350', 'step': 31010, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:37.048251', 'step': 31010, 'epoch': 3}
{'type': 'loss', 'content': 0.017633378505706787, 'timestamp': '2025-10-02 01:06:37.053819', 'step': 31011, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:37.116441', 'step': 31011, 'epoch': 3}
{'type': 'loss', 'content': 0.014558633789420128, 'timestamp': '2025-10-02 01:06:37.126499', 'step': 31012, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:06:37.191427', 'step': 31012, 'epoch': 3}
{'type': 'loss', 'content': 0.01795242354273796, 'timestamp': '2025-10-02 01:06:37.202791', 'step': 31013, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:37.261488', 'step': 31013, 'epoch': 3}
{'type': 'loss', 'content': 0.12264437973499298, 'timestamp': '2025-10-02 01:06:37.263962', 'step': 31014, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:37.323931', 'step': 31014, 'epoch': 3}
{'type': 'loss', 'content': 0.0674462765455246, 'timestamp': '2025-10-02 01:06:37.327883', 'step': 31015, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:37.386518', 'step': 31015, 'epoch': 3}
{'type': 'loss', 'content': 0.05885947123169899, 'timestamp': '2025-10-02 01:06:37.394493', 'step': 31016, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:37.454031', 'step': 31016, 'epoch': 3}
{'type': 'loss', 'content': 0.04014138504862785, 'timestamp': '2025-10-02 01:06:37.458596', 'step': 31017, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:37.519886', 'step': 31017, 'epoch': 3}
{'type': 'loss', 'content': 0.09781703352928162, 'timestamp': '2025-10-02 01:06:37.530875', 'step': 31018, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:37.592656', 'step': 31018, 'epoch': 3}
{'type': 'loss', 'content': 0.15190447866916656, 'timestamp': '2025-10-02 01:06:37.595315', 'step': 31019, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:37.657433', 'step': 31019, 'epoch': 3}
{'type': 'loss', 'content': 0.06343424320220947, 'timestamp': '2025-10-02 01:06:37.666395', 'step': 31020, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:37.731306', 'step': 31020, 'epoch': 3}
{'type': 'loss', 'content': 0.03235739842057228, 'timestamp': '2025-10-02 01:06:37.736391', 'step': 31021, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:37.799854', 'step': 31021, 'epoch': 3}
{'type': 'loss', 'content': 0.0008665421628393233, 'timestamp': '2025-10-02 01:06:37.809330', 'step': 31022, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:37.872308', 'step': 31022, 'epoch': 3}
{'type': 'loss', 'content': 0.04475491866469383, 'timestamp': '2025-10-02 01:06:37.876681', 'step': 31023, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:37.937481', 'step': 31023, 'epoch': 3}
{'type': 'loss', 'content': 0.01066603884100914, 'timestamp': '2025-10-02 01:06:37.947777', 'step': 31024, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:38.004662', 'step': 31024, 'epoch': 3}
{'type': 'loss', 'content': 0.02119722217321396, 'timestamp': '2025-10-02 01:06:38.008606', 'step': 31025, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:38.066689', 'step': 31025, 'epoch': 3}
{'type': 'loss', 'content': 0.035654615610837936, 'timestamp': '2025-10-02 01:06:38.070856', 'step': 31026, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:38.131920', 'step': 31026, 'epoch': 3}
{'type': 'loss', 'content': 0.05144074186682701, 'timestamp': '2025-10-02 01:06:38.136468', 'step': 31027, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:38.198313', 'step': 31027, 'epoch': 3}
{'type': 'loss', 'content': 0.050797317177057266, 'timestamp': '2025-10-02 01:06:38.204860', 'step': 31028, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:38.260240', 'step': 31028, 'epoch': 3}
{'type': 'loss', 'content': 0.09372474998235703, 'timestamp': '2025-10-02 01:06:38.262821', 'step': 31029, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:06:38.333331', 'step': 31029, 'epoch': 3}
{'type': 'loss', 'content': 0.009559391997754574, 'timestamp': '2025-10-02 01:06:38.344162', 'step': 31030, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:38.401450', 'step': 31030, 'epoch': 3}
{'type': 'loss', 'content': 0.031104762107133865, 'timestamp': '2025-10-02 01:06:38.408640', 'step': 31031, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:38.469088', 'step': 31031, 'epoch': 3}
{'type': 'loss', 'content': 0.044334039092063904, 'timestamp': '2025-10-02 01:06:38.475792', 'step': 31032, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:38.531414', 'step': 31032, 'epoch': 3}
{'type': 'loss', 'content': 0.008029603399336338, 'timestamp': '2025-10-02 01:06:38.541651', 'step': 31033, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:38.603201', 'step': 31033, 'epoch': 3}
{'type': 'loss', 'content': 0.01599234901368618, 'timestamp': '2025-10-02 01:06:38.613332', 'step': 31034, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:38.670901', 'step': 31034, 'epoch': 3}
{'type': 'loss', 'content': 0.0235274825245142, 'timestamp': '2025-10-02 01:06:38.674359', 'step': 31035, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:38.732253', 'step': 31035, 'epoch': 3}
{'type': 'loss', 'content': 0.09607871621847153, 'timestamp': '2025-10-02 01:06:38.740139', 'step': 31036, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:38.798767', 'step': 31036, 'epoch': 3}
{'type': 'loss', 'content': 0.004594018217176199, 'timestamp': '2025-10-02 01:06:38.804253', 'step': 31037, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:38.862284', 'step': 31037, 'epoch': 3}
{'type': 'loss', 'content': 0.04937441647052765, 'timestamp': '2025-10-02 01:06:38.865137', 'step': 31038, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:06:38.935240', 'step': 31038, 'epoch': 3}
{'type': 'loss', 'content': 0.007499354891479015, 'timestamp': '2025-10-02 01:06:38.945680', 'step': 31039, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:39.016008', 'step': 31039, 'epoch': 3}
{'type': 'loss', 'content': 0.053704340010881424, 'timestamp': '2025-10-02 01:06:39.031557', 'step': 31040, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:39.120011', 'step': 31040, 'epoch': 3}
{'type': 'loss', 'content': 0.06377200782299042, 'timestamp': '2025-10-02 01:06:39.133258', 'step': 31041, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:39.206782', 'step': 31041, 'epoch': 3}
{'type': 'loss', 'content': 0.0955810621380806, 'timestamp': '2025-10-02 01:06:39.219429', 'step': 31042, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:39.293893', 'step': 31042, 'epoch': 3}
{'type': 'loss', 'content': 0.0004539493238553405, 'timestamp': '2025-10-02 01:06:39.304084', 'step': 31043, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:39.365770', 'step': 31043, 'epoch': 3}
{'type': 'loss', 'content': 0.035524047911167145, 'timestamp': '2025-10-02 01:06:39.373050', 'step': 31044, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:39.431595', 'step': 31044, 'epoch': 3}
{'type': 'loss', 'content': 0.026231572031974792, 'timestamp': '2025-10-02 01:06:39.434792', 'step': 31045, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:39.515303', 'step': 31045, 'epoch': 3}
{'type': 'loss', 'content': 0.0957985669374466, 'timestamp': '2025-10-02 01:06:39.518764', 'step': 31046, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:39.598072', 'step': 31046, 'epoch': 3}
{'type': 'loss', 'content': 0.019827092066407204, 'timestamp': '2025-10-02 01:06:39.608213', 'step': 31047, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:39.681225', 'step': 31047, 'epoch': 3}
{'type': 'loss', 'content': 0.013441307470202446, 'timestamp': '2025-10-02 01:06:39.688550', 'step': 31048, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:39.756819', 'step': 31048, 'epoch': 3}
{'type': 'loss', 'content': 0.054668862372636795, 'timestamp': '2025-10-02 01:06:39.766103', 'step': 31049, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:39.839393', 'step': 31049, 'epoch': 3}
{'type': 'loss', 'content': 0.0484321303665638, 'timestamp': '2025-10-02 01:06:39.848295', 'step': 31050, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:39.924802', 'step': 31050, 'epoch': 3}
{'type': 'loss', 'content': 0.03388110548257828, 'timestamp': '2025-10-02 01:06:39.930270', 'step': 31051, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:39.990174', 'step': 31051, 'epoch': 3}
{'type': 'loss', 'content': 0.04193678870797157, 'timestamp': '2025-10-02 01:06:39.997888', 'step': 31052, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:40.084107', 'step': 31052, 'epoch': 3}
{'type': 'loss', 'content': 0.022187424823641777, 'timestamp': '2025-10-02 01:06:40.095036', 'step': 31053, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:06:40.161880', 'step': 31053, 'epoch': 3}
{'type': 'loss', 'content': 0.03293878957629204, 'timestamp': '2025-10-02 01:06:40.167392', 'step': 31054, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:40.232432', 'step': 31054, 'epoch': 3}
{'type': 'loss', 'content': 0.01482559647411108, 'timestamp': '2025-10-02 01:06:40.242628', 'step': 31055, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:40.301216', 'step': 31055, 'epoch': 3}
{'type': 'loss', 'content': 0.07549713551998138, 'timestamp': '2025-10-02 01:06:40.310144', 'step': 31056, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:40.366186', 'step': 31056, 'epoch': 3}
{'type': 'loss', 'content': 0.03259917348623276, 'timestamp': '2025-10-02 01:06:40.369717', 'step': 31057, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:40.425442', 'step': 31057, 'epoch': 3}
{'type': 'loss', 'content': 0.12561564147472382, 'timestamp': '2025-10-02 01:06:40.428243', 'step': 31058, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:06:40.490655', 'step': 31058, 'epoch': 3}
{'type': 'loss', 'content': 0.018262984231114388, 'timestamp': '2025-10-02 01:06:40.501115', 'step': 31059, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:40.558884', 'step': 31059, 'epoch': 3}
{'type': 'loss', 'content': 0.08925008773803711, 'timestamp': '2025-10-02 01:06:40.565280', 'step': 31060, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:06:40.627872', 'step': 31060, 'epoch': 3}
{'type': 'loss', 'content': 0.012453550472855568, 'timestamp': '2025-10-02 01:06:40.639378', 'step': 31061, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:40.695335', 'step': 31061, 'epoch': 3}
{'type': 'loss', 'content': 0.04508671909570694, 'timestamp': '2025-10-02 01:06:40.697821', 'step': 31062, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:40.753067', 'step': 31062, 'epoch': 3}
{'type': 'loss', 'content': 0.025646844878792763, 'timestamp': '2025-10-02 01:06:40.760258', 'step': 31063, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:06:40.826465', 'step': 31063, 'epoch': 3}
{'type': 'loss', 'content': 0.010266788303852081, 'timestamp': '2025-10-02 01:06:40.837889', 'step': 31064, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:40.893189', 'step': 31064, 'epoch': 3}
{'type': 'loss', 'content': 0.021402807906270027, 'timestamp': '2025-10-02 01:06:40.896072', 'step': 31065, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:40.966546', 'step': 31065, 'epoch': 3}
{'type': 'loss', 'content': 0.010192553512752056, 'timestamp': '2025-10-02 01:06:40.969584', 'step': 31066, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:41.025152', 'step': 31066, 'epoch': 3}
{'type': 'loss', 'content': 0.03729809448122978, 'timestamp': '2025-10-02 01:06:41.032482', 'step': 31067, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:41.090632', 'step': 31067, 'epoch': 3}
{'type': 'loss', 'content': 0.06195170804858208, 'timestamp': '2025-10-02 01:06:41.099060', 'step': 31068, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:41.165631', 'step': 31068, 'epoch': 3}
{'type': 'loss', 'content': 0.03396233543753624, 'timestamp': '2025-10-02 01:06:41.174615', 'step': 31069, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:41.238777', 'step': 31069, 'epoch': 3}
{'type': 'loss', 'content': 0.023235280066728592, 'timestamp': '2025-10-02 01:06:41.244416', 'step': 31070, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:41.301909', 'step': 31070, 'epoch': 3}
{'type': 'loss', 'content': 0.054795388132333755, 'timestamp': '2025-10-02 01:06:41.305893', 'step': 31071, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:41.374032', 'step': 31071, 'epoch': 3}
{'type': 'loss', 'content': 0.05061166360974312, 'timestamp': '2025-10-02 01:06:41.381261', 'step': 31072, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:41.451103', 'step': 31072, 'epoch': 3}
{'type': 'loss', 'content': 0.10146015882492065, 'timestamp': '2025-10-02 01:06:41.459376', 'step': 31073, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:41.527680', 'step': 31073, 'epoch': 3}
{'type': 'loss', 'content': 0.05902015045285225, 'timestamp': '2025-10-02 01:06:41.534974', 'step': 31074, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:41.609469', 'step': 31074, 'epoch': 3}
{'type': 'loss', 'content': 0.04172772169113159, 'timestamp': '2025-10-02 01:06:41.619002', 'step': 31075, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:06:41.692686', 'step': 31075, 'epoch': 3}
{'type': 'loss', 'content': 0.009069964289665222, 'timestamp': '2025-10-02 01:06:41.704061', 'step': 31076, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:06:41.762605', 'step': 31076, 'epoch': 3}
{'type': 'loss', 'content': 0.048219773918390274, 'timestamp': '2025-10-02 01:06:41.769228', 'step': 31077, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:41.839260', 'step': 31077, 'epoch': 3}
{'type': 'loss', 'content': 0.10257682204246521, 'timestamp': '2025-10-02 01:06:41.842766', 'step': 31078, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:41.913651', 'step': 31078, 'epoch': 3}
{'type': 'loss', 'content': 0.004277622327208519, 'timestamp': '2025-10-02 01:06:41.923195', 'step': 31079, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:41.982806', 'step': 31079, 'epoch': 3}
{'type': 'loss', 'content': 0.0583747997879982, 'timestamp': '2025-10-02 01:06:41.990184', 'step': 31080, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:42.049035', 'step': 31080, 'epoch': 3}
{'type': 'loss', 'content': 0.03138021379709244, 'timestamp': '2025-10-02 01:06:42.053298', 'step': 31081, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:06:42.129274', 'step': 31081, 'epoch': 3}
{'type': 'loss', 'content': 0.020758209750056267, 'timestamp': '2025-10-02 01:06:42.133408', 'step': 31082, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:06:42.193208', 'step': 31082, 'epoch': 3}
{'type': 'loss', 'content': 0.03329145163297653, 'timestamp': '2025-10-02 01:06:42.196041', 'step': 31083, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:42.251543', 'step': 31083, 'epoch': 3}
{'type': 'loss', 'content': 0.0637083351612091, 'timestamp': '2025-10-02 01:06:42.258946', 'step': 31084, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:42.313894', 'step': 31084, 'epoch': 3}
{'type': 'loss', 'content': 0.043934617191553116, 'timestamp': '2025-10-02 01:06:42.317130', 'step': 31085, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:42.372739', 'step': 31085, 'epoch': 3}
{'type': 'loss', 'content': 0.0816565454006195, 'timestamp': '2025-10-02 01:06:42.375858', 'step': 31086, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:42.432162', 'step': 31086, 'epoch': 3}
{'type': 'loss', 'content': 0.08512742072343826, 'timestamp': '2025-10-02 01:06:42.435319', 'step': 31087, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:42.490456', 'step': 31087, 'epoch': 3}
{'type': 'loss', 'content': 0.10427436977624893, 'timestamp': '2025-10-02 01:06:42.500069', 'step': 31088, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:42.565958', 'step': 31088, 'epoch': 3}
{'type': 'loss', 'content': 0.03783175349235535, 'timestamp': '2025-10-02 01:06:42.572894', 'step': 31089, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:42.641387', 'step': 31089, 'epoch': 3}
{'type': 'loss', 'content': 0.04016505926847458, 'timestamp': '2025-10-02 01:06:42.646915', 'step': 31090, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:42.702734', 'step': 31090, 'epoch': 3}
{'type': 'loss', 'content': 0.04747731611132622, 'timestamp': '2025-10-02 01:06:42.706913', 'step': 31091, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:42.765526', 'step': 31091, 'epoch': 3}
{'type': 'loss', 'content': 0.006415640935301781, 'timestamp': '2025-10-02 01:06:42.772844', 'step': 31092, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:42.842984', 'step': 31092, 'epoch': 3}
{'type': 'loss', 'content': 0.07410507649183273, 'timestamp': '2025-10-02 01:06:42.849832', 'step': 31093, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:42.916445', 'step': 31093, 'epoch': 3}
{'type': 'loss', 'content': 0.02344180829823017, 'timestamp': '2025-10-02 01:06:42.919836', 'step': 31094, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:42.980248', 'step': 31094, 'epoch': 3}
{'type': 'loss', 'content': 0.0728180781006813, 'timestamp': '2025-10-02 01:06:42.985149', 'step': 31095, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:43.045770', 'step': 31095, 'epoch': 3}
{'type': 'loss', 'content': 0.061678141355514526, 'timestamp': '2025-10-02 01:06:43.053545', 'step': 31096, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:43.111810', 'step': 31096, 'epoch': 3}
{'type': 'loss', 'content': 0.052091456949710846, 'timestamp': '2025-10-02 01:06:43.117518', 'step': 31097, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:43.173635', 'step': 31097, 'epoch': 3}
{'type': 'loss', 'content': 0.019457772374153137, 'timestamp': '2025-10-02 01:06:43.182817', 'step': 31098, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:43.245652', 'step': 31098, 'epoch': 3}
{'type': 'loss', 'content': 0.06106705218553543, 'timestamp': '2025-10-02 01:06:43.251229', 'step': 31099, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:43.309692', 'step': 31099, 'epoch': 3}
{'type': 'loss', 'content': 0.03621961921453476, 'timestamp': '2025-10-02 01:06:43.326759', 'step': 31100, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:43.391156', 'step': 31100, 'epoch': 3}
{'type': 'loss', 'content': 0.0045196497812867165, 'timestamp': '2025-10-02 01:06:43.398294', 'step': 31101, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:43.458837', 'step': 31101, 'epoch': 3}
{'type': 'loss', 'content': 0.01767580211162567, 'timestamp': '2025-10-02 01:06:43.466050', 'step': 31102, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:43.524995', 'step': 31102, 'epoch': 3}
{'type': 'loss', 'content': 0.049185965210199356, 'timestamp': '2025-10-02 01:06:43.528808', 'step': 31103, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:43.592775', 'step': 31103, 'epoch': 3}
{'type': 'loss', 'content': 0.03680157661437988, 'timestamp': '2025-10-02 01:06:43.603634', 'step': 31104, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:43.659871', 'step': 31104, 'epoch': 3}
{'type': 'loss', 'content': 0.08455712348222733, 'timestamp': '2025-10-02 01:06:43.662264', 'step': 31105, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 544], 'flops': 10880066115712.0}, 'timestamp': '2025-10-02 01:06:43.746193', 'step': 31105, 'epoch': 3}
{'type': 'loss', 'content': 0.013836147263646126, 'timestamp': '2025-10-02 01:06:43.761044', 'step': 31106, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:43.816792', 'step': 31106, 'epoch': 3}
{'type': 'loss', 'content': 0.011704096570611, 'timestamp': '2025-10-02 01:06:43.819653', 'step': 31107, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:43.874289', 'step': 31107, 'epoch': 3}
{'type': 'loss', 'content': 0.12286362051963806, 'timestamp': '2025-10-02 01:06:43.884198', 'step': 31108, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:43.941100', 'step': 31108, 'epoch': 3}
{'type': 'loss', 'content': 0.017540903761982918, 'timestamp': '2025-10-02 01:06:43.943869', 'step': 31109, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:44.006863', 'step': 31109, 'epoch': 3}
{'type': 'loss', 'content': 0.04208403453230858, 'timestamp': '2025-10-02 01:06:44.014206', 'step': 31110, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:44.075760', 'step': 31110, 'epoch': 3}
{'type': 'loss', 'content': 0.07753676921129227, 'timestamp': '2025-10-02 01:06:44.078610', 'step': 31111, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:44.137846', 'step': 31111, 'epoch': 3}
{'type': 'loss', 'content': 0.012543987482786179, 'timestamp': '2025-10-02 01:06:44.144312', 'step': 31112, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:44.200055', 'step': 31112, 'epoch': 3}
{'type': 'loss', 'content': 0.05526136979460716, 'timestamp': '2025-10-02 01:06:44.203506', 'step': 31113, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:44.259765', 'step': 31113, 'epoch': 3}
{'type': 'loss', 'content': 0.03498107194900513, 'timestamp': '2025-10-02 01:06:44.262965', 'step': 31114, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:06:44.327857', 'step': 31114, 'epoch': 3}
{'type': 'loss', 'content': 0.013718037866055965, 'timestamp': '2025-10-02 01:06:44.338297', 'step': 31115, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:44.396431', 'step': 31115, 'epoch': 3}
{'type': 'loss', 'content': 0.09922870248556137, 'timestamp': '2025-10-02 01:06:44.402917', 'step': 31116, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:44.460011', 'step': 31116, 'epoch': 3}
{'type': 'loss', 'content': 0.02239813841879368, 'timestamp': '2025-10-02 01:06:44.469304', 'step': 31117, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:44.527251', 'step': 31117, 'epoch': 3}
{'type': 'loss', 'content': 0.08633536845445633, 'timestamp': '2025-10-02 01:06:44.529697', 'step': 31118, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:44.585131', 'step': 31118, 'epoch': 3}
{'type': 'loss', 'content': 0.07480738312005997, 'timestamp': '2025-10-02 01:06:44.590639', 'step': 31119, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:44.646400', 'step': 31119, 'epoch': 3}
{'type': 'loss', 'content': 0.05230403319001198, 'timestamp': '2025-10-02 01:06:44.653119', 'step': 31120, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:44.707614', 'step': 31120, 'epoch': 3}
{'type': 'loss', 'content': 0.0026763593778014183, 'timestamp': '2025-10-02 01:06:44.716864', 'step': 31121, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:44.776191', 'step': 31121, 'epoch': 3}
{'type': 'loss', 'content': 0.016031453385949135, 'timestamp': '2025-10-02 01:06:44.786343', 'step': 31122, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:44.846424', 'step': 31122, 'epoch': 3}
{'type': 'loss', 'content': 0.043481577187776566, 'timestamp': '2025-10-02 01:06:44.850380', 'step': 31123, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:06:44.920357', 'step': 31123, 'epoch': 3}
{'type': 'loss', 'content': 0.013737045228481293, 'timestamp': '2025-10-02 01:06:44.931605', 'step': 31124, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:44.995868', 'step': 31124, 'epoch': 3}
{'type': 'loss', 'content': 0.03097539022564888, 'timestamp': '2025-10-02 01:06:45.001624', 'step': 31125, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:45.060687', 'step': 31125, 'epoch': 3}
{'type': 'loss', 'content': 0.08749955892562866, 'timestamp': '2025-10-02 01:06:45.064424', 'step': 31126, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:45.119716', 'step': 31126, 'epoch': 3}
{'type': 'loss', 'content': 0.051028817892074585, 'timestamp': '2025-10-02 01:06:45.122601', 'step': 31127, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:45.178344', 'step': 31127, 'epoch': 3}
{'type': 'loss', 'content': 0.03069203905761242, 'timestamp': '2025-10-02 01:06:45.184613', 'step': 31128, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:45.247305', 'step': 31128, 'epoch': 3}
{'type': 'loss', 'content': 0.03657572343945503, 'timestamp': '2025-10-02 01:06:45.250634', 'step': 31129, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:45.317067', 'step': 31129, 'epoch': 3}
{'type': 'loss', 'content': 0.022330762818455696, 'timestamp': '2025-10-02 01:06:45.321905', 'step': 31130, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:06:45.379931', 'step': 31130, 'epoch': 3}
{'type': 'loss', 'content': 0.03913699463009834, 'timestamp': '2025-10-02 01:06:45.388019', 'step': 31131, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:45.451130', 'step': 31131, 'epoch': 3}
{'type': 'loss', 'content': 0.020923752337694168, 'timestamp': '2025-10-02 01:06:45.457780', 'step': 31132, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:45.512996', 'step': 31132, 'epoch': 3}
{'type': 'loss', 'content': 0.06148884445428848, 'timestamp': '2025-10-02 01:06:45.516464', 'step': 31133, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:45.572786', 'step': 31133, 'epoch': 3}
{'type': 'loss', 'content': 0.08347097039222717, 'timestamp': '2025-10-02 01:06:45.575433', 'step': 31134, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:45.646301', 'step': 31134, 'epoch': 3}
{'type': 'loss', 'content': 0.09956954419612885, 'timestamp': '2025-10-02 01:06:45.649383', 'step': 31135, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:45.705769', 'step': 31135, 'epoch': 3}
{'type': 'loss', 'content': 0.03883692994713783, 'timestamp': '2025-10-02 01:06:45.711954', 'step': 31136, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:45.766334', 'step': 31136, 'epoch': 3}
{'type': 'loss', 'content': 0.0007018883479759097, 'timestamp': '2025-10-02 01:06:45.773668', 'step': 31137, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:06:45.844528', 'step': 31137, 'epoch': 3}
{'type': 'loss', 'content': 0.006198624614626169, 'timestamp': '2025-10-02 01:06:45.856506', 'step': 31138, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:06:45.925810', 'step': 31138, 'epoch': 3}
{'type': 'loss', 'content': 0.0332612544298172, 'timestamp': '2025-10-02 01:06:45.936430', 'step': 31139, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:46.001862', 'step': 31139, 'epoch': 3}
{'type': 'loss', 'content': 0.0231939647346735, 'timestamp': '2025-10-02 01:06:46.008427', 'step': 31140, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:46.066309', 'step': 31140, 'epoch': 3}
{'type': 'loss', 'content': 0.04502413049340248, 'timestamp': '2025-10-02 01:06:46.073832', 'step': 31141, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:46.136099', 'step': 31141, 'epoch': 3}
{'type': 'loss', 'content': 0.044086992740631104, 'timestamp': '2025-10-02 01:06:46.139247', 'step': 31142, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:06:46.195625', 'step': 31142, 'epoch': 3}
{'type': 'loss', 'content': 0.05621640756726265, 'timestamp': '2025-10-02 01:06:46.198219', 'step': 31143, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:46.253870', 'step': 31143, 'epoch': 3}
{'type': 'loss', 'content': 0.019892266020178795, 'timestamp': '2025-10-02 01:06:46.261299', 'step': 31144, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:46.322301', 'step': 31144, 'epoch': 3}
{'type': 'loss', 'content': 0.025927498936653137, 'timestamp': '2025-10-02 01:06:46.327990', 'step': 31145, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:46.383696', 'step': 31145, 'epoch': 3}
{'type': 'loss', 'content': 0.04349231347441673, 'timestamp': '2025-10-02 01:06:46.386726', 'step': 31146, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:46.446273', 'step': 31146, 'epoch': 3}
{'type': 'loss', 'content': 0.0010395008139312267, 'timestamp': '2025-10-02 01:06:46.453369', 'step': 31147, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:06:46.510310', 'step': 31147, 'epoch': 3}
{'type': 'loss', 'content': 0.060032255947589874, 'timestamp': '2025-10-02 01:06:46.516839', 'step': 31148, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:46.571093', 'step': 31148, 'epoch': 3}
{'type': 'loss', 'content': 0.06896845251321793, 'timestamp': '2025-10-02 01:06:46.573547', 'step': 31149, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:46.633501', 'step': 31149, 'epoch': 3}
{'type': 'loss', 'content': 0.009198551066219807, 'timestamp': '2025-10-02 01:06:46.643644', 'step': 31150, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:06:46.706425', 'step': 31150, 'epoch': 3}
{'type': 'loss', 'content': 0.02924244850873947, 'timestamp': '2025-10-02 01:06:46.716881', 'step': 31151, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:46.772894', 'step': 31151, 'epoch': 3}
{'type': 'loss', 'content': 0.08888276666402817, 'timestamp': '2025-10-02 01:06:46.779279', 'step': 31152, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:46.833839', 'step': 31152, 'epoch': 3}
{'type': 'loss', 'content': 0.08283552527427673, 'timestamp': '2025-10-02 01:06:46.843006', 'step': 31153, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:46.909525', 'step': 31153, 'epoch': 3}
{'type': 'loss', 'content': 0.052773237228393555, 'timestamp': '2025-10-02 01:06:46.912674', 'step': 31154, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:46.975275', 'step': 31154, 'epoch': 3}
{'type': 'loss', 'content': 0.04793703556060791, 'timestamp': '2025-10-02 01:06:46.978679', 'step': 31155, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:47.040093', 'step': 31155, 'epoch': 3}
{'type': 'loss', 'content': 0.012913333252072334, 'timestamp': '2025-10-02 01:06:47.049218', 'step': 31156, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:47.110612', 'step': 31156, 'epoch': 3}
{'type': 'loss', 'content': 0.005857641343027353, 'timestamp': '2025-10-02 01:06:47.116402', 'step': 31157, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:47.176082', 'step': 31157, 'epoch': 3}
{'type': 'loss', 'content': 0.06742379069328308, 'timestamp': '2025-10-02 01:06:47.181348', 'step': 31158, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:47.242878', 'step': 31158, 'epoch': 3}
{'type': 'loss', 'content': 0.040863361209630966, 'timestamp': '2025-10-02 01:06:47.246989', 'step': 31159, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:47.305735', 'step': 31159, 'epoch': 3}
{'type': 'loss', 'content': 0.0130377858877182, 'timestamp': '2025-10-02 01:06:47.314411', 'step': 31160, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:47.373041', 'step': 31160, 'epoch': 3}
{'type': 'loss', 'content': 0.012524169869720936, 'timestamp': '2025-10-02 01:06:47.376905', 'step': 31161, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:47.442998', 'step': 31161, 'epoch': 3}
{'type': 'loss', 'content': 0.04342629387974739, 'timestamp': '2025-10-02 01:06:47.446559', 'step': 31162, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:47.506106', 'step': 31162, 'epoch': 3}
{'type': 'loss', 'content': 0.025840647518634796, 'timestamp': '2025-10-02 01:06:47.514724', 'step': 31163, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:47.579118', 'step': 31163, 'epoch': 3}
{'type': 'loss', 'content': 0.031535279005765915, 'timestamp': '2025-10-02 01:06:47.586057', 'step': 31164, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:06:47.644657', 'step': 31164, 'epoch': 3}
{'type': 'loss', 'content': 0.10296311974525452, 'timestamp': '2025-10-02 01:06:47.648081', 'step': 31165, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:47.706306', 'step': 31165, 'epoch': 3}
{'type': 'loss', 'content': 0.019749151542782784, 'timestamp': '2025-10-02 01:06:47.709672', 'step': 31166, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:47.768788', 'step': 31166, 'epoch': 3}
{'type': 'loss', 'content': 0.011708649806678295, 'timestamp': '2025-10-02 01:06:47.774051', 'step': 31167, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:47.831844', 'step': 31167, 'epoch': 3}
{'type': 'loss', 'content': 0.07088014483451843, 'timestamp': '2025-10-02 01:06:47.840750', 'step': 31168, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:47.898996', 'step': 31168, 'epoch': 3}
{'type': 'loss', 'content': 0.03052968531847, 'timestamp': '2025-10-02 01:06:47.902358', 'step': 31169, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:47.960683', 'step': 31169, 'epoch': 3}
{'type': 'loss', 'content': 0.0030889420304447412, 'timestamp': '2025-10-02 01:06:47.970208', 'step': 31170, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:48.037880', 'step': 31170, 'epoch': 3}
{'type': 'loss', 'content': 0.02671423926949501, 'timestamp': '2025-10-02 01:06:48.040869', 'step': 31171, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 01:06:48.114292', 'step': 31171, 'epoch': 3}
{'type': 'loss', 'content': 0.05142056941986084, 'timestamp': '2025-10-02 01:06:48.127485', 'step': 31172, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:48.186520', 'step': 31172, 'epoch': 3}
{'type': 'loss', 'content': 0.12371259927749634, 'timestamp': '2025-10-02 01:06:48.189147', 'step': 31173, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:48.260462', 'step': 31173, 'epoch': 3}
{'type': 'loss', 'content': 0.022509740665555, 'timestamp': '2025-10-02 01:06:48.264721', 'step': 31174, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:48.331870', 'step': 31174, 'epoch': 3}
{'type': 'loss', 'content': 0.04348701238632202, 'timestamp': '2025-10-02 01:06:48.338387', 'step': 31175, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:48.405683', 'step': 31175, 'epoch': 3}
{'type': 'loss', 'content': 0.04017440974712372, 'timestamp': '2025-10-02 01:06:48.414254', 'step': 31176, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:06:48.476666', 'step': 31176, 'epoch': 3}
{'type': 'loss', 'content': 0.04947362467646599, 'timestamp': '2025-10-02 01:06:48.479345', 'step': 31177, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:48.539625', 'step': 31177, 'epoch': 3}
{'type': 'loss', 'content': 0.022281579673290253, 'timestamp': '2025-10-02 01:06:48.544600', 'step': 31178, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:06:48.612910', 'step': 31178, 'epoch': 3}
{'type': 'loss', 'content': 0.03760169446468353, 'timestamp': '2025-10-02 01:06:48.623574', 'step': 31179, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:48.681150', 'step': 31179, 'epoch': 3}
{'type': 'loss', 'content': 0.03533780574798584, 'timestamp': '2025-10-02 01:06:48.687063', 'step': 31180, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:48.743829', 'step': 31180, 'epoch': 3}
{'type': 'loss', 'content': 0.028937039896845818, 'timestamp': '2025-10-02 01:06:48.754071', 'step': 31181, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:48.813372', 'step': 31181, 'epoch': 3}
{'type': 'loss', 'content': 0.06562550365924835, 'timestamp': '2025-10-02 01:06:48.816771', 'step': 31182, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:48.873203', 'step': 31182, 'epoch': 3}
{'type': 'loss', 'content': 0.03694876655936241, 'timestamp': '2025-10-02 01:06:48.877250', 'step': 31183, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:48.942756', 'step': 31183, 'epoch': 3}
{'type': 'loss', 'content': 0.06731278449296951, 'timestamp': '2025-10-02 01:06:48.949013', 'step': 31184, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:49.003978', 'step': 31184, 'epoch': 3}
{'type': 'loss', 'content': 0.0043863700702786446, 'timestamp': '2025-10-02 01:06:49.007119', 'step': 31185, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:49.061477', 'step': 31185, 'epoch': 3}
{'type': 'loss', 'content': 0.11093927174806595, 'timestamp': '2025-10-02 01:06:49.063788', 'step': 31186, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:49.120397', 'step': 31186, 'epoch': 3}
{'type': 'loss', 'content': 0.0008003002149052918, 'timestamp': '2025-10-02 01:06:49.122891', 'step': 31187, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:49.177735', 'step': 31187, 'epoch': 3}
{'type': 'loss', 'content': 0.02984870970249176, 'timestamp': '2025-10-02 01:06:49.184043', 'step': 31188, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:49.239249', 'step': 31188, 'epoch': 3}
{'type': 'loss', 'content': 0.1208522617816925, 'timestamp': '2025-10-02 01:06:49.241985', 'step': 31189, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:49.296863', 'step': 31189, 'epoch': 3}
{'type': 'loss', 'content': 0.01782984659075737, 'timestamp': '2025-10-02 01:06:49.300808', 'step': 31190, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:06:49.375551', 'step': 31190, 'epoch': 3}
{'type': 'loss', 'content': 0.03313731402158737, 'timestamp': '2025-10-02 01:06:49.387863', 'step': 31191, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:49.455022', 'step': 31191, 'epoch': 3}
{'type': 'loss', 'content': 0.0006222591619007289, 'timestamp': '2025-10-02 01:06:49.462977', 'step': 31192, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:49.524784', 'step': 31192, 'epoch': 3}
{'type': 'loss', 'content': 0.06307445466518402, 'timestamp': '2025-10-02 01:06:49.530288', 'step': 31193, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:06:49.596307', 'step': 31193, 'epoch': 3}
{'type': 'loss', 'content': 0.02640792913734913, 'timestamp': '2025-10-02 01:06:49.606951', 'step': 31194, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:49.662753', 'step': 31194, 'epoch': 3}
{'type': 'loss', 'content': 0.010219961404800415, 'timestamp': '2025-10-02 01:06:49.665308', 'step': 31195, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:06:49.720067', 'step': 31195, 'epoch': 3}
{'type': 'loss', 'content': 0.06724368780851364, 'timestamp': '2025-10-02 01:06:49.726083', 'step': 31196, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:06:49.789856', 'step': 31196, 'epoch': 3}
{'type': 'loss', 'content': 0.006153719965368509, 'timestamp': '2025-10-02 01:06:49.801613', 'step': 31197, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:49.858516', 'step': 31197, 'epoch': 3}
{'type': 'loss', 'content': 0.024633076041936874, 'timestamp': '2025-10-02 01:06:49.865758', 'step': 31198, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:49.923669', 'step': 31198, 'epoch': 3}
{'type': 'loss', 'content': 0.018916960805654526, 'timestamp': '2025-10-02 01:06:49.925904', 'step': 31199, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:49.981652', 'step': 31199, 'epoch': 3}
{'type': 'loss', 'content': 0.16065922379493713, 'timestamp': '2025-10-02 01:06:49.990020', 'step': 31200, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:50.048638', 'step': 31200, 'epoch': 3}
{'type': 'loss', 'content': 0.0279685165733099, 'timestamp': '2025-10-02 01:06:50.051423', 'step': 31201, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:50.106505', 'step': 31201, 'epoch': 3}
{'type': 'loss', 'content': 0.021228298544883728, 'timestamp': '2025-10-02 01:06:50.109357', 'step': 31202, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:50.166356', 'step': 31202, 'epoch': 3}
{'type': 'loss', 'content': 0.006566477473825216, 'timestamp': '2025-10-02 01:06:50.168995', 'step': 31203, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:50.225837', 'step': 31203, 'epoch': 3}
{'type': 'loss', 'content': 0.04573275148868561, 'timestamp': '2025-10-02 01:06:50.232274', 'step': 31204, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:50.289732', 'step': 31204, 'epoch': 3}
{'type': 'loss', 'content': 0.0371544286608696, 'timestamp': '2025-10-02 01:06:50.295495', 'step': 31205, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:50.352881', 'step': 31205, 'epoch': 3}
{'type': 'loss', 'content': 0.0026568325702100992, 'timestamp': '2025-10-02 01:06:50.362181', 'step': 31206, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:50.420775', 'step': 31206, 'epoch': 3}
{'type': 'loss', 'content': 0.006175118964165449, 'timestamp': '2025-10-02 01:06:50.428102', 'step': 31207, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:50.488329', 'step': 31207, 'epoch': 3}
{'type': 'loss', 'content': 0.000496123218908906, 'timestamp': '2025-10-02 01:06:50.497736', 'step': 31208, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:50.565960', 'step': 31208, 'epoch': 3}
{'type': 'loss', 'content': 0.04146698862314224, 'timestamp': '2025-10-02 01:06:50.573726', 'step': 31209, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:50.640669', 'step': 31209, 'epoch': 3}
{'type': 'loss', 'content': 0.1029743030667305, 'timestamp': '2025-10-02 01:06:50.644121', 'step': 31210, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:50.705288', 'step': 31210, 'epoch': 3}
{'type': 'loss', 'content': 0.0825875848531723, 'timestamp': '2025-10-02 01:06:50.709495', 'step': 31211, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:50.768415', 'step': 31211, 'epoch': 3}
{'type': 'loss', 'content': 0.017411213368177414, 'timestamp': '2025-10-02 01:06:50.775147', 'step': 31212, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:50.831644', 'step': 31212, 'epoch': 3}
{'type': 'loss', 'content': 0.04901920631527901, 'timestamp': '2025-10-02 01:06:50.836014', 'step': 31213, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:50.892560', 'step': 31213, 'epoch': 3}
{'type': 'loss', 'content': 0.02753271721303463, 'timestamp': '2025-10-02 01:06:50.896345', 'step': 31214, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:50.952937', 'step': 31214, 'epoch': 3}
{'type': 'loss', 'content': 0.06607106328010559, 'timestamp': '2025-10-02 01:06:50.962154', 'step': 31215, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:51.020420', 'step': 31215, 'epoch': 3}
{'type': 'loss', 'content': 0.011184572242200375, 'timestamp': '2025-10-02 01:06:51.026831', 'step': 31216, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:51.080690', 'step': 31216, 'epoch': 3}
{'type': 'loss', 'content': 0.06527388840913773, 'timestamp': '2025-10-02 01:06:51.083327', 'step': 31217, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:06:51.149480', 'step': 31217, 'epoch': 3}
{'type': 'loss', 'content': 0.009801645763218403, 'timestamp': '2025-10-02 01:06:51.159857', 'step': 31218, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:51.216719', 'step': 31218, 'epoch': 3}
{'type': 'loss', 'content': 0.027232550084590912, 'timestamp': '2025-10-02 01:06:51.219244', 'step': 31219, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:51.274452', 'step': 31219, 'epoch': 3}
{'type': 'loss', 'content': 0.041103433817625046, 'timestamp': '2025-10-02 01:06:51.280883', 'step': 31220, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:51.335739', 'step': 31220, 'epoch': 3}
{'type': 'loss', 'content': 0.02522861957550049, 'timestamp': '2025-10-02 01:06:51.338553', 'step': 31221, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:51.395196', 'step': 31221, 'epoch': 3}
{'type': 'loss', 'content': 0.058152757585048676, 'timestamp': '2025-10-02 01:06:51.398307', 'step': 31222, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:51.456285', 'step': 31222, 'epoch': 3}
{'type': 'loss', 'content': 0.028383752331137657, 'timestamp': '2025-10-02 01:06:51.461018', 'step': 31223, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:51.523357', 'step': 31223, 'epoch': 3}
{'type': 'loss', 'content': 0.026211682707071304, 'timestamp': '2025-10-02 01:06:51.533789', 'step': 31224, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:51.590844', 'step': 31224, 'epoch': 3}
{'type': 'loss', 'content': 0.048558443784713745, 'timestamp': '2025-10-02 01:06:51.597770', 'step': 31225, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:51.656043', 'step': 31225, 'epoch': 3}
{'type': 'loss', 'content': 0.009865145199000835, 'timestamp': '2025-10-02 01:06:51.660051', 'step': 31226, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:51.717912', 'step': 31226, 'epoch': 3}
{'type': 'loss', 'content': 0.018342774361371994, 'timestamp': '2025-10-02 01:06:51.727413', 'step': 31227, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:51.783980', 'step': 31227, 'epoch': 3}
{'type': 'loss', 'content': 0.07736025005578995, 'timestamp': '2025-10-02 01:06:51.791863', 'step': 31228, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:51.848253', 'step': 31228, 'epoch': 3}
{'type': 'loss', 'content': 0.009535050019621849, 'timestamp': '2025-10-02 01:06:51.855186', 'step': 31229, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:51.914886', 'step': 31229, 'epoch': 3}
{'type': 'loss', 'content': 0.009126636199653149, 'timestamp': '2025-10-02 01:06:51.923935', 'step': 31230, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:51.981744', 'step': 31230, 'epoch': 3}
{'type': 'loss', 'content': 0.12424729019403458, 'timestamp': '2025-10-02 01:06:51.984473', 'step': 31231, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:52.042477', 'step': 31231, 'epoch': 3}
{'type': 'loss', 'content': 0.02459586411714554, 'timestamp': '2025-10-02 01:06:52.048788', 'step': 31232, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:52.103346', 'step': 31232, 'epoch': 3}
{'type': 'loss', 'content': 0.0474756695330143, 'timestamp': '2025-10-02 01:06:52.105937', 'step': 31233, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:52.161535', 'step': 31233, 'epoch': 3}
{'type': 'loss', 'content': 0.03190966695547104, 'timestamp': '2025-10-02 01:06:52.167054', 'step': 31234, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:52.230549', 'step': 31234, 'epoch': 3}
{'type': 'loss', 'content': 0.024718044325709343, 'timestamp': '2025-10-02 01:06:52.236010', 'step': 31235, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:52.296403', 'step': 31235, 'epoch': 3}
{'type': 'loss', 'content': 0.15779022872447968, 'timestamp': '2025-10-02 01:06:52.302734', 'step': 31236, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:52.360511', 'step': 31236, 'epoch': 3}
{'type': 'loss', 'content': 0.06622876226902008, 'timestamp': '2025-10-02 01:06:52.367417', 'step': 31237, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:52.429324', 'step': 31237, 'epoch': 3}
{'type': 'loss', 'content': 0.0681009292602539, 'timestamp': '2025-10-02 01:06:52.432425', 'step': 31238, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:52.488080', 'step': 31238, 'epoch': 3}
{'type': 'loss', 'content': 0.037802886217832565, 'timestamp': '2025-10-02 01:06:52.493527', 'step': 31239, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 01:06:52.566247', 'step': 31239, 'epoch': 3}
{'type': 'loss', 'content': 0.010384869761765003, 'timestamp': '2025-10-02 01:06:52.579611', 'step': 31240, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:52.638617', 'step': 31240, 'epoch': 3}
{'type': 'loss', 'content': 0.047987692058086395, 'timestamp': '2025-10-02 01:06:52.641825', 'step': 31241, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:52.703515', 'step': 31241, 'epoch': 3}
{'type': 'loss', 'content': 0.02774891071021557, 'timestamp': '2025-10-02 01:06:52.713698', 'step': 31242, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:52.769874', 'step': 31242, 'epoch': 3}
{'type': 'loss', 'content': 0.04470972716808319, 'timestamp': '2025-10-02 01:06:52.772385', 'step': 31243, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:52.830759', 'step': 31243, 'epoch': 3}
{'type': 'loss', 'content': 0.013737889938056469, 'timestamp': '2025-10-02 01:06:52.837098', 'step': 31244, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:06:52.897340', 'step': 31244, 'epoch': 3}
{'type': 'loss', 'content': 0.039174340665340424, 'timestamp': '2025-10-02 01:06:52.908609', 'step': 31245, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:52.965143', 'step': 31245, 'epoch': 3}
{'type': 'loss', 'content': 0.016235243529081345, 'timestamp': '2025-10-02 01:06:52.970621', 'step': 31246, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:53.026099', 'step': 31246, 'epoch': 3}
{'type': 'loss', 'content': 0.011090428568422794, 'timestamp': '2025-10-02 01:06:53.028733', 'step': 31247, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:53.083694', 'step': 31247, 'epoch': 3}
{'type': 'loss', 'content': 0.03662648797035217, 'timestamp': '2025-10-02 01:06:53.090012', 'step': 31248, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:53.144516', 'step': 31248, 'epoch': 3}
{'type': 'loss', 'content': 0.031849149614572525, 'timestamp': '2025-10-02 01:06:53.150193', 'step': 31249, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:53.206278', 'step': 31249, 'epoch': 3}
{'type': 'loss', 'content': 0.007737800013273954, 'timestamp': '2025-10-02 01:06:53.215614', 'step': 31250, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:53.271835', 'step': 31250, 'epoch': 3}
{'type': 'loss', 'content': 0.032516054809093475, 'timestamp': '2025-10-02 01:06:53.274879', 'step': 31251, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:53.332636', 'step': 31251, 'epoch': 3}
{'type': 'loss', 'content': 0.03793803229928017, 'timestamp': '2025-10-02 01:06:53.341456', 'step': 31252, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:53.396708', 'step': 31252, 'epoch': 3}
{'type': 'loss', 'content': 0.001935072592459619, 'timestamp': '2025-10-02 01:06:53.404112', 'step': 31253, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:53.463991', 'step': 31253, 'epoch': 3}
{'type': 'loss', 'content': 0.02718130312860012, 'timestamp': '2025-10-02 01:06:53.470216', 'step': 31254, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:53.529944', 'step': 31254, 'epoch': 3}
{'type': 'loss', 'content': 0.03447841852903366, 'timestamp': '2025-10-02 01:06:53.533881', 'step': 31255, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:53.591294', 'step': 31255, 'epoch': 3}
{'type': 'loss', 'content': 0.015554793179035187, 'timestamp': '2025-10-02 01:06:53.597709', 'step': 31256, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:53.653816', 'step': 31256, 'epoch': 3}
{'type': 'loss', 'content': 0.017748799175024033, 'timestamp': '2025-10-02 01:06:53.661048', 'step': 31257, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:53.726666', 'step': 31257, 'epoch': 3}
{'type': 'loss', 'content': 0.03190293163061142, 'timestamp': '2025-10-02 01:06:53.732994', 'step': 31258, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:53.790027', 'step': 31258, 'epoch': 3}
{'type': 'loss', 'content': 0.04339917004108429, 'timestamp': '2025-10-02 01:06:53.792738', 'step': 31259, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:53.851536', 'step': 31259, 'epoch': 3}
{'type': 'loss', 'content': 0.028051387518644333, 'timestamp': '2025-10-02 01:06:53.857833', 'step': 31260, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:06:53.927123', 'step': 31260, 'epoch': 3}
{'type': 'loss', 'content': 0.005835411604493856, 'timestamp': '2025-10-02 01:06:53.938613', 'step': 31261, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:53.993756', 'step': 31261, 'epoch': 3}
{'type': 'loss', 'content': 0.04192626103758812, 'timestamp': '2025-10-02 01:06:54.000876', 'step': 31262, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:06:54.064196', 'step': 31262, 'epoch': 3}
{'type': 'loss', 'content': 0.03335409611463547, 'timestamp': '2025-10-02 01:06:54.074804', 'step': 31263, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:54.131352', 'step': 31263, 'epoch': 3}
{'type': 'loss', 'content': 0.04453833028674126, 'timestamp': '2025-10-02 01:06:54.139516', 'step': 31264, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:54.192605', 'step': 31264, 'epoch': 3}
{'type': 'loss', 'content': 0.08013825118541718, 'timestamp': '2025-10-02 01:06:54.195117', 'step': 31265, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:06:54.256295', 'step': 31265, 'epoch': 3}
{'type': 'loss', 'content': 0.003278105752542615, 'timestamp': '2025-10-02 01:06:54.266427', 'step': 31266, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:54.328882', 'step': 31266, 'epoch': 3}
{'type': 'loss', 'content': 0.02845701016485691, 'timestamp': '2025-10-02 01:06:54.336100', 'step': 31267, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:06:54.410067', 'step': 31267, 'epoch': 3}
{'type': 'loss', 'content': 0.010086938738822937, 'timestamp': '2025-10-02 01:06:54.421304', 'step': 31268, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:54.486507', 'step': 31268, 'epoch': 3}
{'type': 'loss', 'content': 0.09374566376209259, 'timestamp': '2025-10-02 01:06:54.490484', 'step': 31269, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:54.555710', 'step': 31269, 'epoch': 3}
{'type': 'loss', 'content': 0.04762224480509758, 'timestamp': '2025-10-02 01:06:54.558747', 'step': 31270, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:54.617836', 'step': 31270, 'epoch': 3}
{'type': 'loss', 'content': 0.08012165874242783, 'timestamp': '2025-10-02 01:06:54.621656', 'step': 31271, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:54.679478', 'step': 31271, 'epoch': 3}
{'type': 'loss', 'content': 0.016797956079244614, 'timestamp': '2025-10-02 01:06:54.685908', 'step': 31272, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:54.743136', 'step': 31272, 'epoch': 3}
{'type': 'loss', 'content': 0.09344394505023956, 'timestamp': '2025-10-02 01:06:54.751398', 'step': 31273, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:54.808700', 'step': 31273, 'epoch': 3}
{'type': 'loss', 'content': 0.038433630019426346, 'timestamp': '2025-10-02 01:06:54.812715', 'step': 31274, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:54.871620', 'step': 31274, 'epoch': 3}
{'type': 'loss', 'content': 0.024940291419625282, 'timestamp': '2025-10-02 01:06:54.875044', 'step': 31275, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:54.931322', 'step': 31275, 'epoch': 3}
{'type': 'loss', 'content': 0.031462255865335464, 'timestamp': '2025-10-02 01:06:54.939299', 'step': 31276, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:54.994797', 'step': 31276, 'epoch': 3}
{'type': 'loss', 'content': 0.03688139468431473, 'timestamp': '2025-10-02 01:06:54.998031', 'step': 31277, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:55.053026', 'step': 31277, 'epoch': 3}
{'type': 'loss', 'content': 0.06661268323659897, 'timestamp': '2025-10-02 01:06:55.056348', 'step': 31278, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:55.117284', 'step': 31278, 'epoch': 3}
{'type': 'loss', 'content': 0.019182264804840088, 'timestamp': '2025-10-02 01:06:55.122850', 'step': 31279, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:55.177955', 'step': 31279, 'epoch': 3}
{'type': 'loss', 'content': 0.1519198715686798, 'timestamp': '2025-10-02 01:06:55.184292', 'step': 31280, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:55.239635', 'step': 31280, 'epoch': 3}
{'type': 'loss', 'content': 0.04579448327422142, 'timestamp': '2025-10-02 01:06:55.242995', 'step': 31281, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:55.299308', 'step': 31281, 'epoch': 3}
{'type': 'loss', 'content': 0.14449216425418854, 'timestamp': '2025-10-02 01:06:55.303573', 'step': 31282, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:06:55.371567', 'step': 31282, 'epoch': 3}
{'type': 'loss', 'content': 0.022804636508226395, 'timestamp': '2025-10-02 01:06:55.382407', 'step': 31283, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:55.445449', 'step': 31283, 'epoch': 3}
{'type': 'loss', 'content': 0.04602840915322304, 'timestamp': '2025-10-02 01:06:55.455900', 'step': 31284, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:55.517941', 'step': 31284, 'epoch': 3}
{'type': 'loss', 'content': 0.06434200704097748, 'timestamp': '2025-10-02 01:06:55.523226', 'step': 31285, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:55.580889', 'step': 31285, 'epoch': 3}
{'type': 'loss', 'content': 0.027627892792224884, 'timestamp': '2025-10-02 01:06:55.588190', 'step': 31286, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:06:55.650566', 'step': 31286, 'epoch': 3}
{'type': 'loss', 'content': 0.08174878358840942, 'timestamp': '2025-10-02 01:06:55.654569', 'step': 31287, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:55.711520', 'step': 31287, 'epoch': 3}
{'type': 'loss', 'content': 0.04851836711168289, 'timestamp': '2025-10-02 01:06:55.718548', 'step': 31288, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:55.775278', 'step': 31288, 'epoch': 3}
{'type': 'loss', 'content': 0.10903246700763702, 'timestamp': '2025-10-02 01:06:55.779416', 'step': 31289, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:06:55.838081', 'step': 31289, 'epoch': 3}
{'type': 'loss', 'content': 0.09378822892904282, 'timestamp': '2025-10-02 01:06:55.840288', 'step': 31290, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:55.897084', 'step': 31290, 'epoch': 3}
{'type': 'loss', 'content': 0.07082946598529816, 'timestamp': '2025-10-02 01:06:55.899709', 'step': 31291, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:55.955711', 'step': 31291, 'epoch': 3}
{'type': 'loss', 'content': 0.07655931264162064, 'timestamp': '2025-10-02 01:06:55.962105', 'step': 31292, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:56.018292', 'step': 31292, 'epoch': 3}
{'type': 'loss', 'content': 0.00897003710269928, 'timestamp': '2025-10-02 01:06:56.020901', 'step': 31293, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:06:56.076175', 'step': 31293, 'epoch': 3}
{'type': 'loss', 'content': 0.004175812937319279, 'timestamp': '2025-10-02 01:06:56.078933', 'step': 31294, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:56.134011', 'step': 31294, 'epoch': 3}
{'type': 'loss', 'content': 0.04371783509850502, 'timestamp': '2025-10-02 01:06:56.137655', 'step': 31295, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:56.196408', 'step': 31295, 'epoch': 3}
{'type': 'loss', 'content': 0.029641956090927124, 'timestamp': '2025-10-02 01:06:56.203193', 'step': 31296, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:06:56.276382', 'step': 31296, 'epoch': 3}
{'type': 'loss', 'content': 0.00038160764961503446, 'timestamp': '2025-10-02 01:06:56.287677', 'step': 31297, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:56.361141', 'step': 31297, 'epoch': 3}
{'type': 'loss', 'content': 0.0396624319255352, 'timestamp': '2025-10-02 01:06:56.368508', 'step': 31298, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:56.436794', 'step': 31298, 'epoch': 3}
{'type': 'loss', 'content': 0.10146618634462357, 'timestamp': '2025-10-02 01:06:56.441531', 'step': 31299, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:06:56.507949', 'step': 31299, 'epoch': 3}
{'type': 'loss', 'content': 0.05571020767092705, 'timestamp': '2025-10-02 01:06:56.515526', 'step': 31300, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:06:56.576856', 'step': 31300, 'epoch': 3}
{'type': 'loss', 'content': 0.044032879173755646, 'timestamp': '2025-10-02 01:06:56.581183', 'step': 31301, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:56.642000', 'step': 31301, 'epoch': 3}
{'type': 'loss', 'content': 0.0642332211136818, 'timestamp': '2025-10-02 01:06:56.646124', 'step': 31302, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:06:56.706062', 'step': 31302, 'epoch': 3}
{'type': 'loss', 'content': 0.05517955869436264, 'timestamp': '2025-10-02 01:06:56.710739', 'step': 31303, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:06:56.776140', 'step': 31303, 'epoch': 3}
{'type': 'loss', 'content': 0.0365273654460907, 'timestamp': '2025-10-02 01:06:56.787340', 'step': 31304, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:56.844886', 'step': 31304, 'epoch': 3}
{'type': 'loss', 'content': 0.03128589689731598, 'timestamp': '2025-10-02 01:06:56.847473', 'step': 31305, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:56.905230', 'step': 31305, 'epoch': 3}
{'type': 'loss', 'content': 0.0472552552819252, 'timestamp': '2025-10-02 01:06:56.914558', 'step': 31306, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:06:56.975540', 'step': 31306, 'epoch': 3}
{'type': 'loss', 'content': 0.023395204916596413, 'timestamp': '2025-10-02 01:06:56.984849', 'step': 31307, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:57.043677', 'step': 31307, 'epoch': 3}
{'type': 'loss', 'content': 0.04435642063617706, 'timestamp': '2025-10-02 01:06:57.050429', 'step': 31308, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:06:57.107365', 'step': 31308, 'epoch': 3}
{'type': 'loss', 'content': 0.05254716798663139, 'timestamp': '2025-10-02 01:06:57.110909', 'step': 31309, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:06:57.169515', 'step': 31309, 'epoch': 3}
{'type': 'loss', 'content': 0.07318081706762314, 'timestamp': '2025-10-02 01:06:57.172843', 'step': 31310, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:06:57.230553', 'step': 31310, 'epoch': 3}
{'type': 'loss', 'content': 0.04197282716631889, 'timestamp': '2025-10-02 01:06:57.236027', 'step': 31311, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:06:57.292516', 'step': 31311, 'epoch': 3}
{'type': 'loss', 'content': 0.030691105872392654, 'timestamp': '2025-10-02 01:06:57.299726', 'step': 31312, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:06:57.363349', 'step': 31312, 'epoch': 3}
{'type': 'loss', 'content': 0.06157634034752846, 'timestamp': '2025-10-02 01:06:57.374600', 'step': 31313, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:06:57.432418', 'step': 31313, 'epoch': 3}
{'type': 'loss', 'content': 0.01769259199500084, 'timestamp': '2025-10-02 01:06:57.435550', 'step': 31314, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:57.492311', 'step': 31314, 'epoch': 3}
{'type': 'loss', 'content': 0.016208196058869362, 'timestamp': '2025-10-02 01:06:57.495378', 'step': 31315, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:06:57.554647', 'step': 31315, 'epoch': 3}
{'type': 'loss', 'content': 0.018057646229863167, 'timestamp': '2025-10-02 01:06:57.564976', 'step': 31316, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:57.625035', 'step': 31316, 'epoch': 3}
{'type': 'loss', 'content': 0.05182242393493652, 'timestamp': '2025-10-02 01:06:57.633506', 'step': 31317, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:06:57.696244', 'step': 31317, 'epoch': 3}
{'type': 'loss', 'content': 0.05507058650255203, 'timestamp': '2025-10-02 01:06:57.705981', 'step': 31318, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:06:57.778843', 'step': 31318, 'epoch': 3}
{'type': 'loss', 'content': 0.04842784255743027, 'timestamp': '2025-10-02 01:06:57.785965', 'step': 31319, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:06:57.851545', 'step': 31319, 'epoch': 3}
{'type': 'loss', 'content': 0.03461366146802902, 'timestamp': '2025-10-02 01:06:57.860989', 'step': 31320, 'epoch': 3}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 01:07:24.866585', 'step': 31320, 'epoch': 3}
{'type': 'pplx', 'content': 90.16646265147584, 'timestamp': '2025-10-02 01:07:24.871502', 'step': 31320, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:24.929320', 'step': 31320, 'epoch': 3}
{'type': 'loss', 'content': 0.012396959587931633, 'timestamp': '2025-10-02 01:07:24.934788', 'step': 31321, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 01:07:25.009907', 'step': 31321, 'epoch': 3}
{'type': 'loss', 'content': 0.02192837931215763, 'timestamp': '2025-10-02 01:07:25.022313', 'step': 31322, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:25.079653', 'step': 31322, 'epoch': 3}
{'type': 'loss', 'content': 0.04844369366765022, 'timestamp': '2025-10-02 01:07:25.082845', 'step': 31323, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:25.147075', 'step': 31323, 'epoch': 3}
{'type': 'loss', 'content': 0.042369335889816284, 'timestamp': '2025-10-02 01:07:25.153866', 'step': 31324, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:25.228398', 'step': 31324, 'epoch': 3}
{'type': 'loss', 'content': 0.018936552107334137, 'timestamp': '2025-10-02 01:07:25.233801', 'step': 31325, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:25.295533', 'step': 31325, 'epoch': 3}
{'type': 'loss', 'content': 0.04452258348464966, 'timestamp': '2025-10-02 01:07:25.298904', 'step': 31326, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:25.371573', 'step': 31326, 'epoch': 3}
{'type': 'loss', 'content': 0.0001661662827245891, 'timestamp': '2025-10-02 01:07:25.374994', 'step': 31327, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:25.441020', 'step': 31327, 'epoch': 3}
{'type': 'loss', 'content': 0.015988120809197426, 'timestamp': '2025-10-02 01:07:25.451049', 'step': 31328, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:07:25.510514', 'step': 31328, 'epoch': 3}
{'type': 'loss', 'content': 0.008200008422136307, 'timestamp': '2025-10-02 01:07:25.521394', 'step': 31329, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:25.579095', 'step': 31329, 'epoch': 3}
{'type': 'loss', 'content': 0.05591419339179993, 'timestamp': '2025-10-02 01:07:25.586616', 'step': 31330, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:25.642636', 'step': 31330, 'epoch': 3}
{'type': 'loss', 'content': 0.02467994950711727, 'timestamp': '2025-10-02 01:07:25.650308', 'step': 31331, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:25.706248', 'step': 31331, 'epoch': 3}
{'type': 'loss', 'content': 0.057196907699108124, 'timestamp': '2025-10-02 01:07:25.713527', 'step': 31332, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:25.770262', 'step': 31332, 'epoch': 3}
{'type': 'loss', 'content': 0.11690789461135864, 'timestamp': '2025-10-02 01:07:25.773333', 'step': 31333, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:25.829811', 'step': 31333, 'epoch': 3}
{'type': 'loss', 'content': 0.018994063138961792, 'timestamp': '2025-10-02 01:07:25.838816', 'step': 31334, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:25.903007', 'step': 31334, 'epoch': 3}
{'type': 'loss', 'content': 0.06553126126527786, 'timestamp': '2025-10-02 01:07:25.905866', 'step': 31335, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:25.961442', 'step': 31335, 'epoch': 3}
{'type': 'loss', 'content': 0.017686588689684868, 'timestamp': '2025-10-02 01:07:25.971257', 'step': 31336, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:26.026562', 'step': 31336, 'epoch': 3}
{'type': 'loss', 'content': 0.05603209137916565, 'timestamp': '2025-10-02 01:07:26.029945', 'step': 31337, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:26.084455', 'step': 31337, 'epoch': 3}
{'type': 'loss', 'content': 0.05445392429828644, 'timestamp': '2025-10-02 01:07:26.090110', 'step': 31338, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:26.147845', 'step': 31338, 'epoch': 3}
{'type': 'loss', 'content': 0.026575103402137756, 'timestamp': '2025-10-02 01:07:26.150585', 'step': 31339, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:26.207021', 'step': 31339, 'epoch': 3}
{'type': 'loss', 'content': 0.023895831778645515, 'timestamp': '2025-10-02 01:07:26.214582', 'step': 31340, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:26.269735', 'step': 31340, 'epoch': 3}
{'type': 'loss', 'content': 0.12227090448141098, 'timestamp': '2025-10-02 01:07:26.272614', 'step': 31341, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:26.328531', 'step': 31341, 'epoch': 3}
{'type': 'loss', 'content': 0.05586773157119751, 'timestamp': '2025-10-02 01:07:26.330917', 'step': 31342, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:26.385874', 'step': 31342, 'epoch': 3}
{'type': 'loss', 'content': 0.040850237011909485, 'timestamp': '2025-10-02 01:07:26.394742', 'step': 31343, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:26.451802', 'step': 31343, 'epoch': 3}
{'type': 'loss', 'content': 0.03183247148990631, 'timestamp': '2025-10-02 01:07:26.459434', 'step': 31344, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:26.523171', 'step': 31344, 'epoch': 3}
{'type': 'loss', 'content': 0.009155487641692162, 'timestamp': '2025-10-02 01:07:26.529841', 'step': 31345, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:26.585724', 'step': 31345, 'epoch': 3}
{'type': 'loss', 'content': 0.033566541969776154, 'timestamp': '2025-10-02 01:07:26.590855', 'step': 31346, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:26.645937', 'step': 31346, 'epoch': 3}
{'type': 'loss', 'content': 0.06920024752616882, 'timestamp': '2025-10-02 01:07:26.648243', 'step': 31347, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:26.702989', 'step': 31347, 'epoch': 3}
{'type': 'loss', 'content': 0.029057851061224937, 'timestamp': '2025-10-02 01:07:26.709169', 'step': 31348, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:26.763670', 'step': 31348, 'epoch': 3}
{'type': 'loss', 'content': 0.07404584437608719, 'timestamp': '2025-10-02 01:07:26.766076', 'step': 31349, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:07:26.828327', 'step': 31349, 'epoch': 3}
{'type': 'loss', 'content': 0.006567330099642277, 'timestamp': '2025-10-02 01:07:26.839158', 'step': 31350, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:26.894658', 'step': 31350, 'epoch': 3}
{'type': 'loss', 'content': 0.028076786547899246, 'timestamp': '2025-10-02 01:07:26.904162', 'step': 31351, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:26.960281', 'step': 31351, 'epoch': 3}
{'type': 'loss', 'content': 0.03069884330034256, 'timestamp': '2025-10-02 01:07:26.970606', 'step': 31352, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:27.024926', 'step': 31352, 'epoch': 3}
{'type': 'loss', 'content': 0.027306510135531425, 'timestamp': '2025-10-02 01:07:27.027248', 'step': 31353, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:27.081934', 'step': 31353, 'epoch': 3}
{'type': 'loss', 'content': 0.03692566975951195, 'timestamp': '2025-10-02 01:07:27.089063', 'step': 31354, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:27.144450', 'step': 31354, 'epoch': 3}
{'type': 'loss', 'content': 0.07538963109254837, 'timestamp': '2025-10-02 01:07:27.146876', 'step': 31355, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:27.201662', 'step': 31355, 'epoch': 3}
{'type': 'loss', 'content': 0.004241812974214554, 'timestamp': '2025-10-02 01:07:27.211177', 'step': 31356, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:07:27.272113', 'step': 31356, 'epoch': 3}
{'type': 'loss', 'content': 0.07515834271907806, 'timestamp': '2025-10-02 01:07:27.283815', 'step': 31357, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:27.338883', 'step': 31357, 'epoch': 3}
{'type': 'loss', 'content': 0.03321662172675133, 'timestamp': '2025-10-02 01:07:27.341227', 'step': 31358, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:07:27.405979', 'step': 31358, 'epoch': 3}
{'type': 'loss', 'content': 0.05919203534722328, 'timestamp': '2025-10-02 01:07:27.416804', 'step': 31359, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:27.471961', 'step': 31359, 'epoch': 3}
{'type': 'loss', 'content': 0.13136743009090424, 'timestamp': '2025-10-02 01:07:27.478431', 'step': 31360, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:27.532513', 'step': 31360, 'epoch': 3}
{'type': 'loss', 'content': 0.025900062173604965, 'timestamp': '2025-10-02 01:07:27.536400', 'step': 31361, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:27.591329', 'step': 31361, 'epoch': 3}
{'type': 'loss', 'content': 0.09346779435873032, 'timestamp': '2025-10-02 01:07:27.593702', 'step': 31362, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:27.649407', 'step': 31362, 'epoch': 3}
{'type': 'loss', 'content': 0.007261432707309723, 'timestamp': '2025-10-02 01:07:27.651776', 'step': 31363, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:27.706327', 'step': 31363, 'epoch': 3}
{'type': 'loss', 'content': 0.06993532180786133, 'timestamp': '2025-10-02 01:07:27.712708', 'step': 31364, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:27.767716', 'step': 31364, 'epoch': 3}
{'type': 'loss', 'content': 0.01188048068434, 'timestamp': '2025-10-02 01:07:27.770744', 'step': 31365, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:07:27.826463', 'step': 31365, 'epoch': 3}
{'type': 'loss', 'content': 0.04743153974413872, 'timestamp': '2025-10-02 01:07:27.828874', 'step': 31366, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:27.883303', 'step': 31366, 'epoch': 3}
{'type': 'loss', 'content': 0.08145967870950699, 'timestamp': '2025-10-02 01:07:27.885631', 'step': 31367, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:07:27.939632', 'step': 31367, 'epoch': 3}
{'type': 'loss', 'content': 0.07077260315418243, 'timestamp': '2025-10-02 01:07:27.945959', 'step': 31368, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:28.000997', 'step': 31368, 'epoch': 3}
{'type': 'loss', 'content': 0.03481527045369148, 'timestamp': '2025-10-02 01:07:28.003555', 'step': 31369, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:28.057518', 'step': 31369, 'epoch': 3}
{'type': 'loss', 'content': 0.03833211213350296, 'timestamp': '2025-10-02 01:07:28.059798', 'step': 31370, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:28.114851', 'step': 31370, 'epoch': 3}
{'type': 'loss', 'content': 0.06239292025566101, 'timestamp': '2025-10-02 01:07:28.121870', 'step': 31371, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:07:28.180735', 'step': 31371, 'epoch': 3}
{'type': 'loss', 'content': 0.021064957603812218, 'timestamp': '2025-10-02 01:07:28.191691', 'step': 31372, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:28.246164', 'step': 31372, 'epoch': 3}
{'type': 'loss', 'content': 0.0012414404191076756, 'timestamp': '2025-10-02 01:07:28.256401', 'step': 31373, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:28.311167', 'step': 31373, 'epoch': 3}
{'type': 'loss', 'content': 0.02618258073925972, 'timestamp': '2025-10-02 01:07:28.313474', 'step': 31374, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:28.367592', 'step': 31374, 'epoch': 3}
{'type': 'loss', 'content': 0.059936802834272385, 'timestamp': '2025-10-02 01:07:28.369888', 'step': 31375, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:28.424684', 'step': 31375, 'epoch': 3}
{'type': 'loss', 'content': 0.025261761620640755, 'timestamp': '2025-10-02 01:07:28.434817', 'step': 31376, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:07:28.489072', 'step': 31376, 'epoch': 3}
{'type': 'loss', 'content': 0.05962366983294487, 'timestamp': '2025-10-02 01:07:28.491260', 'step': 31377, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:28.545548', 'step': 31377, 'epoch': 3}
{'type': 'loss', 'content': 0.1285157948732376, 'timestamp': '2025-10-02 01:07:28.547971', 'step': 31378, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:28.617390', 'step': 31378, 'epoch': 3}
{'type': 'loss', 'content': 0.015698380768299103, 'timestamp': '2025-10-02 01:07:28.622765', 'step': 31379, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:28.677951', 'step': 31379, 'epoch': 3}
{'type': 'loss', 'content': 0.030946476384997368, 'timestamp': '2025-10-02 01:07:28.684580', 'step': 31380, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:28.739078', 'step': 31380, 'epoch': 3}
{'type': 'loss', 'content': 0.09597687423229218, 'timestamp': '2025-10-02 01:07:28.741383', 'step': 31381, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:28.796835', 'step': 31381, 'epoch': 3}
{'type': 'loss', 'content': 0.06042616814374924, 'timestamp': '2025-10-02 01:07:28.800934', 'step': 31382, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:28.855907', 'step': 31382, 'epoch': 3}
{'type': 'loss', 'content': 0.03792235255241394, 'timestamp': '2025-10-02 01:07:28.861383', 'step': 31383, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:28.920642', 'step': 31383, 'epoch': 3}
{'type': 'loss', 'content': 0.08635780960321426, 'timestamp': '2025-10-02 01:07:28.927316', 'step': 31384, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:07:28.986852', 'step': 31384, 'epoch': 3}
{'type': 'loss', 'content': 0.048964813351631165, 'timestamp': '2025-10-02 01:07:28.997810', 'step': 31385, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:29.052835', 'step': 31385, 'epoch': 3}
{'type': 'loss', 'content': 0.05075324699282646, 'timestamp': '2025-10-02 01:07:29.055233', 'step': 31386, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:29.110012', 'step': 31386, 'epoch': 3}
{'type': 'loss', 'content': 0.034883175045251846, 'timestamp': '2025-10-02 01:07:29.117203', 'step': 31387, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:29.171556', 'step': 31387, 'epoch': 3}
{'type': 'loss', 'content': 0.08301237225532532, 'timestamp': '2025-10-02 01:07:29.177610', 'step': 31388, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:29.231080', 'step': 31388, 'epoch': 3}
{'type': 'loss', 'content': 0.04583284631371498, 'timestamp': '2025-10-02 01:07:29.233522', 'step': 31389, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:29.289624', 'step': 31389, 'epoch': 3}
{'type': 'loss', 'content': 0.01754809357225895, 'timestamp': '2025-10-02 01:07:29.292147', 'step': 31390, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:29.348749', 'step': 31390, 'epoch': 3}
{'type': 'loss', 'content': 0.01905299536883831, 'timestamp': '2025-10-02 01:07:29.351374', 'step': 31391, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:29.407143', 'step': 31391, 'epoch': 3}
{'type': 'loss', 'content': 0.025561049580574036, 'timestamp': '2025-10-02 01:07:29.413107', 'step': 31392, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:29.467129', 'step': 31392, 'epoch': 3}
{'type': 'loss', 'content': 0.01926584169268608, 'timestamp': '2025-10-02 01:07:29.469728', 'step': 31393, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:29.526074', 'step': 31393, 'epoch': 3}
{'type': 'loss', 'content': 0.011269734241068363, 'timestamp': '2025-10-02 01:07:29.535625', 'step': 31394, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:29.590476', 'step': 31394, 'epoch': 3}
{'type': 'loss', 'content': 0.03497680649161339, 'timestamp': '2025-10-02 01:07:29.592845', 'step': 31395, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:29.648576', 'step': 31395, 'epoch': 3}
{'type': 'loss', 'content': 0.02607875131070614, 'timestamp': '2025-10-02 01:07:29.654489', 'step': 31396, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:29.707993', 'step': 31396, 'epoch': 3}
{'type': 'loss', 'content': 0.03437408432364464, 'timestamp': '2025-10-02 01:07:29.710305', 'step': 31397, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:29.764761', 'step': 31397, 'epoch': 3}
{'type': 'loss', 'content': 0.013718219473958015, 'timestamp': '2025-10-02 01:07:29.767300', 'step': 31398, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:29.822831', 'step': 31398, 'epoch': 3}
{'type': 'loss', 'content': 0.017321567982435226, 'timestamp': '2025-10-02 01:07:29.825228', 'step': 31399, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:29.879908', 'step': 31399, 'epoch': 3}
{'type': 'loss', 'content': 0.036742839962244034, 'timestamp': '2025-10-02 01:07:29.886320', 'step': 31400, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:29.941838', 'step': 31400, 'epoch': 3}
{'type': 'loss', 'content': 0.019494395703077316, 'timestamp': '2025-10-02 01:07:29.944785', 'step': 31401, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:29.998986', 'step': 31401, 'epoch': 3}
{'type': 'loss', 'content': 0.059479862451553345, 'timestamp': '2025-10-02 01:07:30.001387', 'step': 31402, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:30.056415', 'step': 31402, 'epoch': 3}
{'type': 'loss', 'content': 0.09319300949573517, 'timestamp': '2025-10-02 01:07:30.059746', 'step': 31403, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:30.114118', 'step': 31403, 'epoch': 3}
{'type': 'loss', 'content': 0.003530135378241539, 'timestamp': '2025-10-02 01:07:30.120912', 'step': 31404, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:30.175503', 'step': 31404, 'epoch': 3}
{'type': 'loss', 'content': 0.017876792699098587, 'timestamp': '2025-10-02 01:07:30.178561', 'step': 31405, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:07:30.241365', 'step': 31405, 'epoch': 3}
{'type': 'loss', 'content': 0.03802791237831116, 'timestamp': '2025-10-02 01:07:30.251996', 'step': 31406, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:30.309693', 'step': 31406, 'epoch': 3}
{'type': 'loss', 'content': 0.021795157343149185, 'timestamp': '2025-10-02 01:07:30.318862', 'step': 31407, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:30.373922', 'step': 31407, 'epoch': 3}
{'type': 'loss', 'content': 0.054316017776727676, 'timestamp': '2025-10-02 01:07:30.380019', 'step': 31408, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:30.434503', 'step': 31408, 'epoch': 3}
{'type': 'loss', 'content': 0.0004704801249317825, 'timestamp': '2025-10-02 01:07:30.443890', 'step': 31409, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:30.500924', 'step': 31409, 'epoch': 3}
{'type': 'loss', 'content': 0.06555483490228653, 'timestamp': '2025-10-02 01:07:30.503493', 'step': 31410, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:30.558354', 'step': 31410, 'epoch': 3}
{'type': 'loss', 'content': 0.03628834709525108, 'timestamp': '2025-10-02 01:07:30.567568', 'step': 31411, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:30.622624', 'step': 31411, 'epoch': 3}
{'type': 'loss', 'content': 0.03260235860943794, 'timestamp': '2025-10-02 01:07:30.632007', 'step': 31412, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:30.693022', 'step': 31412, 'epoch': 3}
{'type': 'loss', 'content': 0.008625474758446217, 'timestamp': '2025-10-02 01:07:30.695548', 'step': 31413, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:30.750025', 'step': 31413, 'epoch': 3}
{'type': 'loss', 'content': 0.1229008361697197, 'timestamp': '2025-10-02 01:07:30.752419', 'step': 31414, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:30.808891', 'step': 31414, 'epoch': 3}
{'type': 'loss', 'content': 0.07546824216842651, 'timestamp': '2025-10-02 01:07:30.811436', 'step': 31415, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:30.880763', 'step': 31415, 'epoch': 3}
{'type': 'loss', 'content': 0.018316324800252914, 'timestamp': '2025-10-02 01:07:30.887061', 'step': 31416, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:30.941786', 'step': 31416, 'epoch': 3}
{'type': 'loss', 'content': 0.004782185424119234, 'timestamp': '2025-10-02 01:07:30.949182', 'step': 31417, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:31.005629', 'step': 31417, 'epoch': 3}
{'type': 'loss', 'content': 0.0891813263297081, 'timestamp': '2025-10-02 01:07:31.011356', 'step': 31418, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:31.074210', 'step': 31418, 'epoch': 3}
{'type': 'loss', 'content': 0.00547768222168088, 'timestamp': '2025-10-02 01:07:31.079150', 'step': 31419, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:31.138802', 'step': 31419, 'epoch': 3}
{'type': 'loss', 'content': 0.016462542116642, 'timestamp': '2025-10-02 01:07:31.146635', 'step': 31420, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:31.203192', 'step': 31420, 'epoch': 3}
{'type': 'loss', 'content': 0.012808025814592838, 'timestamp': '2025-10-02 01:07:31.210504', 'step': 31421, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:31.267402', 'step': 31421, 'epoch': 3}
{'type': 'loss', 'content': 0.1426553875207901, 'timestamp': '2025-10-02 01:07:31.269816', 'step': 31422, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:31.324979', 'step': 31422, 'epoch': 3}
{'type': 'loss', 'content': 0.03550427407026291, 'timestamp': '2025-10-02 01:07:31.328353', 'step': 31423, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:31.383453', 'step': 31423, 'epoch': 3}
{'type': 'loss', 'content': 0.05250927433371544, 'timestamp': '2025-10-02 01:07:31.389512', 'step': 31424, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:07:31.443261', 'step': 31424, 'epoch': 3}
{'type': 'loss', 'content': 0.038130924105644226, 'timestamp': '2025-10-02 01:07:31.445803', 'step': 31425, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:07:31.511994', 'step': 31425, 'epoch': 3}
{'type': 'loss', 'content': 0.02735966071486473, 'timestamp': '2025-10-02 01:07:31.522431', 'step': 31426, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:31.580932', 'step': 31426, 'epoch': 3}
{'type': 'loss', 'content': 0.031040960922837257, 'timestamp': '2025-10-02 01:07:31.586402', 'step': 31427, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:07:31.645698', 'step': 31427, 'epoch': 3}
{'type': 'loss', 'content': 0.013836704194545746, 'timestamp': '2025-10-02 01:07:31.656700', 'step': 31428, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:07:31.731900', 'step': 31428, 'epoch': 3}
{'type': 'loss', 'content': 0.011735305190086365, 'timestamp': '2025-10-02 01:07:31.744845', 'step': 31429, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:31.807694', 'step': 31429, 'epoch': 3}
{'type': 'loss', 'content': 0.06281153112649918, 'timestamp': '2025-10-02 01:07:31.811159', 'step': 31430, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:31.872867', 'step': 31430, 'epoch': 3}
{'type': 'loss', 'content': 0.10639063268899918, 'timestamp': '2025-10-02 01:07:31.875591', 'step': 31431, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:31.930338', 'step': 31431, 'epoch': 3}
{'type': 'loss', 'content': 0.05493176728487015, 'timestamp': '2025-10-02 01:07:31.936820', 'step': 31432, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:31.991959', 'step': 31432, 'epoch': 3}
{'type': 'loss', 'content': 0.06952080875635147, 'timestamp': '2025-10-02 01:07:31.999201', 'step': 31433, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:32.053719', 'step': 31433, 'epoch': 3}
{'type': 'loss', 'content': 0.01783180609345436, 'timestamp': '2025-10-02 01:07:32.056412', 'step': 31434, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:32.111537', 'step': 31434, 'epoch': 3}
{'type': 'loss', 'content': 0.01441527996212244, 'timestamp': '2025-10-02 01:07:32.118617', 'step': 31435, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:32.175654', 'step': 31435, 'epoch': 3}
{'type': 'loss', 'content': 0.06580428779125214, 'timestamp': '2025-10-02 01:07:32.181414', 'step': 31436, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:32.234905', 'step': 31436, 'epoch': 3}
{'type': 'loss', 'content': 0.003672555787488818, 'timestamp': '2025-10-02 01:07:32.237647', 'step': 31437, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:32.292883', 'step': 31437, 'epoch': 3}
{'type': 'loss', 'content': 0.007372030522674322, 'timestamp': '2025-10-02 01:07:32.295562', 'step': 31438, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:32.350108', 'step': 31438, 'epoch': 3}
{'type': 'loss', 'content': 0.15378065407276154, 'timestamp': '2025-10-02 01:07:32.352611', 'step': 31439, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:32.408268', 'step': 31439, 'epoch': 3}
{'type': 'loss', 'content': 0.03506907820701599, 'timestamp': '2025-10-02 01:07:32.414804', 'step': 31440, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:32.469713', 'step': 31440, 'epoch': 3}
{'type': 'loss', 'content': 0.11308775842189789, 'timestamp': '2025-10-02 01:07:32.472585', 'step': 31441, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:32.527497', 'step': 31441, 'epoch': 3}
{'type': 'loss', 'content': 0.055329736322164536, 'timestamp': '2025-10-02 01:07:32.529800', 'step': 31442, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:32.585659', 'step': 31442, 'epoch': 3}
{'type': 'loss', 'content': 0.01308321300894022, 'timestamp': '2025-10-02 01:07:32.588052', 'step': 31443, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:32.642819', 'step': 31443, 'epoch': 3}
{'type': 'loss', 'content': 0.10652061551809311, 'timestamp': '2025-10-02 01:07:32.650429', 'step': 31444, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:32.705430', 'step': 31444, 'epoch': 3}
{'type': 'loss', 'content': 0.02727297879755497, 'timestamp': '2025-10-02 01:07:32.707747', 'step': 31445, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:32.762621', 'step': 31445, 'epoch': 3}
{'type': 'loss', 'content': 0.05213131010532379, 'timestamp': '2025-10-02 01:07:32.764768', 'step': 31446, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:07:32.820518', 'step': 31446, 'epoch': 3}
{'type': 'loss', 'content': 0.0314878411591053, 'timestamp': '2025-10-02 01:07:32.822918', 'step': 31447, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:32.877296', 'step': 31447, 'epoch': 3}
{'type': 'loss', 'content': 0.023152558133006096, 'timestamp': '2025-10-02 01:07:32.883376', 'step': 31448, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:32.938132', 'step': 31448, 'epoch': 3}
{'type': 'loss', 'content': 0.029849005863070488, 'timestamp': '2025-10-02 01:07:32.945030', 'step': 31449, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:07:33.000808', 'step': 31449, 'epoch': 3}
{'type': 'loss', 'content': 0.007760221604257822, 'timestamp': '2025-10-02 01:07:33.003052', 'step': 31450, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:33.058086', 'step': 31450, 'epoch': 3}
{'type': 'loss', 'content': 0.03366682305932045, 'timestamp': '2025-10-02 01:07:33.060980', 'step': 31451, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:07:33.122491', 'step': 31451, 'epoch': 3}
{'type': 'loss', 'content': 0.005525094456970692, 'timestamp': '2025-10-02 01:07:33.133711', 'step': 31452, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:33.188226', 'step': 31452, 'epoch': 3}
{'type': 'loss', 'content': 0.03156481310725212, 'timestamp': '2025-10-02 01:07:33.190675', 'step': 31453, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:33.247519', 'step': 31453, 'epoch': 3}
{'type': 'loss', 'content': 0.00011143970914417878, 'timestamp': '2025-10-02 01:07:33.253279', 'step': 31454, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:33.309421', 'step': 31454, 'epoch': 3}
{'type': 'loss', 'content': 0.027279779314994812, 'timestamp': '2025-10-02 01:07:33.315004', 'step': 31455, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:07:33.378632', 'step': 31455, 'epoch': 3}
{'type': 'loss', 'content': 0.004491589963436127, 'timestamp': '2025-10-02 01:07:33.390245', 'step': 31456, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:33.445020', 'step': 31456, 'epoch': 3}
{'type': 'loss', 'content': 0.0779527872800827, 'timestamp': '2025-10-02 01:07:33.450522', 'step': 31457, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:33.507152', 'step': 31457, 'epoch': 3}
{'type': 'loss', 'content': 0.016646234318614006, 'timestamp': '2025-10-02 01:07:33.514640', 'step': 31458, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:33.571994', 'step': 31458, 'epoch': 3}
{'type': 'loss', 'content': 0.09204951673746109, 'timestamp': '2025-10-02 01:07:33.577419', 'step': 31459, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:33.632370', 'step': 31459, 'epoch': 3}
{'type': 'loss', 'content': 0.0424659289419651, 'timestamp': '2025-10-02 01:07:33.638362', 'step': 31460, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:07:33.697256', 'step': 31460, 'epoch': 3}
{'type': 'loss', 'content': 0.025659065693616867, 'timestamp': '2025-10-02 01:07:33.708224', 'step': 31461, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:33.763727', 'step': 31461, 'epoch': 3}
{'type': 'loss', 'content': 0.047710712999105453, 'timestamp': '2025-10-02 01:07:33.765906', 'step': 31462, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:33.821019', 'step': 31462, 'epoch': 3}
{'type': 'loss', 'content': 0.0310605950653553, 'timestamp': '2025-10-02 01:07:33.823465', 'step': 31463, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:33.878064', 'step': 31463, 'epoch': 3}
{'type': 'loss', 'content': 0.04279139265418053, 'timestamp': '2025-10-02 01:07:33.883980', 'step': 31464, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:33.938785', 'step': 31464, 'epoch': 3}
{'type': 'loss', 'content': 0.01978800818324089, 'timestamp': '2025-10-02 01:07:33.941298', 'step': 31465, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:07:34.010220', 'step': 31465, 'epoch': 3}
{'type': 'loss', 'content': 0.03492486849427223, 'timestamp': '2025-10-02 01:07:34.022511', 'step': 31466, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:34.078100', 'step': 31466, 'epoch': 3}
{'type': 'loss', 'content': 0.005844429135322571, 'timestamp': '2025-10-02 01:07:34.080713', 'step': 31467, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:34.135898', 'step': 31467, 'epoch': 3}
{'type': 'loss', 'content': 0.004014317411929369, 'timestamp': '2025-10-02 01:07:34.143043', 'step': 31468, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 01:07:34.213511', 'step': 31468, 'epoch': 3}
{'type': 'loss', 'content': 0.016175569966435432, 'timestamp': '2025-10-02 01:07:34.227069', 'step': 31469, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:07:34.287945', 'step': 31469, 'epoch': 3}
{'type': 'loss', 'content': 0.08516430109739304, 'timestamp': '2025-10-02 01:07:34.298142', 'step': 31470, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:34.357179', 'step': 31470, 'epoch': 3}
{'type': 'loss', 'content': 0.020906442776322365, 'timestamp': '2025-10-02 01:07:34.366693', 'step': 31471, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:34.422488', 'step': 31471, 'epoch': 3}
{'type': 'loss', 'content': 0.030127478763461113, 'timestamp': '2025-10-02 01:07:34.430026', 'step': 31472, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:34.485879', 'step': 31472, 'epoch': 3}
{'type': 'loss', 'content': 0.018814612179994583, 'timestamp': '2025-10-02 01:07:34.488803', 'step': 31473, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:07:34.552331', 'step': 31473, 'epoch': 3}
{'type': 'loss', 'content': 0.007916313596069813, 'timestamp': '2025-10-02 01:07:34.562743', 'step': 31474, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:34.618906', 'step': 31474, 'epoch': 3}
{'type': 'loss', 'content': 0.06424279510974884, 'timestamp': '2025-10-02 01:07:34.621261', 'step': 31475, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:07:34.685726', 'step': 31475, 'epoch': 3}
{'type': 'loss', 'content': 0.004855224397033453, 'timestamp': '2025-10-02 01:07:34.696976', 'step': 31476, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:34.752694', 'step': 31476, 'epoch': 3}
{'type': 'loss', 'content': 0.017901966348290443, 'timestamp': '2025-10-02 01:07:34.762889', 'step': 31477, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:34.820156', 'step': 31477, 'epoch': 3}
{'type': 'loss', 'content': 0.06458902359008789, 'timestamp': '2025-10-02 01:07:34.823991', 'step': 31478, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:34.880580', 'step': 31478, 'epoch': 3}
{'type': 'loss', 'content': 0.06858804076910019, 'timestamp': '2025-10-02 01:07:34.883638', 'step': 31479, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:07:34.939692', 'step': 31479, 'epoch': 3}
{'type': 'loss', 'content': 0.033694565296173096, 'timestamp': '2025-10-02 01:07:34.946785', 'step': 31480, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:35.004516', 'step': 31480, 'epoch': 3}
{'type': 'loss', 'content': 0.017859283834695816, 'timestamp': '2025-10-02 01:07:35.013491', 'step': 31481, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:07:35.070540', 'step': 31481, 'epoch': 3}
{'type': 'loss', 'content': 0.10928840935230255, 'timestamp': '2025-10-02 01:07:35.073144', 'step': 31482, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:35.131638', 'step': 31482, 'epoch': 3}
{'type': 'loss', 'content': 0.0431184284389019, 'timestamp': '2025-10-02 01:07:35.134782', 'step': 31483, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:07:35.198518', 'step': 31483, 'epoch': 3}
{'type': 'loss', 'content': 0.033500343561172485, 'timestamp': '2025-10-02 01:07:35.209742', 'step': 31484, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:35.264496', 'step': 31484, 'epoch': 3}
{'type': 'loss', 'content': 0.0631672814488411, 'timestamp': '2025-10-02 01:07:35.267488', 'step': 31485, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:35.323256', 'step': 31485, 'epoch': 3}
{'type': 'loss', 'content': 0.039215654134750366, 'timestamp': '2025-10-02 01:07:35.328886', 'step': 31486, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:35.386813', 'step': 31486, 'epoch': 3}
{'type': 'loss', 'content': 0.005093125626444817, 'timestamp': '2025-10-02 01:07:35.393957', 'step': 31487, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:35.451002', 'step': 31487, 'epoch': 3}
{'type': 'loss', 'content': 0.003778371261432767, 'timestamp': '2025-10-02 01:07:35.457224', 'step': 31488, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:35.514682', 'step': 31488, 'epoch': 3}
{'type': 'loss', 'content': 0.13620708882808685, 'timestamp': '2025-10-02 01:07:35.516851', 'step': 31489, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:35.574693', 'step': 31489, 'epoch': 3}
{'type': 'loss', 'content': 0.05828182026743889, 'timestamp': '2025-10-02 01:07:35.579803', 'step': 31490, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:35.637716', 'step': 31490, 'epoch': 3}
{'type': 'loss', 'content': 0.0590292327105999, 'timestamp': '2025-10-02 01:07:35.640668', 'step': 31491, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:07:35.698644', 'step': 31491, 'epoch': 3}
{'type': 'loss', 'content': 0.08549603074789047, 'timestamp': '2025-10-02 01:07:35.704798', 'step': 31492, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:35.759232', 'step': 31492, 'epoch': 3}
{'type': 'loss', 'content': 0.07857996225357056, 'timestamp': '2025-10-02 01:07:35.769144', 'step': 31493, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:07:35.828760', 'step': 31493, 'epoch': 3}
{'type': 'loss', 'content': 0.02440713904798031, 'timestamp': '2025-10-02 01:07:35.838930', 'step': 31494, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:35.893537', 'step': 31494, 'epoch': 3}
{'type': 'loss', 'content': 0.048977240920066833, 'timestamp': '2025-10-02 01:07:35.896276', 'step': 31495, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:35.950902', 'step': 31495, 'epoch': 3}
{'type': 'loss', 'content': 0.06668835878372192, 'timestamp': '2025-10-02 01:07:35.957991', 'step': 31496, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:36.014440', 'step': 31496, 'epoch': 3}
{'type': 'loss', 'content': 0.03143325820565224, 'timestamp': '2025-10-02 01:07:36.021787', 'step': 31497, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:36.076330', 'step': 31497, 'epoch': 3}
{'type': 'loss', 'content': 0.04165111482143402, 'timestamp': '2025-10-02 01:07:36.078857', 'step': 31498, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:36.134890', 'step': 31498, 'epoch': 3}
{'type': 'loss', 'content': 0.03943910077214241, 'timestamp': '2025-10-02 01:07:36.144402', 'step': 31499, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:36.199660', 'step': 31499, 'epoch': 3}
{'type': 'loss', 'content': 0.032866548746824265, 'timestamp': '2025-10-02 01:07:36.205549', 'step': 31500, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 31500', 'timestamp': '2025-10-02 01:07:36.595436', 'step': 31500, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:36.652478', 'step': 31500, 'epoch': 3}
{'type': 'loss', 'content': 0.04761020466685295, 'timestamp': '2025-10-02 01:07:36.654945', 'step': 31501, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:07:36.716147', 'step': 31501, 'epoch': 3}
{'type': 'loss', 'content': 0.010928882285952568, 'timestamp': '2025-10-02 01:07:36.726586', 'step': 31502, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:36.781443', 'step': 31502, 'epoch': 3}
{'type': 'loss', 'content': 0.014733243733644485, 'timestamp': '2025-10-02 01:07:36.783907', 'step': 31503, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:36.839423', 'step': 31503, 'epoch': 3}
{'type': 'loss', 'content': 0.012470412999391556, 'timestamp': '2025-10-02 01:07:36.845640', 'step': 31504, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:07:36.906432', 'step': 31504, 'epoch': 3}
{'type': 'loss', 'content': 0.0003113814163953066, 'timestamp': '2025-10-02 01:07:36.917767', 'step': 31505, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:36.973632', 'step': 31505, 'epoch': 3}
{'type': 'loss', 'content': 0.021896513178944588, 'timestamp': '2025-10-02 01:07:36.976221', 'step': 31506, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:37.030867', 'step': 31506, 'epoch': 3}
{'type': 'loss', 'content': 0.03767973184585571, 'timestamp': '2025-10-02 01:07:37.033388', 'step': 31507, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:37.087694', 'step': 31507, 'epoch': 3}
{'type': 'loss', 'content': 0.16496020555496216, 'timestamp': '2025-10-02 01:07:37.093759', 'step': 31508, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:37.148565', 'step': 31508, 'epoch': 3}
{'type': 'loss', 'content': 0.0028578126803040504, 'timestamp': '2025-10-02 01:07:37.154170', 'step': 31509, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:37.209557', 'step': 31509, 'epoch': 3}
{'type': 'loss', 'content': 0.019385064020752907, 'timestamp': '2025-10-02 01:07:37.215150', 'step': 31510, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:37.270808', 'step': 31510, 'epoch': 3}
{'type': 'loss', 'content': 0.015103439800441265, 'timestamp': '2025-10-02 01:07:37.278114', 'step': 31511, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:37.334852', 'step': 31511, 'epoch': 3}
{'type': 'loss', 'content': 0.014879485592246056, 'timestamp': '2025-10-02 01:07:37.342958', 'step': 31512, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:37.396505', 'step': 31512, 'epoch': 3}
{'type': 'loss', 'content': 0.11655673384666443, 'timestamp': '2025-10-02 01:07:37.398873', 'step': 31513, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:37.454110', 'step': 31513, 'epoch': 3}
{'type': 'loss', 'content': 0.008033350110054016, 'timestamp': '2025-10-02 01:07:37.456731', 'step': 31514, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:37.511881', 'step': 31514, 'epoch': 3}
{'type': 'loss', 'content': 0.029050659388303757, 'timestamp': '2025-10-02 01:07:37.519000', 'step': 31515, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:37.572930', 'step': 31515, 'epoch': 3}
{'type': 'loss', 'content': 0.07602635771036148, 'timestamp': '2025-10-02 01:07:37.579011', 'step': 31516, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:37.632739', 'step': 31516, 'epoch': 3}
{'type': 'loss', 'content': 0.03545365482568741, 'timestamp': '2025-10-02 01:07:37.635281', 'step': 31517, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:37.689872', 'step': 31517, 'epoch': 3}
{'type': 'loss', 'content': 0.035328060388565063, 'timestamp': '2025-10-02 01:07:37.692033', 'step': 31518, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:37.745696', 'step': 31518, 'epoch': 3}
{'type': 'loss', 'content': 0.03780899941921234, 'timestamp': '2025-10-02 01:07:37.748044', 'step': 31519, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:37.801783', 'step': 31519, 'epoch': 3}
{'type': 'loss', 'content': 0.017980357632040977, 'timestamp': '2025-10-02 01:07:37.808020', 'step': 31520, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:07:37.861446', 'step': 31520, 'epoch': 3}
{'type': 'loss', 'content': 0.021938588470220566, 'timestamp': '2025-10-02 01:07:37.863618', 'step': 31521, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:07:37.932507', 'step': 31521, 'epoch': 3}
{'type': 'loss', 'content': 0.04545660316944122, 'timestamp': '2025-10-02 01:07:37.944770', 'step': 31522, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:07:38.003958', 'step': 31522, 'epoch': 3}
{'type': 'loss', 'content': 0.03456978127360344, 'timestamp': '2025-10-02 01:07:38.014132', 'step': 31523, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:38.076733', 'step': 31523, 'epoch': 3}
{'type': 'loss', 'content': 0.09206680208444595, 'timestamp': '2025-10-02 01:07:38.083156', 'step': 31524, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:38.137347', 'step': 31524, 'epoch': 3}
{'type': 'loss', 'content': 0.03003375604748726, 'timestamp': '2025-10-02 01:07:38.140615', 'step': 31525, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:38.195407', 'step': 31525, 'epoch': 3}
{'type': 'loss', 'content': 0.05657289922237396, 'timestamp': '2025-10-02 01:07:38.197882', 'step': 31526, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:38.252916', 'step': 31526, 'epoch': 3}
{'type': 'loss', 'content': 0.0025189651641994715, 'timestamp': '2025-10-02 01:07:38.255475', 'step': 31527, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:38.309650', 'step': 31527, 'epoch': 3}
{'type': 'loss', 'content': 0.01548053976148367, 'timestamp': '2025-10-02 01:07:38.316157', 'step': 31528, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:38.370152', 'step': 31528, 'epoch': 3}
{'type': 'loss', 'content': 0.03147676959633827, 'timestamp': '2025-10-02 01:07:38.372324', 'step': 31529, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:38.427660', 'step': 31529, 'epoch': 3}
{'type': 'loss', 'content': 0.01228305697441101, 'timestamp': '2025-10-02 01:07:38.436963', 'step': 31530, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:38.491579', 'step': 31530, 'epoch': 3}
{'type': 'loss', 'content': 0.06087043881416321, 'timestamp': '2025-10-02 01:07:38.493598', 'step': 31531, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:38.548731', 'step': 31531, 'epoch': 3}
{'type': 'loss', 'content': 0.06031615287065506, 'timestamp': '2025-10-02 01:07:38.555752', 'step': 31532, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:38.609769', 'step': 31532, 'epoch': 3}
{'type': 'loss', 'content': 0.04145142808556557, 'timestamp': '2025-10-02 01:07:38.612096', 'step': 31533, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:38.666773', 'step': 31533, 'epoch': 3}
{'type': 'loss', 'content': 0.08519865572452545, 'timestamp': '2025-10-02 01:07:38.669038', 'step': 31534, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:38.723650', 'step': 31534, 'epoch': 3}
{'type': 'loss', 'content': 0.009744122624397278, 'timestamp': '2025-10-02 01:07:38.730906', 'step': 31535, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:07:38.792288', 'step': 31535, 'epoch': 3}
{'type': 'loss', 'content': 0.00980497058480978, 'timestamp': '2025-10-02 01:07:38.803545', 'step': 31536, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:07:38.861629', 'step': 31536, 'epoch': 3}
{'type': 'loss', 'content': 0.05679073557257652, 'timestamp': '2025-10-02 01:07:38.872570', 'step': 31537, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:38.926871', 'step': 31537, 'epoch': 3}
{'type': 'loss', 'content': 0.016203748062253, 'timestamp': '2025-10-02 01:07:38.928928', 'step': 31538, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:38.984373', 'step': 31538, 'epoch': 3}
{'type': 'loss', 'content': 0.052570369094610214, 'timestamp': '2025-10-02 01:07:38.993906', 'step': 31539, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:39.048917', 'step': 31539, 'epoch': 3}
{'type': 'loss', 'content': 0.044682614505290985, 'timestamp': '2025-10-02 01:07:39.055612', 'step': 31540, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:39.109507', 'step': 31540, 'epoch': 3}
{'type': 'loss', 'content': 0.07062679529190063, 'timestamp': '2025-10-02 01:07:39.116910', 'step': 31541, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:39.173069', 'step': 31541, 'epoch': 3}
{'type': 'loss', 'content': 0.01358839776366949, 'timestamp': '2025-10-02 01:07:39.175547', 'step': 31542, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:39.229240', 'step': 31542, 'epoch': 3}
{'type': 'loss', 'content': 0.11927791684865952, 'timestamp': '2025-10-02 01:07:39.232534', 'step': 31543, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:07:39.301999', 'step': 31543, 'epoch': 3}
{'type': 'loss', 'content': 0.00028730768826790154, 'timestamp': '2025-10-02 01:07:39.315062', 'step': 31544, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:39.368830', 'step': 31544, 'epoch': 3}
{'type': 'loss', 'content': 0.01686706766486168, 'timestamp': '2025-10-02 01:07:39.371921', 'step': 31545, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:39.427068', 'step': 31545, 'epoch': 3}
{'type': 'loss', 'content': 0.00040210841689258814, 'timestamp': '2025-10-02 01:07:39.429640', 'step': 31546, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:07:39.491305', 'step': 31546, 'epoch': 3}
{'type': 'loss', 'content': 0.035591643303632736, 'timestamp': '2025-10-02 01:07:39.501953', 'step': 31547, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:39.557516', 'step': 31547, 'epoch': 3}
{'type': 'loss', 'content': 0.023349257186055183, 'timestamp': '2025-10-02 01:07:39.563956', 'step': 31548, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:07:39.623661', 'step': 31548, 'epoch': 3}
{'type': 'loss', 'content': 0.03512562811374664, 'timestamp': '2025-10-02 01:07:39.634631', 'step': 31549, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:39.689142', 'step': 31549, 'epoch': 3}
{'type': 'loss', 'content': 0.03824557363986969, 'timestamp': '2025-10-02 01:07:39.691802', 'step': 31550, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:39.746740', 'step': 31550, 'epoch': 3}
{'type': 'loss', 'content': 0.0628328025341034, 'timestamp': '2025-10-02 01:07:39.754130', 'step': 31551, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:39.808690', 'step': 31551, 'epoch': 3}
{'type': 'loss', 'content': 0.08117853850126266, 'timestamp': '2025-10-02 01:07:39.816988', 'step': 31552, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:07:39.878151', 'step': 31552, 'epoch': 3}
{'type': 'loss', 'content': 0.007893159054219723, 'timestamp': '2025-10-02 01:07:39.889700', 'step': 31553, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:39.944088', 'step': 31553, 'epoch': 3}
{'type': 'loss', 'content': 0.054504379630088806, 'timestamp': '2025-10-02 01:07:39.946678', 'step': 31554, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:40.001142', 'step': 31554, 'epoch': 3}
{'type': 'loss', 'content': 0.0303228497505188, 'timestamp': '2025-10-02 01:07:40.008707', 'step': 31555, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:40.064964', 'step': 31555, 'epoch': 3}
{'type': 'loss', 'content': 0.04564288631081581, 'timestamp': '2025-10-02 01:07:40.070872', 'step': 31556, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:40.123999', 'step': 31556, 'epoch': 3}
{'type': 'loss', 'content': 0.07985610514879227, 'timestamp': '2025-10-02 01:07:40.126534', 'step': 31557, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:40.180189', 'step': 31557, 'epoch': 3}
{'type': 'loss', 'content': 0.07805849611759186, 'timestamp': '2025-10-02 01:07:40.182667', 'step': 31558, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:07:40.237211', 'step': 31558, 'epoch': 3}
{'type': 'loss', 'content': 0.05089007318019867, 'timestamp': '2025-10-02 01:07:40.239641', 'step': 31559, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:40.293768', 'step': 31559, 'epoch': 3}
{'type': 'loss', 'content': 0.09180307388305664, 'timestamp': '2025-10-02 01:07:40.301013', 'step': 31560, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:40.355180', 'step': 31560, 'epoch': 3}
{'type': 'loss', 'content': 0.02417084574699402, 'timestamp': '2025-10-02 01:07:40.357685', 'step': 31561, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:40.412526', 'step': 31561, 'epoch': 3}
{'type': 'loss', 'content': 0.11784716695547104, 'timestamp': '2025-10-02 01:07:40.415324', 'step': 31562, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:40.469642', 'step': 31562, 'epoch': 3}
{'type': 'loss', 'content': 0.04774888977408409, 'timestamp': '2025-10-02 01:07:40.472248', 'step': 31563, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:40.526676', 'step': 31563, 'epoch': 3}
{'type': 'loss', 'content': 0.048379864543676376, 'timestamp': '2025-10-02 01:07:40.533207', 'step': 31564, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:40.596492', 'step': 31564, 'epoch': 3}
{'type': 'loss', 'content': 0.030381469056010246, 'timestamp': '2025-10-02 01:07:40.599057', 'step': 31565, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:40.653202', 'step': 31565, 'epoch': 3}
{'type': 'loss', 'content': 0.031088663265109062, 'timestamp': '2025-10-02 01:07:40.660916', 'step': 31566, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:07:40.715845', 'step': 31566, 'epoch': 3}
{'type': 'loss', 'content': 0.04137474671006203, 'timestamp': '2025-10-02 01:07:40.718255', 'step': 31567, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:07:40.773611', 'step': 31567, 'epoch': 3}
{'type': 'loss', 'content': 0.038876160979270935, 'timestamp': '2025-10-02 01:07:40.779583', 'step': 31568, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:40.833849', 'step': 31568, 'epoch': 3}
{'type': 'loss', 'content': 0.10061298310756683, 'timestamp': '2025-10-02 01:07:40.836679', 'step': 31569, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:40.891603', 'step': 31569, 'epoch': 3}
{'type': 'loss', 'content': 0.06104673817753792, 'timestamp': '2025-10-02 01:07:40.899288', 'step': 31570, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:40.953676', 'step': 31570, 'epoch': 3}
{'type': 'loss', 'content': 0.021947981789708138, 'timestamp': '2025-10-02 01:07:40.956097', 'step': 31571, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:07:41.018308', 'step': 31571, 'epoch': 3}
{'type': 'loss', 'content': 0.038157373666763306, 'timestamp': '2025-10-02 01:07:41.029752', 'step': 31572, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:41.083436', 'step': 31572, 'epoch': 3}
{'type': 'loss', 'content': 0.046713341027498245, 'timestamp': '2025-10-02 01:07:41.085951', 'step': 31573, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:41.141332', 'step': 31573, 'epoch': 3}
{'type': 'loss', 'content': 0.11064425855875015, 'timestamp': '2025-10-02 01:07:41.147120', 'step': 31574, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:41.202317', 'step': 31574, 'epoch': 3}
{'type': 'loss', 'content': 0.10605919361114502, 'timestamp': '2025-10-02 01:07:41.205024', 'step': 31575, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:41.260750', 'step': 31575, 'epoch': 3}
{'type': 'loss', 'content': 0.07265495508909225, 'timestamp': '2025-10-02 01:07:41.266933', 'step': 31576, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:41.321714', 'step': 31576, 'epoch': 3}
{'type': 'loss', 'content': 0.0611802376806736, 'timestamp': '2025-10-02 01:07:41.327931', 'step': 31577, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:41.381769', 'step': 31577, 'epoch': 3}
{'type': 'loss', 'content': 0.08800828456878662, 'timestamp': '2025-10-02 01:07:41.384132', 'step': 31578, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:41.438673', 'step': 31578, 'epoch': 3}
{'type': 'loss', 'content': 0.04992116242647171, 'timestamp': '2025-10-02 01:07:41.441437', 'step': 31579, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:41.496094', 'step': 31579, 'epoch': 3}
{'type': 'loss', 'content': 0.05694124102592468, 'timestamp': '2025-10-02 01:07:41.506262', 'step': 31580, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:41.559622', 'step': 31580, 'epoch': 3}
{'type': 'loss', 'content': 0.06478226184844971, 'timestamp': '2025-10-02 01:07:41.561955', 'step': 31581, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:41.616839', 'step': 31581, 'epoch': 3}
{'type': 'loss', 'content': 0.024484921246767044, 'timestamp': '2025-10-02 01:07:41.619721', 'step': 31582, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:41.675413', 'step': 31582, 'epoch': 3}
{'type': 'loss', 'content': 0.01266638096421957, 'timestamp': '2025-10-02 01:07:41.677980', 'step': 31583, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:41.732228', 'step': 31583, 'epoch': 3}
{'type': 'loss', 'content': 0.05422699451446533, 'timestamp': '2025-10-02 01:07:41.738489', 'step': 31584, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:41.791712', 'step': 31584, 'epoch': 3}
{'type': 'loss', 'content': 0.25712093710899353, 'timestamp': '2025-10-02 01:07:41.794031', 'step': 31585, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:41.848611', 'step': 31585, 'epoch': 3}
{'type': 'loss', 'content': 0.033684391528367996, 'timestamp': '2025-10-02 01:07:41.856197', 'step': 31586, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:41.912802', 'step': 31586, 'epoch': 3}
{'type': 'loss', 'content': 0.02584237977862358, 'timestamp': '2025-10-02 01:07:41.915404', 'step': 31587, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:41.970342', 'step': 31587, 'epoch': 3}
{'type': 'loss', 'content': 0.04505457356572151, 'timestamp': '2025-10-02 01:07:41.976357', 'step': 31588, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:42.030261', 'step': 31588, 'epoch': 3}
{'type': 'loss', 'content': 0.029596887528896332, 'timestamp': '2025-10-02 01:07:42.032842', 'step': 31589, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:42.087802', 'step': 31589, 'epoch': 3}
{'type': 'loss', 'content': 0.05474790930747986, 'timestamp': '2025-10-02 01:07:42.090458', 'step': 31590, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:42.144685', 'step': 31590, 'epoch': 3}
{'type': 'loss', 'content': 0.06630612909793854, 'timestamp': '2025-10-02 01:07:42.147525', 'step': 31591, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:42.201738', 'step': 31591, 'epoch': 3}
{'type': 'loss', 'content': 0.05323157086968422, 'timestamp': '2025-10-02 01:07:42.211833', 'step': 31592, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:42.266587', 'step': 31592, 'epoch': 3}
{'type': 'loss', 'content': 0.17384250462055206, 'timestamp': '2025-10-02 01:07:42.269242', 'step': 31593, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:42.325279', 'step': 31593, 'epoch': 3}
{'type': 'loss', 'content': 0.006614352576434612, 'timestamp': '2025-10-02 01:07:42.332847', 'step': 31594, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:42.388268', 'step': 31594, 'epoch': 3}
{'type': 'loss', 'content': 0.06245603784918785, 'timestamp': '2025-10-02 01:07:42.390603', 'step': 31595, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:07:42.451603', 'step': 31595, 'epoch': 3}
{'type': 'loss', 'content': 0.08343476057052612, 'timestamp': '2025-10-02 01:07:42.462840', 'step': 31596, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:42.517343', 'step': 31596, 'epoch': 3}
{'type': 'loss', 'content': 0.004806171637028456, 'timestamp': '2025-10-02 01:07:42.525135', 'step': 31597, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:07:42.585608', 'step': 31597, 'epoch': 3}
{'type': 'loss', 'content': 0.014548799023032188, 'timestamp': '2025-10-02 01:07:42.595745', 'step': 31598, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:42.651297', 'step': 31598, 'epoch': 3}
{'type': 'loss', 'content': 0.011592515744268894, 'timestamp': '2025-10-02 01:07:42.658781', 'step': 31599, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:42.712327', 'step': 31599, 'epoch': 3}
{'type': 'loss', 'content': 0.0666322335600853, 'timestamp': '2025-10-02 01:07:42.718690', 'step': 31600, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:42.774216', 'step': 31600, 'epoch': 3}
{'type': 'loss', 'content': 0.0791410580277443, 'timestamp': '2025-10-02 01:07:42.776705', 'step': 31601, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:42.831646', 'step': 31601, 'epoch': 3}
{'type': 'loss', 'content': 0.04321960732340813, 'timestamp': '2025-10-02 01:07:42.834649', 'step': 31602, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:42.889155', 'step': 31602, 'epoch': 3}
{'type': 'loss', 'content': 0.10623923689126968, 'timestamp': '2025-10-02 01:07:42.891667', 'step': 31603, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:42.945696', 'step': 31603, 'epoch': 3}
{'type': 'loss', 'content': 0.03036687895655632, 'timestamp': '2025-10-02 01:07:42.951708', 'step': 31604, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:43.005794', 'step': 31604, 'epoch': 3}
{'type': 'loss', 'content': 0.029831480234861374, 'timestamp': '2025-10-02 01:07:43.016094', 'step': 31605, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:43.071103', 'step': 31605, 'epoch': 3}
{'type': 'loss', 'content': 0.04588873311877251, 'timestamp': '2025-10-02 01:07:43.074443', 'step': 31606, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:43.130835', 'step': 31606, 'epoch': 3}
{'type': 'loss', 'content': 0.030789706856012344, 'timestamp': '2025-10-02 01:07:43.140318', 'step': 31607, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:43.195916', 'step': 31607, 'epoch': 3}
{'type': 'loss', 'content': 0.09546075016260147, 'timestamp': '2025-10-02 01:07:43.202927', 'step': 31608, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:43.259857', 'step': 31608, 'epoch': 3}
{'type': 'loss', 'content': 0.009500782005488873, 'timestamp': '2025-10-02 01:07:43.262746', 'step': 31609, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:43.319170', 'step': 31609, 'epoch': 3}
{'type': 'loss', 'content': 0.04357311874628067, 'timestamp': '2025-10-02 01:07:43.321887', 'step': 31610, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:43.378532', 'step': 31610, 'epoch': 3}
{'type': 'loss', 'content': 0.0049665686674416065, 'timestamp': '2025-10-02 01:07:43.381688', 'step': 31611, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:07:43.438736', 'step': 31611, 'epoch': 3}
{'type': 'loss', 'content': 0.05665052682161331, 'timestamp': '2025-10-02 01:07:43.445197', 'step': 31612, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:07:43.512807', 'step': 31612, 'epoch': 3}
{'type': 'loss', 'content': 0.009802213869988918, 'timestamp': '2025-10-02 01:07:43.525756', 'step': 31613, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:43.582507', 'step': 31613, 'epoch': 3}
{'type': 'loss', 'content': 0.024398043751716614, 'timestamp': '2025-10-02 01:07:43.590209', 'step': 31614, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:43.645877', 'step': 31614, 'epoch': 3}
{'type': 'loss', 'content': 0.01746251806616783, 'timestamp': '2025-10-02 01:07:43.648966', 'step': 31615, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:43.704987', 'step': 31615, 'epoch': 3}
{'type': 'loss', 'content': 0.064982570707798, 'timestamp': '2025-10-02 01:07:43.711383', 'step': 31616, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:43.766152', 'step': 31616, 'epoch': 3}
{'type': 'loss', 'content': 0.04692467674612999, 'timestamp': '2025-10-02 01:07:43.768663', 'step': 31617, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:43.826073', 'step': 31617, 'epoch': 3}
{'type': 'loss', 'content': 0.03524232283234596, 'timestamp': '2025-10-02 01:07:43.833812', 'step': 31618, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:43.891092', 'step': 31618, 'epoch': 3}
{'type': 'loss', 'content': 0.005736000370234251, 'timestamp': '2025-10-02 01:07:43.898560', 'step': 31619, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:43.955698', 'step': 31619, 'epoch': 3}
{'type': 'loss', 'content': 0.07226989418268204, 'timestamp': '2025-10-02 01:07:43.965837', 'step': 31620, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:44.023542', 'step': 31620, 'epoch': 3}
{'type': 'loss', 'content': 0.0202158335596323, 'timestamp': '2025-10-02 01:07:44.033851', 'step': 31621, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:07:44.099015', 'step': 31621, 'epoch': 3}
{'type': 'loss', 'content': 0.000281174317933619, 'timestamp': '2025-10-02 01:07:44.109684', 'step': 31622, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:44.165888', 'step': 31622, 'epoch': 3}
{'type': 'loss', 'content': 0.03184229135513306, 'timestamp': '2025-10-02 01:07:44.169909', 'step': 31623, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:44.225003', 'step': 31623, 'epoch': 3}
{'type': 'loss', 'content': 0.06050051748752594, 'timestamp': '2025-10-02 01:07:44.232996', 'step': 31624, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:44.287104', 'step': 31624, 'epoch': 3}
{'type': 'loss', 'content': 0.025121547281742096, 'timestamp': '2025-10-02 01:07:44.293116', 'step': 31625, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:07:44.347333', 'step': 31625, 'epoch': 3}
{'type': 'loss', 'content': 0.03124038688838482, 'timestamp': '2025-10-02 01:07:44.350205', 'step': 31626, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:44.406384', 'step': 31626, 'epoch': 3}
{'type': 'loss', 'content': 0.03849644586443901, 'timestamp': '2025-10-02 01:07:44.415925', 'step': 31627, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:44.470139', 'step': 31627, 'epoch': 3}
{'type': 'loss', 'content': 0.07881371676921844, 'timestamp': '2025-10-02 01:07:44.477699', 'step': 31628, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:44.532955', 'step': 31628, 'epoch': 3}
{'type': 'loss', 'content': 0.031270768493413925, 'timestamp': '2025-10-02 01:07:44.535584', 'step': 31629, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:44.590615', 'step': 31629, 'epoch': 3}
{'type': 'loss', 'content': 0.04649341106414795, 'timestamp': '2025-10-02 01:07:44.598413', 'step': 31630, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:44.654286', 'step': 31630, 'epoch': 3}
{'type': 'loss', 'content': 0.07412578165531158, 'timestamp': '2025-10-02 01:07:44.656586', 'step': 31631, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:44.711155', 'step': 31631, 'epoch': 3}
{'type': 'loss', 'content': 0.020129157230257988, 'timestamp': '2025-10-02 01:07:44.717302', 'step': 31632, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:44.770669', 'step': 31632, 'epoch': 3}
{'type': 'loss', 'content': 0.022185184061527252, 'timestamp': '2025-10-02 01:07:44.773218', 'step': 31633, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:44.828696', 'step': 31633, 'epoch': 3}
{'type': 'loss', 'content': 0.14043916761875153, 'timestamp': '2025-10-02 01:07:44.831251', 'step': 31634, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:44.886681', 'step': 31634, 'epoch': 3}
{'type': 'loss', 'content': 0.005513348616659641, 'timestamp': '2025-10-02 01:07:44.889380', 'step': 31635, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:44.943078', 'step': 31635, 'epoch': 3}
{'type': 'loss', 'content': 0.03561576455831528, 'timestamp': '2025-10-02 01:07:44.950521', 'step': 31636, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:45.004518', 'step': 31636, 'epoch': 3}
{'type': 'loss', 'content': 0.06298363953828812, 'timestamp': '2025-10-02 01:07:45.007151', 'step': 31637, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:45.061128', 'step': 31637, 'epoch': 3}
{'type': 'loss', 'content': 0.10221648216247559, 'timestamp': '2025-10-02 01:07:45.063449', 'step': 31638, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:45.118107', 'step': 31638, 'epoch': 3}
{'type': 'loss', 'content': 0.012543032877147198, 'timestamp': '2025-10-02 01:07:45.125670', 'step': 31639, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:45.179908', 'step': 31639, 'epoch': 3}
{'type': 'loss', 'content': 0.028051747009158134, 'timestamp': '2025-10-02 01:07:45.185862', 'step': 31640, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:07:45.246664', 'step': 31640, 'epoch': 3}
{'type': 'loss', 'content': 0.021557366475462914, 'timestamp': '2025-10-02 01:07:45.258170', 'step': 31641, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:45.313041', 'step': 31641, 'epoch': 3}
{'type': 'loss', 'content': 0.1188281998038292, 'timestamp': '2025-10-02 01:07:45.315696', 'step': 31642, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:45.369099', 'step': 31642, 'epoch': 3}
{'type': 'loss', 'content': 0.014595414511859417, 'timestamp': '2025-10-02 01:07:45.372124', 'step': 31643, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:45.425824', 'step': 31643, 'epoch': 3}
{'type': 'loss', 'content': 0.14365476369857788, 'timestamp': '2025-10-02 01:07:45.431964', 'step': 31644, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:45.485007', 'step': 31644, 'epoch': 3}
{'type': 'loss', 'content': 0.000587832008022815, 'timestamp': '2025-10-02 01:07:45.494873', 'step': 31645, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:45.549667', 'step': 31645, 'epoch': 3}
{'type': 'loss', 'content': 0.01683199405670166, 'timestamp': '2025-10-02 01:07:45.552157', 'step': 31646, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 01:07:45.624878', 'step': 31646, 'epoch': 3}
{'type': 'loss', 'content': 0.014443564228713512, 'timestamp': '2025-10-02 01:07:45.637481', 'step': 31647, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:45.691892', 'step': 31647, 'epoch': 3}
{'type': 'loss', 'content': 0.04025646299123764, 'timestamp': '2025-10-02 01:07:45.700455', 'step': 31648, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:45.755485', 'step': 31648, 'epoch': 3}
{'type': 'loss', 'content': 0.07544250786304474, 'timestamp': '2025-10-02 01:07:45.765258', 'step': 31649, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:45.819289', 'step': 31649, 'epoch': 3}
{'type': 'loss', 'content': 0.061247505247592926, 'timestamp': '2025-10-02 01:07:45.821972', 'step': 31650, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:45.877141', 'step': 31650, 'epoch': 3}
{'type': 'loss', 'content': 0.0915442407131195, 'timestamp': '2025-10-02 01:07:45.879862', 'step': 31651, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:45.934340', 'step': 31651, 'epoch': 3}
{'type': 'loss', 'content': 0.0609242282807827, 'timestamp': '2025-10-02 01:07:45.940112', 'step': 31652, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:45.994704', 'step': 31652, 'epoch': 3}
{'type': 'loss', 'content': 0.06781210005283356, 'timestamp': '2025-10-02 01:07:45.997245', 'step': 31653, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:46.052070', 'step': 31653, 'epoch': 3}
{'type': 'loss', 'content': 0.06785046309232712, 'timestamp': '2025-10-02 01:07:46.054671', 'step': 31654, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:46.110154', 'step': 31654, 'epoch': 3}
{'type': 'loss', 'content': 0.0857255756855011, 'timestamp': '2025-10-02 01:07:46.113836', 'step': 31655, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:46.168408', 'step': 31655, 'epoch': 3}
{'type': 'loss', 'content': 0.06112586706876755, 'timestamp': '2025-10-02 01:07:46.174650', 'step': 31656, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:46.229069', 'step': 31656, 'epoch': 3}
{'type': 'loss', 'content': 0.01036383118480444, 'timestamp': '2025-10-02 01:07:46.235081', 'step': 31657, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:46.289480', 'step': 31657, 'epoch': 3}
{'type': 'loss', 'content': 0.10550852119922638, 'timestamp': '2025-10-02 01:07:46.291778', 'step': 31658, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:46.346879', 'step': 31658, 'epoch': 3}
{'type': 'loss', 'content': 0.0005817945348098874, 'timestamp': '2025-10-02 01:07:46.349285', 'step': 31659, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:46.403707', 'step': 31659, 'epoch': 3}
{'type': 'loss', 'content': 0.0533926859498024, 'timestamp': '2025-10-02 01:07:46.409794', 'step': 31660, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:46.463984', 'step': 31660, 'epoch': 3}
{'type': 'loss', 'content': 0.09902861714363098, 'timestamp': '2025-10-02 01:07:46.466566', 'step': 31661, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:46.520015', 'step': 31661, 'epoch': 3}
{'type': 'loss', 'content': 0.11225586384534836, 'timestamp': '2025-10-02 01:07:46.522471', 'step': 31662, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:46.578168', 'step': 31662, 'epoch': 3}
{'type': 'loss', 'content': 0.0159930232912302, 'timestamp': '2025-10-02 01:07:46.587673', 'step': 31663, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:46.642380', 'step': 31663, 'epoch': 3}
{'type': 'loss', 'content': 0.05100615695118904, 'timestamp': '2025-10-02 01:07:46.649160', 'step': 31664, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:07:46.711273', 'step': 31664, 'epoch': 3}
{'type': 'loss', 'content': 0.024800170212984085, 'timestamp': '2025-10-02 01:07:46.723035', 'step': 31665, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:46.778298', 'step': 31665, 'epoch': 3}
{'type': 'loss', 'content': 0.046144112944602966, 'timestamp': '2025-10-02 01:07:46.787849', 'step': 31666, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:46.843822', 'step': 31666, 'epoch': 3}
{'type': 'loss', 'content': 0.0639745369553566, 'timestamp': '2025-10-02 01:07:46.846313', 'step': 31667, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:46.901883', 'step': 31667, 'epoch': 3}
{'type': 'loss', 'content': 0.02028094418346882, 'timestamp': '2025-10-02 01:07:46.911103', 'step': 31668, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:46.965360', 'step': 31668, 'epoch': 3}
{'type': 'loss', 'content': 0.01281978003680706, 'timestamp': '2025-10-02 01:07:46.967612', 'step': 31669, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:47.021374', 'step': 31669, 'epoch': 3}
{'type': 'loss', 'content': 0.04712790995836258, 'timestamp': '2025-10-02 01:07:47.027577', 'step': 31670, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:47.082232', 'step': 31670, 'epoch': 3}
{'type': 'loss', 'content': 0.011771280318498611, 'timestamp': '2025-10-02 01:07:47.088074', 'step': 31671, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:47.142923', 'step': 31671, 'epoch': 3}
{'type': 'loss', 'content': 0.02158961445093155, 'timestamp': '2025-10-02 01:07:47.151333', 'step': 31672, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:47.205235', 'step': 31672, 'epoch': 3}
{'type': 'loss', 'content': 0.0766475647687912, 'timestamp': '2025-10-02 01:07:47.207869', 'step': 31673, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:47.263935', 'step': 31673, 'epoch': 3}
{'type': 'loss', 'content': 0.022569824010133743, 'timestamp': '2025-10-02 01:07:47.266437', 'step': 31674, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:47.320994', 'step': 31674, 'epoch': 3}
{'type': 'loss', 'content': 0.051074858754873276, 'timestamp': '2025-10-02 01:07:47.323965', 'step': 31675, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:47.381554', 'step': 31675, 'epoch': 3}
{'type': 'loss', 'content': 0.048419345170259476, 'timestamp': '2025-10-02 01:07:47.391873', 'step': 31676, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:47.446312', 'step': 31676, 'epoch': 3}
{'type': 'loss', 'content': 0.04543242231011391, 'timestamp': '2025-10-02 01:07:47.448609', 'step': 31677, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:47.502903', 'step': 31677, 'epoch': 3}
{'type': 'loss', 'content': 0.042564280331134796, 'timestamp': '2025-10-02 01:07:47.505111', 'step': 31678, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:47.561560', 'step': 31678, 'epoch': 3}
{'type': 'loss', 'content': 0.018705135211348534, 'timestamp': '2025-10-02 01:07:47.566164', 'step': 31679, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 01:07:47.638352', 'step': 31679, 'epoch': 3}
{'type': 'loss', 'content': 0.02045828476548195, 'timestamp': '2025-10-02 01:07:47.651803', 'step': 31680, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:47.707267', 'step': 31680, 'epoch': 3}
{'type': 'loss', 'content': 0.118706114590168, 'timestamp': '2025-10-02 01:07:47.709728', 'step': 31681, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:47.763613', 'step': 31681, 'epoch': 3}
{'type': 'loss', 'content': 0.11587665975093842, 'timestamp': '2025-10-02 01:07:47.766378', 'step': 31682, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:47.821606', 'step': 31682, 'epoch': 3}
{'type': 'loss', 'content': 0.01992226578295231, 'timestamp': '2025-10-02 01:07:47.827722', 'step': 31683, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:47.882995', 'step': 31683, 'epoch': 3}
{'type': 'loss', 'content': 0.041967082768678665, 'timestamp': '2025-10-02 01:07:47.889195', 'step': 31684, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:47.942479', 'step': 31684, 'epoch': 3}
{'type': 'loss', 'content': 0.018834475427865982, 'timestamp': '2025-10-02 01:07:47.945080', 'step': 31685, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:47.999311', 'step': 31685, 'epoch': 3}
{'type': 'loss', 'content': 0.040566250681877136, 'timestamp': '2025-10-02 01:07:48.005489', 'step': 31686, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:48.060138', 'step': 31686, 'epoch': 3}
{'type': 'loss', 'content': 0.015972763299942017, 'timestamp': '2025-10-02 01:07:48.062532', 'step': 31687, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:48.117158', 'step': 31687, 'epoch': 3}
{'type': 'loss', 'content': 0.09965909272432327, 'timestamp': '2025-10-02 01:07:48.122921', 'step': 31688, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:48.175793', 'step': 31688, 'epoch': 3}
{'type': 'loss', 'content': 0.08630459010601044, 'timestamp': '2025-10-02 01:07:48.178172', 'step': 31689, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:48.234990', 'step': 31689, 'epoch': 3}
{'type': 'loss', 'content': 0.015046472661197186, 'timestamp': '2025-10-02 01:07:48.237221', 'step': 31690, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:48.293892', 'step': 31690, 'epoch': 3}
{'type': 'loss', 'content': 0.032019369304180145, 'timestamp': '2025-10-02 01:07:48.296942', 'step': 31691, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:48.351661', 'step': 31691, 'epoch': 3}
{'type': 'loss', 'content': 0.0993519276380539, 'timestamp': '2025-10-02 01:07:48.360008', 'step': 31692, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:48.413679', 'step': 31692, 'epoch': 3}
{'type': 'loss', 'content': 0.04188245162367821, 'timestamp': '2025-10-02 01:07:48.416044', 'step': 31693, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:48.469787', 'step': 31693, 'epoch': 3}
{'type': 'loss', 'content': 0.0756048932671547, 'timestamp': '2025-10-02 01:07:48.471784', 'step': 31694, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:48.525663', 'step': 31694, 'epoch': 3}
{'type': 'loss', 'content': 0.08354432880878448, 'timestamp': '2025-10-02 01:07:48.528184', 'step': 31695, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:48.582748', 'step': 31695, 'epoch': 3}
{'type': 'loss', 'content': 0.06314410269260406, 'timestamp': '2025-10-02 01:07:48.592878', 'step': 31696, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:48.646557', 'step': 31696, 'epoch': 3}
{'type': 'loss', 'content': 0.048462435603141785, 'timestamp': '2025-10-02 01:07:48.649112', 'step': 31697, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:48.703885', 'step': 31697, 'epoch': 3}
{'type': 'loss', 'content': 0.029368514195084572, 'timestamp': '2025-10-02 01:07:48.713259', 'step': 31698, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:48.768528', 'step': 31698, 'epoch': 3}
{'type': 'loss', 'content': 0.04247674718499184, 'timestamp': '2025-10-02 01:07:48.774459', 'step': 31699, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:07:48.828418', 'step': 31699, 'epoch': 3}
{'type': 'loss', 'content': 0.0268816277384758, 'timestamp': '2025-10-02 01:07:48.834696', 'step': 31700, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:48.887825', 'step': 31700, 'epoch': 3}
{'type': 'loss', 'content': 0.08001937717199326, 'timestamp': '2025-10-02 01:07:48.890395', 'step': 31701, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:48.945416', 'step': 31701, 'epoch': 3}
{'type': 'loss', 'content': 0.024923117831349373, 'timestamp': '2025-10-02 01:07:48.954960', 'step': 31702, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:49.010299', 'step': 31702, 'epoch': 3}
{'type': 'loss', 'content': 0.039588563144207, 'timestamp': '2025-10-02 01:07:49.012769', 'step': 31703, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:07:49.066213', 'step': 31703, 'epoch': 3}
{'type': 'loss', 'content': 0.13779127597808838, 'timestamp': '2025-10-02 01:07:49.072125', 'step': 31704, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:49.127673', 'step': 31704, 'epoch': 3}
{'type': 'loss', 'content': 0.052321821451187134, 'timestamp': '2025-10-02 01:07:49.133780', 'step': 31705, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:07:49.187775', 'step': 31705, 'epoch': 3}
{'type': 'loss', 'content': 0.11726711690425873, 'timestamp': '2025-10-02 01:07:49.190230', 'step': 31706, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:49.244545', 'step': 31706, 'epoch': 3}
{'type': 'loss', 'content': 0.05645251274108887, 'timestamp': '2025-10-02 01:07:49.252122', 'step': 31707, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:07:49.312185', 'step': 31707, 'epoch': 3}
{'type': 'loss', 'content': 0.0630129724740982, 'timestamp': '2025-10-02 01:07:49.323127', 'step': 31708, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:49.377162', 'step': 31708, 'epoch': 3}
{'type': 'loss', 'content': 0.025561504065990448, 'timestamp': '2025-10-02 01:07:49.383416', 'step': 31709, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:49.437672', 'step': 31709, 'epoch': 3}
{'type': 'loss', 'content': 0.015617892146110535, 'timestamp': '2025-10-02 01:07:49.439990', 'step': 31710, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:49.494838', 'step': 31710, 'epoch': 3}
{'type': 'loss', 'content': 0.03634301945567131, 'timestamp': '2025-10-02 01:07:49.497574', 'step': 31711, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:49.553176', 'step': 31711, 'epoch': 3}
{'type': 'loss', 'content': 0.01798638515174389, 'timestamp': '2025-10-02 01:07:49.563319', 'step': 31712, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:49.617177', 'step': 31712, 'epoch': 3}
{'type': 'loss', 'content': 0.05396123230457306, 'timestamp': '2025-10-02 01:07:49.619670', 'step': 31713, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:49.673846', 'step': 31713, 'epoch': 3}
{'type': 'loss', 'content': 0.09509579092264175, 'timestamp': '2025-10-02 01:07:49.676700', 'step': 31714, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:49.731271', 'step': 31714, 'epoch': 3}
{'type': 'loss', 'content': 0.028480548411607742, 'timestamp': '2025-10-02 01:07:49.737560', 'step': 31715, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:49.792902', 'step': 31715, 'epoch': 3}
{'type': 'loss', 'content': 0.009290585294365883, 'timestamp': '2025-10-02 01:07:49.803225', 'step': 31716, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:07:49.857775', 'step': 31716, 'epoch': 3}
{'type': 'loss', 'content': 0.0018575957510620356, 'timestamp': '2025-10-02 01:07:49.861571', 'step': 31717, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:49.917307', 'step': 31717, 'epoch': 3}
{'type': 'loss', 'content': 0.021150970831513405, 'timestamp': '2025-10-02 01:07:49.926840', 'step': 31718, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:07:49.990181', 'step': 31718, 'epoch': 3}
{'type': 'loss', 'content': 0.04907970130443573, 'timestamp': '2025-10-02 01:07:50.001008', 'step': 31719, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:50.055343', 'step': 31719, 'epoch': 3}
{'type': 'loss', 'content': 0.040427371859550476, 'timestamp': '2025-10-02 01:07:50.061499', 'step': 31720, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:07:50.123784', 'step': 31720, 'epoch': 3}
{'type': 'loss', 'content': 0.009666938334703445, 'timestamp': '2025-10-02 01:07:50.135292', 'step': 31721, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:50.190370', 'step': 31721, 'epoch': 3}
{'type': 'loss', 'content': 0.05290338769555092, 'timestamp': '2025-10-02 01:07:50.192801', 'step': 31722, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:07:50.248925', 'step': 31722, 'epoch': 3}
{'type': 'loss', 'content': 0.06879256665706635, 'timestamp': '2025-10-02 01:07:50.251398', 'step': 31723, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:50.305428', 'step': 31723, 'epoch': 3}
{'type': 'loss', 'content': 0.03283685818314552, 'timestamp': '2025-10-02 01:07:50.311459', 'step': 31724, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:07:50.365322', 'step': 31724, 'epoch': 3}
{'type': 'loss', 'content': 0.032460808753967285, 'timestamp': '2025-10-02 01:07:50.367747', 'step': 31725, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:50.422825', 'step': 31725, 'epoch': 3}
{'type': 'loss', 'content': 0.045169152319431305, 'timestamp': '2025-10-02 01:07:50.425058', 'step': 31726, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:50.479231', 'step': 31726, 'epoch': 3}
{'type': 'loss', 'content': 0.04775567725300789, 'timestamp': '2025-10-02 01:07:50.481769', 'step': 31727, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:50.537261', 'step': 31727, 'epoch': 3}
{'type': 'loss', 'content': 0.05127289146184921, 'timestamp': '2025-10-02 01:07:50.543570', 'step': 31728, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:50.598587', 'step': 31728, 'epoch': 3}
{'type': 'loss', 'content': 0.043104901909828186, 'timestamp': '2025-10-02 01:07:50.604731', 'step': 31729, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:50.658611', 'step': 31729, 'epoch': 3}
{'type': 'loss', 'content': 0.10103728622198105, 'timestamp': '2025-10-02 01:07:50.661493', 'step': 31730, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:50.716625', 'step': 31730, 'epoch': 3}
{'type': 'loss', 'content': 0.07221721857786179, 'timestamp': '2025-10-02 01:07:50.722567', 'step': 31731, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:07:50.781926', 'step': 31731, 'epoch': 3}
{'type': 'loss', 'content': 0.009290492162108421, 'timestamp': '2025-10-02 01:07:50.792909', 'step': 31732, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:07:50.861604', 'step': 31732, 'epoch': 3}
{'type': 'loss', 'content': 0.0010051075369119644, 'timestamp': '2025-10-02 01:07:50.875032', 'step': 31733, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:50.931306', 'step': 31733, 'epoch': 3}
{'type': 'loss', 'content': 0.02561744675040245, 'timestamp': '2025-10-02 01:07:50.933826', 'step': 31734, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:50.988262', 'step': 31734, 'epoch': 3}
{'type': 'loss', 'content': 0.004221637267619371, 'timestamp': '2025-10-02 01:07:50.990999', 'step': 31735, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:07:51.055583', 'step': 31735, 'epoch': 3}
{'type': 'loss', 'content': 0.030236458405852318, 'timestamp': '2025-10-02 01:07:51.066782', 'step': 31736, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:51.120767', 'step': 31736, 'epoch': 3}
{'type': 'loss', 'content': 0.011866644956171513, 'timestamp': '2025-10-02 01:07:51.123229', 'step': 31737, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:51.176886', 'step': 31737, 'epoch': 3}
{'type': 'loss', 'content': 0.06479615718126297, 'timestamp': '2025-10-02 01:07:51.180324', 'step': 31738, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:07:51.234769', 'step': 31738, 'epoch': 3}
{'type': 'loss', 'content': 0.038970351219177246, 'timestamp': '2025-10-02 01:07:51.237593', 'step': 31739, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:51.292279', 'step': 31739, 'epoch': 3}
{'type': 'loss', 'content': 0.02241295948624611, 'timestamp': '2025-10-02 01:07:51.298343', 'step': 31740, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:51.352919', 'step': 31740, 'epoch': 3}
{'type': 'loss', 'content': 0.05196775123476982, 'timestamp': '2025-10-02 01:07:51.355303', 'step': 31741, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:51.409136', 'step': 31741, 'epoch': 3}
{'type': 'loss', 'content': 0.10514795780181885, 'timestamp': '2025-10-02 01:07:51.411963', 'step': 31742, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:51.467000', 'step': 31742, 'epoch': 3}
{'type': 'loss', 'content': 0.003847177140414715, 'timestamp': '2025-10-02 01:07:51.474426', 'step': 31743, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:51.529894', 'step': 31743, 'epoch': 3}
{'type': 'loss', 'content': 0.008698441088199615, 'timestamp': '2025-10-02 01:07:51.537859', 'step': 31744, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:51.592676', 'step': 31744, 'epoch': 3}
{'type': 'loss', 'content': 0.02130059525370598, 'timestamp': '2025-10-02 01:07:51.602064', 'step': 31745, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:51.656868', 'step': 31745, 'epoch': 3}
{'type': 'loss', 'content': 0.020095860585570335, 'timestamp': '2025-10-02 01:07:51.662836', 'step': 31746, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:51.718924', 'step': 31746, 'epoch': 3}
{'type': 'loss', 'content': 0.025923997163772583, 'timestamp': '2025-10-02 01:07:51.721471', 'step': 31747, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:51.775806', 'step': 31747, 'epoch': 3}
{'type': 'loss', 'content': 0.03497361019253731, 'timestamp': '2025-10-02 01:07:51.782643', 'step': 31748, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:51.837607', 'step': 31748, 'epoch': 3}
{'type': 'loss', 'content': 0.04710887372493744, 'timestamp': '2025-10-02 01:07:51.840128', 'step': 31749, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:51.894432', 'step': 31749, 'epoch': 3}
{'type': 'loss', 'content': 0.09601089358329773, 'timestamp': '2025-10-02 01:07:51.897606', 'step': 31750, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:07:51.960343', 'step': 31750, 'epoch': 3}
{'type': 'loss', 'content': 0.04897475242614746, 'timestamp': '2025-10-02 01:07:51.971013', 'step': 31751, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:52.025384', 'step': 31751, 'epoch': 3}
{'type': 'loss', 'content': 0.04713531211018562, 'timestamp': '2025-10-02 01:07:52.031310', 'step': 31752, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:52.085250', 'step': 31752, 'epoch': 3}
{'type': 'loss', 'content': 0.019735336303710938, 'timestamp': '2025-10-02 01:07:52.087611', 'step': 31753, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:07:52.141878', 'step': 31753, 'epoch': 3}
{'type': 'loss', 'content': 0.1280490756034851, 'timestamp': '2025-10-02 01:07:52.144773', 'step': 31754, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:52.200743', 'step': 31754, 'epoch': 3}
{'type': 'loss', 'content': 0.04372012987732887, 'timestamp': '2025-10-02 01:07:52.206694', 'step': 31755, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:52.263453', 'step': 31755, 'epoch': 3}
{'type': 'loss', 'content': 0.04002421349287033, 'timestamp': '2025-10-02 01:07:52.271984', 'step': 31756, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:07:52.328240', 'step': 31756, 'epoch': 3}
{'type': 'loss', 'content': 0.08648601174354553, 'timestamp': '2025-10-02 01:07:52.330618', 'step': 31757, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:52.388639', 'step': 31757, 'epoch': 3}
{'type': 'loss', 'content': 0.014585105702280998, 'timestamp': '2025-10-02 01:07:52.398201', 'step': 31758, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:07:52.463302', 'step': 31758, 'epoch': 3}
{'type': 'loss', 'content': 0.03559979051351547, 'timestamp': '2025-10-02 01:07:52.473787', 'step': 31759, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:07:52.538507', 'step': 31759, 'epoch': 3}
{'type': 'loss', 'content': 0.01100958976894617, 'timestamp': '2025-10-02 01:07:52.549779', 'step': 31760, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:07:52.612854', 'step': 31760, 'epoch': 3}
{'type': 'loss', 'content': 0.002344183623790741, 'timestamp': '2025-10-02 01:07:52.624220', 'step': 31761, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:52.680665', 'step': 31761, 'epoch': 3}
{'type': 'loss', 'content': 0.011958344839513302, 'timestamp': '2025-10-02 01:07:52.683791', 'step': 31762, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:52.741165', 'step': 31762, 'epoch': 3}
{'type': 'loss', 'content': 0.1371917426586151, 'timestamp': '2025-10-02 01:07:52.743811', 'step': 31763, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:07:52.799199', 'step': 31763, 'epoch': 3}
{'type': 'loss', 'content': 0.019945349544286728, 'timestamp': '2025-10-02 01:07:52.805035', 'step': 31764, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:52.862031', 'step': 31764, 'epoch': 3}
{'type': 'loss', 'content': 0.03223090246319771, 'timestamp': '2025-10-02 01:07:52.868032', 'step': 31765, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:52.923896', 'step': 31765, 'epoch': 3}
{'type': 'loss', 'content': 0.06500774621963501, 'timestamp': '2025-10-02 01:07:52.926395', 'step': 31766, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:52.981795', 'step': 31766, 'epoch': 3}
{'type': 'loss', 'content': 0.07300033420324326, 'timestamp': '2025-10-02 01:07:52.989401', 'step': 31767, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:53.044110', 'step': 31767, 'epoch': 3}
{'type': 'loss', 'content': 0.01966547593474388, 'timestamp': '2025-10-02 01:07:53.050424', 'step': 31768, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:53.105682', 'step': 31768, 'epoch': 3}
{'type': 'loss', 'content': 0.07485470175743103, 'timestamp': '2025-10-02 01:07:53.108960', 'step': 31769, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:53.165494', 'step': 31769, 'epoch': 3}
{'type': 'loss', 'content': 0.09543714672327042, 'timestamp': '2025-10-02 01:07:53.167894', 'step': 31770, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:53.223659', 'step': 31770, 'epoch': 3}
{'type': 'loss', 'content': 0.024950919672846794, 'timestamp': '2025-10-02 01:07:53.226803', 'step': 31771, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:53.283667', 'step': 31771, 'epoch': 3}
{'type': 'loss', 'content': 0.031130313873291016, 'timestamp': '2025-10-02 01:07:53.293769', 'step': 31772, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:53.348549', 'step': 31772, 'epoch': 3}
{'type': 'loss', 'content': 0.15294253826141357, 'timestamp': '2025-10-02 01:07:53.353757', 'step': 31773, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:07:53.416512', 'step': 31773, 'epoch': 3}
{'type': 'loss', 'content': 0.0753641277551651, 'timestamp': '2025-10-02 01:07:53.427328', 'step': 31774, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:53.484299', 'step': 31774, 'epoch': 3}
{'type': 'loss', 'content': 0.021345779299736023, 'timestamp': '2025-10-02 01:07:53.493841', 'step': 31775, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:53.550266', 'step': 31775, 'epoch': 3}
{'type': 'loss', 'content': 0.025708351284265518, 'timestamp': '2025-10-02 01:07:53.556210', 'step': 31776, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:53.610615', 'step': 31776, 'epoch': 3}
{'type': 'loss', 'content': 0.02660403773188591, 'timestamp': '2025-10-02 01:07:53.616809', 'step': 31777, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:07:53.679751', 'step': 31777, 'epoch': 3}
{'type': 'loss', 'content': 0.030059034004807472, 'timestamp': '2025-10-02 01:07:53.690287', 'step': 31778, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:53.746173', 'step': 31778, 'epoch': 3}
{'type': 'loss', 'content': 0.03218967840075493, 'timestamp': '2025-10-02 01:07:53.748803', 'step': 31779, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:07:53.805606', 'step': 31779, 'epoch': 3}
{'type': 'loss', 'content': 0.07354319095611572, 'timestamp': '2025-10-02 01:07:53.811696', 'step': 31780, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:53.866151', 'step': 31780, 'epoch': 3}
{'type': 'loss', 'content': 0.02848316729068756, 'timestamp': '2025-10-02 01:07:53.868483', 'step': 31781, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:53.923849', 'step': 31781, 'epoch': 3}
{'type': 'loss', 'content': 0.0071912361308932304, 'timestamp': '2025-10-02 01:07:53.926228', 'step': 31782, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:53.981145', 'step': 31782, 'epoch': 3}
{'type': 'loss', 'content': 0.04774971678853035, 'timestamp': '2025-10-02 01:07:53.983540', 'step': 31783, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:54.037895', 'step': 31783, 'epoch': 3}
{'type': 'loss', 'content': 0.04772988334298134, 'timestamp': '2025-10-02 01:07:54.044694', 'step': 31784, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:54.098583', 'step': 31784, 'epoch': 3}
{'type': 'loss', 'content': 0.014848430640995502, 'timestamp': '2025-10-02 01:07:54.108317', 'step': 31785, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:54.163415', 'step': 31785, 'epoch': 3}
{'type': 'loss', 'content': 0.019981540739536285, 'timestamp': '2025-10-02 01:07:54.165712', 'step': 31786, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:54.222037', 'step': 31786, 'epoch': 3}
{'type': 'loss', 'content': 0.017083778977394104, 'timestamp': '2025-10-02 01:07:54.224668', 'step': 31787, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:54.279505', 'step': 31787, 'epoch': 3}
{'type': 'loss', 'content': 0.028503594920039177, 'timestamp': '2025-10-02 01:07:54.285578', 'step': 31788, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:54.339341', 'step': 31788, 'epoch': 3}
{'type': 'loss', 'content': 0.0766250342130661, 'timestamp': '2025-10-02 01:07:54.341900', 'step': 31789, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:54.397100', 'step': 31789, 'epoch': 3}
{'type': 'loss', 'content': 0.07040835916996002, 'timestamp': '2025-10-02 01:07:54.399720', 'step': 31790, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:54.456137', 'step': 31790, 'epoch': 3}
{'type': 'loss', 'content': 0.0687502846121788, 'timestamp': '2025-10-02 01:07:54.459842', 'step': 31791, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:07:54.520619', 'step': 31791, 'epoch': 3}
{'type': 'loss', 'content': 0.003319408278912306, 'timestamp': '2025-10-02 01:07:54.531539', 'step': 31792, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:07:54.591934', 'step': 31792, 'epoch': 3}
{'type': 'loss', 'content': 0.027714841067790985, 'timestamp': '2025-10-02 01:07:54.603251', 'step': 31793, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:54.657966', 'step': 31793, 'epoch': 3}
{'type': 'loss', 'content': 0.014788614585995674, 'timestamp': '2025-10-02 01:07:54.664038', 'step': 31794, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:54.718451', 'step': 31794, 'epoch': 3}
{'type': 'loss', 'content': 0.02449261024594307, 'timestamp': '2025-10-02 01:07:54.725866', 'step': 31795, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:54.782299', 'step': 31795, 'epoch': 3}
{'type': 'loss', 'content': 0.046700477600097656, 'timestamp': '2025-10-02 01:07:54.792622', 'step': 31796, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:07:54.846433', 'step': 31796, 'epoch': 3}
{'type': 'loss', 'content': 0.12257108837366104, 'timestamp': '2025-10-02 01:07:54.848880', 'step': 31797, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:54.903106', 'step': 31797, 'epoch': 3}
{'type': 'loss', 'content': 0.04484548047184944, 'timestamp': '2025-10-02 01:07:54.905820', 'step': 31798, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:07:54.965164', 'step': 31798, 'epoch': 3}
{'type': 'loss', 'content': 0.0048361108638346195, 'timestamp': '2025-10-02 01:07:54.975316', 'step': 31799, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:07:55.039312', 'step': 31799, 'epoch': 3}
{'type': 'loss', 'content': 0.003907077945768833, 'timestamp': '2025-10-02 01:07:55.050970', 'step': 31800, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:55.104655', 'step': 31800, 'epoch': 3}
{'type': 'loss', 'content': 0.08216537535190582, 'timestamp': '2025-10-02 01:07:55.107227', 'step': 31801, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:07:55.166634', 'step': 31801, 'epoch': 3}
{'type': 'loss', 'content': 0.04174541309475899, 'timestamp': '2025-10-02 01:07:55.176783', 'step': 31802, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:07:55.239021', 'step': 31802, 'epoch': 3}
{'type': 'loss', 'content': 0.0010855015134438872, 'timestamp': '2025-10-02 01:07:55.249466', 'step': 31803, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:55.303857', 'step': 31803, 'epoch': 3}
{'type': 'loss', 'content': 0.039017945528030396, 'timestamp': '2025-10-02 01:07:55.310564', 'step': 31804, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:55.364356', 'step': 31804, 'epoch': 3}
{'type': 'loss', 'content': 0.01850365288555622, 'timestamp': '2025-10-02 01:07:55.366740', 'step': 31805, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:55.420875', 'step': 31805, 'epoch': 3}
{'type': 'loss', 'content': 0.030718684196472168, 'timestamp': '2025-10-02 01:07:55.423420', 'step': 31806, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:55.477760', 'step': 31806, 'epoch': 3}
{'type': 'loss', 'content': 0.06802722066640854, 'timestamp': '2025-10-02 01:07:55.480215', 'step': 31807, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:55.534366', 'step': 31807, 'epoch': 3}
{'type': 'loss', 'content': 0.0470484122633934, 'timestamp': '2025-10-02 01:07:55.540820', 'step': 31808, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:55.594489', 'step': 31808, 'epoch': 3}
{'type': 'loss', 'content': 0.0002490764018148184, 'timestamp': '2025-10-02 01:07:55.597065', 'step': 31809, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:55.652914', 'step': 31809, 'epoch': 3}
{'type': 'loss', 'content': 0.01955452933907509, 'timestamp': '2025-10-02 01:07:55.655798', 'step': 31810, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:55.709680', 'step': 31810, 'epoch': 3}
{'type': 'loss', 'content': 0.05799734592437744, 'timestamp': '2025-10-02 01:07:55.712336', 'step': 31811, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:55.768805', 'step': 31811, 'epoch': 3}
{'type': 'loss', 'content': 0.011864257045090199, 'timestamp': '2025-10-02 01:07:55.775225', 'step': 31812, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:07:55.828981', 'step': 31812, 'epoch': 3}
{'type': 'loss', 'content': 0.06231198459863663, 'timestamp': '2025-10-02 01:07:55.831849', 'step': 31813, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:55.887060', 'step': 31813, 'epoch': 3}
{'type': 'loss', 'content': 0.03376540169119835, 'timestamp': '2025-10-02 01:07:55.896414', 'step': 31814, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:55.951762', 'step': 31814, 'epoch': 3}
{'type': 'loss', 'content': 0.039259422570466995, 'timestamp': '2025-10-02 01:07:55.954094', 'step': 31815, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:07:56.016304', 'step': 31815, 'epoch': 3}
{'type': 'loss', 'content': 0.016364213079214096, 'timestamp': '2025-10-02 01:07:56.027737', 'step': 31816, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:56.081370', 'step': 31816, 'epoch': 3}
{'type': 'loss', 'content': 0.13306042551994324, 'timestamp': '2025-10-02 01:07:56.083741', 'step': 31817, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:56.138982', 'step': 31817, 'epoch': 3}
{'type': 'loss', 'content': 0.043543312698602676, 'timestamp': '2025-10-02 01:07:56.141359', 'step': 31818, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:56.197628', 'step': 31818, 'epoch': 3}
{'type': 'loss', 'content': 0.016949938610196114, 'timestamp': '2025-10-02 01:07:56.200182', 'step': 31819, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:56.254667', 'step': 31819, 'epoch': 3}
{'type': 'loss', 'content': 0.04443693161010742, 'timestamp': '2025-10-02 01:07:56.260598', 'step': 31820, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:56.314895', 'step': 31820, 'epoch': 3}
{'type': 'loss', 'content': 0.12548242509365082, 'timestamp': '2025-10-02 01:07:56.320976', 'step': 31821, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:56.380923', 'step': 31821, 'epoch': 3}
{'type': 'loss', 'content': 0.022898729890584946, 'timestamp': '2025-10-02 01:07:56.388594', 'step': 31822, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:56.443360', 'step': 31822, 'epoch': 3}
{'type': 'loss', 'content': 0.016470789909362793, 'timestamp': '2025-10-02 01:07:56.449321', 'step': 31823, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:56.506242', 'step': 31823, 'epoch': 3}
{'type': 'loss', 'content': 0.0099757956340909, 'timestamp': '2025-10-02 01:07:56.514636', 'step': 31824, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:56.568504', 'step': 31824, 'epoch': 3}
{'type': 'loss', 'content': 0.09211336821317673, 'timestamp': '2025-10-02 01:07:56.571105', 'step': 31825, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:56.625873', 'step': 31825, 'epoch': 3}
{'type': 'loss', 'content': 0.030049163848161697, 'timestamp': '2025-10-02 01:07:56.628313', 'step': 31826, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:56.683427', 'step': 31826, 'epoch': 3}
{'type': 'loss', 'content': 0.004956346936523914, 'timestamp': '2025-10-02 01:07:56.685752', 'step': 31827, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:56.739960', 'step': 31827, 'epoch': 3}
{'type': 'loss', 'content': 0.07532629370689392, 'timestamp': '2025-10-02 01:07:56.746818', 'step': 31828, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:56.800856', 'step': 31828, 'epoch': 3}
{'type': 'loss', 'content': 0.023218834772706032, 'timestamp': '2025-10-02 01:07:56.810477', 'step': 31829, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:56.865758', 'step': 31829, 'epoch': 3}
{'type': 'loss', 'content': 0.03211845085024834, 'timestamp': '2025-10-02 01:07:56.868389', 'step': 31830, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:56.922578', 'step': 31830, 'epoch': 3}
{'type': 'loss', 'content': 0.0007073507877066731, 'timestamp': '2025-10-02 01:07:56.925361', 'step': 31831, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:56.980289', 'step': 31831, 'epoch': 3}
{'type': 'loss', 'content': 0.03063557855784893, 'timestamp': '2025-10-02 01:07:56.986876', 'step': 31832, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:07:57.041596', 'step': 31832, 'epoch': 3}
{'type': 'loss', 'content': 0.06462249159812927, 'timestamp': '2025-10-02 01:07:57.051817', 'step': 31833, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:57.107209', 'step': 31833, 'epoch': 3}
{'type': 'loss', 'content': 0.07750329375267029, 'timestamp': '2025-10-02 01:07:57.113316', 'step': 31834, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:57.168011', 'step': 31834, 'epoch': 3}
{'type': 'loss', 'content': 0.009976129978895187, 'timestamp': '2025-10-02 01:07:57.170539', 'step': 31835, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:07:57.229554', 'step': 31835, 'epoch': 3}
{'type': 'loss', 'content': 0.02295629121363163, 'timestamp': '2025-10-02 01:07:57.240515', 'step': 31836, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:57.294523', 'step': 31836, 'epoch': 3}
{'type': 'loss', 'content': 0.07335203140974045, 'timestamp': '2025-10-02 01:07:57.297405', 'step': 31837, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:07:57.368016', 'step': 31837, 'epoch': 3}
{'type': 'loss', 'content': 0.028270570561289787, 'timestamp': '2025-10-02 01:07:57.378192', 'step': 31838, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:07:57.432704', 'step': 31838, 'epoch': 3}
{'type': 'loss', 'content': 0.053700610995292664, 'timestamp': '2025-10-02 01:07:57.435650', 'step': 31839, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:57.490521', 'step': 31839, 'epoch': 3}
{'type': 'loss', 'content': 0.04597603157162666, 'timestamp': '2025-10-02 01:07:57.497055', 'step': 31840, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:57.551744', 'step': 31840, 'epoch': 3}
{'type': 'loss', 'content': 0.02687874250113964, 'timestamp': '2025-10-02 01:07:57.554720', 'step': 31841, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:57.610960', 'step': 31841, 'epoch': 3}
{'type': 'loss', 'content': 0.044573720544576645, 'timestamp': '2025-10-02 01:07:57.613563', 'step': 31842, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:57.667901', 'step': 31842, 'epoch': 3}
{'type': 'loss', 'content': 0.025812290608882904, 'timestamp': '2025-10-02 01:07:57.670754', 'step': 31843, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:57.725641', 'step': 31843, 'epoch': 3}
{'type': 'loss', 'content': 0.05689777806401253, 'timestamp': '2025-10-02 01:07:57.732262', 'step': 31844, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:07:57.785914', 'step': 31844, 'epoch': 3}
{'type': 'loss', 'content': 0.07687481492757797, 'timestamp': '2025-10-02 01:07:57.788277', 'step': 31845, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:57.842186', 'step': 31845, 'epoch': 3}
{'type': 'loss', 'content': 0.0729369968175888, 'timestamp': '2025-10-02 01:07:57.844978', 'step': 31846, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:57.900671', 'step': 31846, 'epoch': 3}
{'type': 'loss', 'content': 0.014830861240625381, 'timestamp': '2025-10-02 01:07:57.902993', 'step': 31847, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:57.957206', 'step': 31847, 'epoch': 3}
{'type': 'loss', 'content': 0.01218023058027029, 'timestamp': '2025-10-02 01:07:57.967346', 'step': 31848, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:07:58.028129', 'step': 31848, 'epoch': 3}
{'type': 'loss', 'content': 0.010913846082985401, 'timestamp': '2025-10-02 01:07:58.039896', 'step': 31849, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:58.094026', 'step': 31849, 'epoch': 3}
{'type': 'loss', 'content': 0.09073278307914734, 'timestamp': '2025-10-02 01:07:58.097018', 'step': 31850, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:58.153516', 'step': 31850, 'epoch': 3}
{'type': 'loss', 'content': 0.020181575790047646, 'timestamp': '2025-10-02 01:07:58.156301', 'step': 31851, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:07:58.209873', 'step': 31851, 'epoch': 3}
{'type': 'loss', 'content': 0.062410976737737656, 'timestamp': '2025-10-02 01:07:58.215955', 'step': 31852, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:58.269697', 'step': 31852, 'epoch': 3}
{'type': 'loss', 'content': 0.046194449067115784, 'timestamp': '2025-10-02 01:07:58.272636', 'step': 31853, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:07:58.332495', 'step': 31853, 'epoch': 3}
{'type': 'loss', 'content': 0.0008057336672209203, 'timestamp': '2025-10-02 01:07:58.342692', 'step': 31854, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:58.397666', 'step': 31854, 'epoch': 3}
{'type': 'loss', 'content': 0.020507289096713066, 'timestamp': '2025-10-02 01:07:58.400085', 'step': 31855, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:07:58.462806', 'step': 31855, 'epoch': 3}
{'type': 'loss', 'content': 0.025104543194174767, 'timestamp': '2025-10-02 01:07:58.474051', 'step': 31856, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:07:58.531368', 'step': 31856, 'epoch': 3}
{'type': 'loss', 'content': 0.08822686225175858, 'timestamp': '2025-10-02 01:07:58.535754', 'step': 31857, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:07:58.593665', 'step': 31857, 'epoch': 3}
{'type': 'loss', 'content': 0.03301646560430527, 'timestamp': '2025-10-02 01:07:58.602999', 'step': 31858, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:07:58.658507', 'step': 31858, 'epoch': 3}
{'type': 'loss', 'content': 0.0008570690406486392, 'timestamp': '2025-10-02 01:07:58.660781', 'step': 31859, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:58.714858', 'step': 31859, 'epoch': 3}
{'type': 'loss', 'content': 0.12381845712661743, 'timestamp': '2025-10-02 01:07:58.721131', 'step': 31860, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:58.774914', 'step': 31860, 'epoch': 3}
{'type': 'loss', 'content': 0.007897191680967808, 'timestamp': '2025-10-02 01:07:58.778098', 'step': 31861, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:58.833680', 'step': 31861, 'epoch': 3}
{'type': 'loss', 'content': 0.08693799376487732, 'timestamp': '2025-10-02 01:07:58.836000', 'step': 31862, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:58.891336', 'step': 31862, 'epoch': 3}
{'type': 'loss', 'content': 0.06327226012945175, 'timestamp': '2025-10-02 01:07:58.893743', 'step': 31863, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:07:58.948796', 'step': 31863, 'epoch': 3}
{'type': 'loss', 'content': 0.017449326813220978, 'timestamp': '2025-10-02 01:07:58.954770', 'step': 31864, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:07:59.012575', 'step': 31864, 'epoch': 3}
{'type': 'loss', 'content': 0.03831269592046738, 'timestamp': '2025-10-02 01:07:59.023491', 'step': 31865, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:07:59.081993', 'step': 31865, 'epoch': 3}
{'type': 'loss', 'content': 0.03886980563402176, 'timestamp': '2025-10-02 01:07:59.092191', 'step': 31866, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:59.147862', 'step': 31866, 'epoch': 3}
{'type': 'loss', 'content': 0.07497749477624893, 'timestamp': '2025-10-02 01:07:59.150462', 'step': 31867, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:59.204066', 'step': 31867, 'epoch': 3}
{'type': 'loss', 'content': 0.03145073726773262, 'timestamp': '2025-10-02 01:07:59.210132', 'step': 31868, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 01:07:59.283160', 'step': 31868, 'epoch': 3}
{'type': 'loss', 'content': 0.01299344189465046, 'timestamp': '2025-10-02 01:07:59.297844', 'step': 31869, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:07:59.361624', 'step': 31869, 'epoch': 3}
{'type': 'loss', 'content': 0.010961092077195644, 'timestamp': '2025-10-02 01:07:59.372080', 'step': 31870, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:07:59.426604', 'step': 31870, 'epoch': 3}
{'type': 'loss', 'content': 0.03998032212257385, 'timestamp': '2025-10-02 01:07:59.429087', 'step': 31871, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:07:59.483151', 'step': 31871, 'epoch': 3}
{'type': 'loss', 'content': 0.08155933022499084, 'timestamp': '2025-10-02 01:07:59.489051', 'step': 31872, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:07:59.543011', 'step': 31872, 'epoch': 3}
{'type': 'loss', 'content': 0.03995143622159958, 'timestamp': '2025-10-02 01:07:59.545159', 'step': 31873, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:07:59.599425', 'step': 31873, 'epoch': 3}
{'type': 'loss', 'content': 0.06052374094724655, 'timestamp': '2025-10-02 01:07:59.602284', 'step': 31874, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:07:59.656739', 'step': 31874, 'epoch': 3}
{'type': 'loss', 'content': 0.018306909129023552, 'timestamp': '2025-10-02 01:07:59.659223', 'step': 31875, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:07:59.713786', 'step': 31875, 'epoch': 3}
{'type': 'loss', 'content': 0.042696915566921234, 'timestamp': '2025-10-02 01:07:59.719745', 'step': 31876, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:07:59.787368', 'step': 31876, 'epoch': 3}
{'type': 'loss', 'content': 0.03251514583826065, 'timestamp': '2025-10-02 01:07:59.792244', 'step': 31877, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:07:59.878338', 'step': 31877, 'epoch': 3}
{'type': 'loss', 'content': 0.019115952774882317, 'timestamp': '2025-10-02 01:07:59.884127', 'step': 31878, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:07:59.940717', 'step': 31878, 'epoch': 3}
{'type': 'loss', 'content': 0.022427299991250038, 'timestamp': '2025-10-02 01:07:59.948121', 'step': 31879, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:00.002524', 'step': 31879, 'epoch': 3}
{'type': 'loss', 'content': 0.06732526421546936, 'timestamp': '2025-10-02 01:08:00.009242', 'step': 31880, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:00.063241', 'step': 31880, 'epoch': 3}
{'type': 'loss', 'content': 0.03169415146112442, 'timestamp': '2025-10-02 01:08:00.065821', 'step': 31881, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:00.120159', 'step': 31881, 'epoch': 3}
{'type': 'loss', 'content': 0.021069129928946495, 'timestamp': '2025-10-02 01:08:00.122484', 'step': 31882, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:00.177071', 'step': 31882, 'epoch': 3}
{'type': 'loss', 'content': 0.047388955950737, 'timestamp': '2025-10-02 01:08:00.179978', 'step': 31883, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:08:00.235542', 'step': 31883, 'epoch': 3}
{'type': 'loss', 'content': 0.05464472249150276, 'timestamp': '2025-10-02 01:08:00.242709', 'step': 31884, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:00.296601', 'step': 31884, 'epoch': 3}
{'type': 'loss', 'content': 0.05637907609343529, 'timestamp': '2025-10-02 01:08:00.299211', 'step': 31885, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:00.355216', 'step': 31885, 'epoch': 3}
{'type': 'loss', 'content': 0.039200518280267715, 'timestamp': '2025-10-02 01:08:00.357861', 'step': 31886, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:08:00.413858', 'step': 31886, 'epoch': 3}
{'type': 'loss', 'content': 0.08141159266233444, 'timestamp': '2025-10-02 01:08:00.416053', 'step': 31887, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:00.470261', 'step': 31887, 'epoch': 3}
{'type': 'loss', 'content': 0.057610590010881424, 'timestamp': '2025-10-02 01:08:00.476448', 'step': 31888, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:00.530022', 'step': 31888, 'epoch': 3}
{'type': 'loss', 'content': 0.08866462111473083, 'timestamp': '2025-10-02 01:08:00.532619', 'step': 31889, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:00.587349', 'step': 31889, 'epoch': 3}
{'type': 'loss', 'content': 0.030407747253775597, 'timestamp': '2025-10-02 01:08:00.589814', 'step': 31890, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:08:00.645872', 'step': 31890, 'epoch': 3}
{'type': 'loss', 'content': 0.0007136882632039487, 'timestamp': '2025-10-02 01:08:00.655203', 'step': 31891, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:00.710270', 'step': 31891, 'epoch': 3}
{'type': 'loss', 'content': 0.04616958647966385, 'timestamp': '2025-10-02 01:08:00.720562', 'step': 31892, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:00.775699', 'step': 31892, 'epoch': 3}
{'type': 'loss', 'content': 0.03248926252126694, 'timestamp': '2025-10-02 01:08:00.778156', 'step': 31893, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:00.832883', 'step': 31893, 'epoch': 3}
{'type': 'loss', 'content': 0.04630287364125252, 'timestamp': '2025-10-02 01:08:00.835283', 'step': 31894, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:08:00.890239', 'step': 31894, 'epoch': 3}
{'type': 'loss', 'content': 0.018596753478050232, 'timestamp': '2025-10-02 01:08:00.899597', 'step': 31895, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:00.954852', 'step': 31895, 'epoch': 3}
{'type': 'loss', 'content': 0.029557587578892708, 'timestamp': '2025-10-02 01:08:00.961092', 'step': 31896, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:01.015190', 'step': 31896, 'epoch': 3}
{'type': 'loss', 'content': 0.04236378148198128, 'timestamp': '2025-10-02 01:08:01.018530', 'step': 31897, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:08:01.073330', 'step': 31897, 'epoch': 3}
{'type': 'loss', 'content': 0.06806391477584839, 'timestamp': '2025-10-02 01:08:01.075977', 'step': 31898, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:01.130081', 'step': 31898, 'epoch': 3}
{'type': 'loss', 'content': 0.05581718683242798, 'timestamp': '2025-10-02 01:08:01.132365', 'step': 31899, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:01.190063', 'step': 31899, 'epoch': 3}
{'type': 'loss', 'content': 0.03924554958939552, 'timestamp': '2025-10-02 01:08:01.200371', 'step': 31900, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:01.253658', 'step': 31900, 'epoch': 3}
{'type': 'loss', 'content': 0.046991076320409775, 'timestamp': '2025-10-02 01:08:01.256069', 'step': 31901, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:01.309739', 'step': 31901, 'epoch': 3}
{'type': 'loss', 'content': 0.009561068378388882, 'timestamp': '2025-10-02 01:08:01.312296', 'step': 31902, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:01.366614', 'step': 31902, 'epoch': 3}
{'type': 'loss', 'content': 0.06305357068777084, 'timestamp': '2025-10-02 01:08:01.370308', 'step': 31903, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:08:01.427618', 'step': 31903, 'epoch': 3}
{'type': 'loss', 'content': 0.04732520505785942, 'timestamp': '2025-10-02 01:08:01.433793', 'step': 31904, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:01.489515', 'step': 31904, 'epoch': 3}
{'type': 'loss', 'content': 0.011650975793600082, 'timestamp': '2025-10-02 01:08:01.492131', 'step': 31905, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:08:01.554870', 'step': 31905, 'epoch': 3}
{'type': 'loss', 'content': 0.020208975300192833, 'timestamp': '2025-10-02 01:08:01.565337', 'step': 31906, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:01.621652', 'step': 31906, 'epoch': 3}
{'type': 'loss', 'content': 0.047552771866321564, 'timestamp': '2025-10-02 01:08:01.624415', 'step': 31907, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:01.680504', 'step': 31907, 'epoch': 3}
{'type': 'loss', 'content': 0.0559370219707489, 'timestamp': '2025-10-02 01:08:01.687569', 'step': 31908, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:01.746202', 'step': 31908, 'epoch': 3}
{'type': 'loss', 'content': 0.08767818659543991, 'timestamp': '2025-10-02 01:08:01.748688', 'step': 31909, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 01:08:01.826634', 'step': 31909, 'epoch': 3}
{'type': 'loss', 'content': 0.044773560017347336, 'timestamp': '2025-10-02 01:08:01.840071', 'step': 31910, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:08:01.895445', 'step': 31910, 'epoch': 3}
{'type': 'loss', 'content': 0.06283561885356903, 'timestamp': '2025-10-02 01:08:01.898349', 'step': 31911, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:01.953496', 'step': 31911, 'epoch': 3}
{'type': 'loss', 'content': 0.1121964380145073, 'timestamp': '2025-10-02 01:08:01.959986', 'step': 31912, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:08:02.015805', 'step': 31912, 'epoch': 3}
{'type': 'loss', 'content': 0.08529701828956604, 'timestamp': '2025-10-02 01:08:02.018105', 'step': 31913, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:02.073835', 'step': 31913, 'epoch': 3}
{'type': 'loss', 'content': 0.03621818870306015, 'timestamp': '2025-10-02 01:08:02.081320', 'step': 31914, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:02.137207', 'step': 31914, 'epoch': 3}
{'type': 'loss', 'content': 0.04459230974316597, 'timestamp': '2025-10-02 01:08:02.139714', 'step': 31915, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:08:02.196140', 'step': 31915, 'epoch': 3}
{'type': 'loss', 'content': 0.05962980166077614, 'timestamp': '2025-10-02 01:08:02.202888', 'step': 31916, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:02.261130', 'step': 31916, 'epoch': 3}
{'type': 'loss', 'content': 0.01362732145935297, 'timestamp': '2025-10-02 01:08:02.263862', 'step': 31917, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:08:02.320375', 'step': 31917, 'epoch': 3}
{'type': 'loss', 'content': 0.05986660346388817, 'timestamp': '2025-10-02 01:08:02.329755', 'step': 31918, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:02.385152', 'step': 31918, 'epoch': 3}
{'type': 'loss', 'content': 0.05583341047167778, 'timestamp': '2025-10-02 01:08:02.387637', 'step': 31919, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:08:02.456630', 'step': 31919, 'epoch': 3}
{'type': 'loss', 'content': 0.03679926320910454, 'timestamp': '2025-10-02 01:08:02.469339', 'step': 31920, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:02.524245', 'step': 31920, 'epoch': 3}
{'type': 'loss', 'content': 0.051919080317020416, 'timestamp': '2025-10-02 01:08:02.528014', 'step': 31921, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:08:02.596041', 'step': 31921, 'epoch': 3}
{'type': 'loss', 'content': 0.009060838259756565, 'timestamp': '2025-10-02 01:08:02.606865', 'step': 31922, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:02.666088', 'step': 31922, 'epoch': 3}
{'type': 'loss', 'content': 0.09499112516641617, 'timestamp': '2025-10-02 01:08:02.668864', 'step': 31923, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:08:02.728726', 'step': 31923, 'epoch': 3}
{'type': 'loss', 'content': 0.04709424450993538, 'timestamp': '2025-10-02 01:08:02.734724', 'step': 31924, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:02.790533', 'step': 31924, 'epoch': 3}
{'type': 'loss', 'content': 0.011366641148924828, 'timestamp': '2025-10-02 01:08:02.800475', 'step': 31925, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:08:02.854866', 'step': 31925, 'epoch': 3}
{'type': 'loss', 'content': 0.0871349424123764, 'timestamp': '2025-10-02 01:08:02.858721', 'step': 31926, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:08:02.913507', 'step': 31926, 'epoch': 3}
{'type': 'loss', 'content': 0.02511231042444706, 'timestamp': '2025-10-02 01:08:02.915866', 'step': 31927, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:08:02.976292', 'step': 31927, 'epoch': 3}
{'type': 'loss', 'content': 0.01748177781701088, 'timestamp': '2025-10-02 01:08:02.987233', 'step': 31928, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:03.042265', 'step': 31928, 'epoch': 3}
{'type': 'loss', 'content': 0.028683559969067574, 'timestamp': '2025-10-02 01:08:03.044367', 'step': 31929, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:08:03.098696', 'step': 31929, 'epoch': 3}
{'type': 'loss', 'content': 0.027186185121536255, 'timestamp': '2025-10-02 01:08:03.100952', 'step': 31930, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:08:03.155816', 'step': 31930, 'epoch': 3}
{'type': 'loss', 'content': 0.053757231682538986, 'timestamp': '2025-10-02 01:08:03.158681', 'step': 31931, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:03.213536', 'step': 31931, 'epoch': 3}
{'type': 'loss', 'content': 0.0028116332832723856, 'timestamp': '2025-10-02 01:08:03.221849', 'step': 31932, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:03.276128', 'step': 31932, 'epoch': 3}
{'type': 'loss', 'content': 0.04393959417939186, 'timestamp': '2025-10-02 01:08:03.283689', 'step': 31933, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:03.339215', 'step': 31933, 'epoch': 3}
{'type': 'loss', 'content': 0.011696490459144115, 'timestamp': '2025-10-02 01:08:03.341704', 'step': 31934, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:03.397589', 'step': 31934, 'epoch': 3}
{'type': 'loss', 'content': 0.04734449461102486, 'timestamp': '2025-10-02 01:08:03.407125', 'step': 31935, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:08:03.460960', 'step': 31935, 'epoch': 3}
{'type': 'loss', 'content': 0.1237027570605278, 'timestamp': '2025-10-02 01:08:03.466896', 'step': 31936, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:03.520717', 'step': 31936, 'epoch': 3}
{'type': 'loss', 'content': 0.10122153908014297, 'timestamp': '2025-10-02 01:08:03.523163', 'step': 31937, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:08:03.577626', 'step': 31937, 'epoch': 3}
{'type': 'loss', 'content': 0.031105132773518562, 'timestamp': '2025-10-02 01:08:03.579873', 'step': 31938, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:03.634547', 'step': 31938, 'epoch': 3}
{'type': 'loss', 'content': 0.03583299741148949, 'timestamp': '2025-10-02 01:08:03.640356', 'step': 31939, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:03.695173', 'step': 31939, 'epoch': 3}
{'type': 'loss', 'content': 0.06686341017484665, 'timestamp': '2025-10-02 01:08:03.703110', 'step': 31940, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:08:03.757190', 'step': 31940, 'epoch': 3}
{'type': 'loss', 'content': 0.05308402702212334, 'timestamp': '2025-10-02 01:08:03.759655', 'step': 31941, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:03.813566', 'step': 31941, 'epoch': 3}
{'type': 'loss', 'content': 0.023901499807834625, 'timestamp': '2025-10-02 01:08:03.816054', 'step': 31942, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:08:03.871099', 'step': 31942, 'epoch': 3}
{'type': 'loss', 'content': 0.019288212060928345, 'timestamp': '2025-10-02 01:08:03.873493', 'step': 31943, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:08:03.927376', 'step': 31943, 'epoch': 3}
{'type': 'loss', 'content': 0.07559913396835327, 'timestamp': '2025-10-02 01:08:03.933239', 'step': 31944, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:08:03.987512', 'step': 31944, 'epoch': 3}
{'type': 'loss', 'content': 0.04410505294799805, 'timestamp': '2025-10-02 01:08:03.989906', 'step': 31945, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:04.045725', 'step': 31945, 'epoch': 3}
{'type': 'loss', 'content': 0.006281652953475714, 'timestamp': '2025-10-02 01:08:04.048428', 'step': 31946, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:04.103080', 'step': 31946, 'epoch': 3}
{'type': 'loss', 'content': 0.01531952153891325, 'timestamp': '2025-10-02 01:08:04.105495', 'step': 31947, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:04.159366', 'step': 31947, 'epoch': 3}
{'type': 'loss', 'content': 0.022542186081409454, 'timestamp': '2025-10-02 01:08:04.166061', 'step': 31948, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:04.220216', 'step': 31948, 'epoch': 3}
{'type': 'loss', 'content': 0.06017469987273216, 'timestamp': '2025-10-02 01:08:04.222705', 'step': 31949, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:04.277455', 'step': 31949, 'epoch': 3}
{'type': 'loss', 'content': 0.041049640625715256, 'timestamp': '2025-10-02 01:08:04.280796', 'step': 31950, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:04.335608', 'step': 31950, 'epoch': 3}
{'type': 'loss', 'content': 0.03398542478680611, 'timestamp': '2025-10-02 01:08:04.338215', 'step': 31951, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:04.393119', 'step': 31951, 'epoch': 3}
{'type': 'loss', 'content': 0.024415908381342888, 'timestamp': '2025-10-02 01:08:04.399846', 'step': 31952, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:04.453862', 'step': 31952, 'epoch': 3}
{'type': 'loss', 'content': 0.035057924687862396, 'timestamp': '2025-10-02 01:08:04.461518', 'step': 31953, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:08:04.520305', 'step': 31953, 'epoch': 3}
{'type': 'loss', 'content': 0.017696548253297806, 'timestamp': '2025-10-02 01:08:04.530490', 'step': 31954, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:04.585138', 'step': 31954, 'epoch': 3}
{'type': 'loss', 'content': 0.03082207590341568, 'timestamp': '2025-10-02 01:08:04.591066', 'step': 31955, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:04.648820', 'step': 31955, 'epoch': 3}
{'type': 'loss', 'content': 0.022261913865804672, 'timestamp': '2025-10-02 01:08:04.655020', 'step': 31956, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:04.708613', 'step': 31956, 'epoch': 3}
{'type': 'loss', 'content': 0.08477940410375595, 'timestamp': '2025-10-02 01:08:04.711701', 'step': 31957, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:08:04.773086', 'step': 31957, 'epoch': 3}
{'type': 'loss', 'content': 0.0057820528745651245, 'timestamp': '2025-10-02 01:08:04.783533', 'step': 31958, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:04.837952', 'step': 31958, 'epoch': 3}
{'type': 'loss', 'content': 0.025825228542089462, 'timestamp': '2025-10-02 01:08:04.845374', 'step': 31959, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:04.899849', 'step': 31959, 'epoch': 3}
{'type': 'loss', 'content': 0.10605380684137344, 'timestamp': '2025-10-02 01:08:04.905999', 'step': 31960, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:04.960146', 'step': 31960, 'epoch': 3}
{'type': 'loss', 'content': 0.0007670623599551618, 'timestamp': '2025-10-02 01:08:04.962569', 'step': 31961, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:08:05.017458', 'step': 31961, 'epoch': 3}
{'type': 'loss', 'content': 0.043057870119810104, 'timestamp': '2025-10-02 01:08:05.019657', 'step': 31962, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:05.075833', 'step': 31962, 'epoch': 3}
{'type': 'loss', 'content': 0.03433436155319214, 'timestamp': '2025-10-02 01:08:05.083399', 'step': 31963, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:05.138492', 'step': 31963, 'epoch': 3}
{'type': 'loss', 'content': 0.028747886419296265, 'timestamp': '2025-10-02 01:08:05.144913', 'step': 31964, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:08:05.198701', 'step': 31964, 'epoch': 3}
{'type': 'loss', 'content': 0.0711614340543747, 'timestamp': '2025-10-02 01:08:05.201345', 'step': 31965, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:08:05.256834', 'step': 31965, 'epoch': 3}
{'type': 'loss', 'content': 0.027217160910367966, 'timestamp': '2025-10-02 01:08:05.259484', 'step': 31966, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:08:05.315897', 'step': 31966, 'epoch': 3}
{'type': 'loss', 'content': 0.02830759435892105, 'timestamp': '2025-10-02 01:08:05.318698', 'step': 31967, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:08:05.373118', 'step': 31967, 'epoch': 3}
{'type': 'loss', 'content': 0.04863209277391434, 'timestamp': '2025-10-02 01:08:05.383281', 'step': 31968, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:05.438386', 'step': 31968, 'epoch': 3}
{'type': 'loss', 'content': 0.016706613823771477, 'timestamp': '2025-10-02 01:08:05.440917', 'step': 31969, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:05.497234', 'step': 31969, 'epoch': 3}
{'type': 'loss', 'content': 0.02462892420589924, 'timestamp': '2025-10-02 01:08:05.506765', 'step': 31970, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:05.561673', 'step': 31970, 'epoch': 3}
{'type': 'loss', 'content': 0.036995694041252136, 'timestamp': '2025-10-02 01:08:05.567503', 'step': 31971, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:08:05.621816', 'step': 31971, 'epoch': 3}
{'type': 'loss', 'content': 0.07273194193840027, 'timestamp': '2025-10-02 01:08:05.628079', 'step': 31972, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:08:05.682670', 'step': 31972, 'epoch': 3}
{'type': 'loss', 'content': 0.0992221087217331, 'timestamp': '2025-10-02 01:08:05.684839', 'step': 31973, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:08:05.740618', 'step': 31973, 'epoch': 3}
{'type': 'loss', 'content': 0.1148650199174881, 'timestamp': '2025-10-02 01:08:05.744748', 'step': 31974, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:05.804888', 'step': 31974, 'epoch': 3}
{'type': 'loss', 'content': 0.0253088790923357, 'timestamp': '2025-10-02 01:08:05.814468', 'step': 31975, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:08:05.868702', 'step': 31975, 'epoch': 3}
{'type': 'loss', 'content': 0.07356802374124527, 'timestamp': '2025-10-02 01:08:05.875429', 'step': 31976, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:05.929023', 'step': 31976, 'epoch': 3}
{'type': 'loss', 'content': 0.06502717733383179, 'timestamp': '2025-10-02 01:08:05.931376', 'step': 31977, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:05.985667', 'step': 31977, 'epoch': 3}
{'type': 'loss', 'content': 0.030610954388976097, 'timestamp': '2025-10-02 01:08:05.988274', 'step': 31978, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:06.042674', 'step': 31978, 'epoch': 3}
{'type': 'loss', 'content': 0.03777434676885605, 'timestamp': '2025-10-02 01:08:06.045227', 'step': 31979, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:06.099670', 'step': 31979, 'epoch': 3}
{'type': 'loss', 'content': 0.054814863950014114, 'timestamp': '2025-10-02 01:08:06.106075', 'step': 31980, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:08:06.173075', 'step': 31980, 'epoch': 3}
{'type': 'loss', 'content': 0.021029135212302208, 'timestamp': '2025-10-02 01:08:06.186038', 'step': 31981, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:06.242399', 'step': 31981, 'epoch': 3}
{'type': 'loss', 'content': 0.006643933244049549, 'timestamp': '2025-10-02 01:08:06.251939', 'step': 31982, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:06.306380', 'step': 31982, 'epoch': 3}
{'type': 'loss', 'content': 0.04490317031741142, 'timestamp': '2025-10-02 01:08:06.308850', 'step': 31983, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:08:06.362879', 'step': 31983, 'epoch': 3}
{'type': 'loss', 'content': 0.10480112582445145, 'timestamp': '2025-10-02 01:08:06.369307', 'step': 31984, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:08:06.424047', 'step': 31984, 'epoch': 3}
{'type': 'loss', 'content': 0.03630029782652855, 'timestamp': '2025-10-02 01:08:06.426989', 'step': 31985, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:08:06.481702', 'step': 31985, 'epoch': 3}
{'type': 'loss', 'content': 0.058668069541454315, 'timestamp': '2025-10-02 01:08:06.484109', 'step': 31986, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:08:06.540714', 'step': 31986, 'epoch': 3}
{'type': 'loss', 'content': 0.011273694224655628, 'timestamp': '2025-10-02 01:08:06.543243', 'step': 31987, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:06.597910', 'step': 31987, 'epoch': 3}
{'type': 'loss', 'content': 0.0379275381565094, 'timestamp': '2025-10-02 01:08:06.604459', 'step': 31988, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:06.659212', 'step': 31988, 'epoch': 3}
{'type': 'loss', 'content': 0.00029506825376302004, 'timestamp': '2025-10-02 01:08:06.666720', 'step': 31989, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:06.721927', 'step': 31989, 'epoch': 3}
{'type': 'loss', 'content': 0.04252920299768448, 'timestamp': '2025-10-02 01:08:06.724622', 'step': 31990, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:06.780205', 'step': 31990, 'epoch': 3}
{'type': 'loss', 'content': 0.04819389805197716, 'timestamp': '2025-10-02 01:08:06.783160', 'step': 31991, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:08:06.845091', 'step': 31991, 'epoch': 3}
{'type': 'loss', 'content': 0.029941491782665253, 'timestamp': '2025-10-02 01:08:06.856302', 'step': 31992, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:08:06.911799', 'step': 31992, 'epoch': 3}
{'type': 'loss', 'content': 0.11383865773677826, 'timestamp': '2025-10-02 01:08:06.914763', 'step': 31993, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:06.971129', 'step': 31993, 'epoch': 3}
{'type': 'loss', 'content': 0.023119589313864708, 'timestamp': '2025-10-02 01:08:06.980656', 'step': 31994, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:07.036669', 'step': 31994, 'epoch': 3}
{'type': 'loss', 'content': 0.0035405538510531187, 'timestamp': '2025-10-02 01:08:07.039212', 'step': 31995, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:08:07.093979', 'step': 31995, 'epoch': 3}
{'type': 'loss', 'content': 0.012364418245851994, 'timestamp': '2025-10-02 01:08:07.099991', 'step': 31996, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:07.154648', 'step': 31996, 'epoch': 3}
{'type': 'loss', 'content': 0.020724277943372726, 'timestamp': '2025-10-02 01:08:07.157560', 'step': 31997, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:08:07.211719', 'step': 31997, 'epoch': 3}
{'type': 'loss', 'content': 0.06722747534513474, 'timestamp': '2025-10-02 01:08:07.214577', 'step': 31998, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:08:07.270333', 'step': 31998, 'epoch': 3}
{'type': 'loss', 'content': 0.06399495899677277, 'timestamp': '2025-10-02 01:08:07.272818', 'step': 31999, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:07.328301', 'step': 31999, 'epoch': 3}
{'type': 'loss', 'content': 0.03570994362235069, 'timestamp': '2025-10-02 01:08:07.338634', 'step': 32000, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 32000', 'timestamp': '2025-10-02 01:08:07.752515', 'step': 32000, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:07.808227', 'step': 32000, 'epoch': 3}
{'type': 'loss', 'content': 0.10340690612792969, 'timestamp': '2025-10-02 01:08:07.810660', 'step': 32001, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:08:07.865679', 'step': 32001, 'epoch': 3}
{'type': 'loss', 'content': 0.044413261115550995, 'timestamp': '2025-10-02 01:08:07.868234', 'step': 32002, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:07.922676', 'step': 32002, 'epoch': 3}
{'type': 'loss', 'content': 0.014356566593050957, 'timestamp': '2025-10-02 01:08:07.929764', 'step': 32003, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:08:07.984088', 'step': 32003, 'epoch': 3}
{'type': 'loss', 'content': 0.06862174719572067, 'timestamp': '2025-10-02 01:08:07.990275', 'step': 32004, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:08:08.051590', 'step': 32004, 'epoch': 3}
{'type': 'loss', 'content': 0.021499289199709892, 'timestamp': '2025-10-02 01:08:08.062885', 'step': 32005, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:08.117833', 'step': 32005, 'epoch': 3}
{'type': 'loss', 'content': 0.02205243706703186, 'timestamp': '2025-10-02 01:08:08.122472', 'step': 32006, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:08.191747', 'step': 32006, 'epoch': 3}
{'type': 'loss', 'content': 0.0045705256052315235, 'timestamp': '2025-10-02 01:08:08.197560', 'step': 32007, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:08:08.315382', 'step': 32007, 'epoch': 3}
{'type': 'loss', 'content': 0.017504645511507988, 'timestamp': '2025-10-02 01:08:08.324396', 'step': 32008, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:08.402066', 'step': 32008, 'epoch': 3}
{'type': 'loss', 'content': 0.004368524998426437, 'timestamp': '2025-10-02 01:08:08.412340', 'step': 32009, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:08.474180', 'step': 32009, 'epoch': 3}
{'type': 'loss', 'content': 0.1079927310347557, 'timestamp': '2025-10-02 01:08:08.481684', 'step': 32010, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:08.544287', 'step': 32010, 'epoch': 3}
{'type': 'loss', 'content': 0.06112273782491684, 'timestamp': '2025-10-02 01:08:08.547591', 'step': 32011, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:08.619761', 'step': 32011, 'epoch': 3}
{'type': 'loss', 'content': 0.0578596293926239, 'timestamp': '2025-10-02 01:08:08.626900', 'step': 32012, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:08.691852', 'step': 32012, 'epoch': 3}
{'type': 'loss', 'content': 0.07004896551370621, 'timestamp': '2025-10-02 01:08:08.700585', 'step': 32013, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:08.767429', 'step': 32013, 'epoch': 3}
{'type': 'loss', 'content': 0.05631143972277641, 'timestamp': '2025-10-02 01:08:08.775740', 'step': 32014, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:08.836381', 'step': 32014, 'epoch': 3}
{'type': 'loss', 'content': 0.011109367944300175, 'timestamp': '2025-10-02 01:08:08.839682', 'step': 32015, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:08.897274', 'step': 32015, 'epoch': 3}
{'type': 'loss', 'content': 0.05942638963460922, 'timestamp': '2025-10-02 01:08:08.908321', 'step': 32016, 'epoch': 3}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 01:08:39.478868', 'step': 32016, 'epoch': 3}
{'type': 'pplx', 'content': 96.81838975426949, 'timestamp': '2025-10-02 01:08:39.483706', 'step': 32016, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:39.559381', 'step': 32016, 'epoch': 3}
{'type': 'loss', 'content': 0.0851333811879158, 'timestamp': '2025-10-02 01:08:39.562537', 'step': 32017, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:39.664709', 'step': 32017, 'epoch': 3}
{'type': 'loss', 'content': 0.07455407828092575, 'timestamp': '2025-10-02 01:08:39.668979', 'step': 32018, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:39.764385', 'step': 32018, 'epoch': 3}
{'type': 'loss', 'content': 0.17782604694366455, 'timestamp': '2025-10-02 01:08:39.771238', 'step': 32019, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:39.863676', 'step': 32019, 'epoch': 3}
{'type': 'loss', 'content': 0.0033308914862573147, 'timestamp': '2025-10-02 01:08:39.882299', 'step': 32020, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:08:39.977484', 'step': 32020, 'epoch': 3}
{'type': 'loss', 'content': 0.029616985470056534, 'timestamp': '2025-10-02 01:08:39.995114', 'step': 32021, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:08:40.102429', 'step': 32021, 'epoch': 3}
{'type': 'loss', 'content': 0.0024358925875276327, 'timestamp': '2025-10-02 01:08:40.114373', 'step': 32022, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:08:40.190723', 'step': 32022, 'epoch': 3}
{'type': 'loss', 'content': 0.017206350341439247, 'timestamp': '2025-10-02 01:08:40.200900', 'step': 32023, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:08:40.267029', 'step': 32023, 'epoch': 3}
{'type': 'loss', 'content': 0.000770147773437202, 'timestamp': '2025-10-02 01:08:40.286739', 'step': 32024, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:40.366689', 'step': 32024, 'epoch': 3}
{'type': 'loss', 'content': 0.04733188822865486, 'timestamp': '2025-10-02 01:08:40.371462', 'step': 32025, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:40.434177', 'step': 32025, 'epoch': 3}
{'type': 'loss', 'content': 0.06503162533044815, 'timestamp': '2025-10-02 01:08:40.440135', 'step': 32026, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:08:40.523590', 'step': 32026, 'epoch': 3}
{'type': 'loss', 'content': 0.037410616874694824, 'timestamp': '2025-10-02 01:08:40.535543', 'step': 32027, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:08:40.604242', 'step': 32027, 'epoch': 3}
{'type': 'loss', 'content': 0.007333702873438597, 'timestamp': '2025-10-02 01:08:40.615681', 'step': 32028, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:08:40.675031', 'step': 32028, 'epoch': 3}
{'type': 'loss', 'content': 0.05093517154455185, 'timestamp': '2025-10-02 01:08:40.679629', 'step': 32029, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:40.750859', 'step': 32029, 'epoch': 3}
{'type': 'loss', 'content': 0.11062534153461456, 'timestamp': '2025-10-02 01:08:40.765571', 'step': 32030, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:08:40.850860', 'step': 32030, 'epoch': 3}
{'type': 'loss', 'content': 0.038314010947942734, 'timestamp': '2025-10-02 01:08:40.854439', 'step': 32031, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:40.913645', 'step': 32031, 'epoch': 3}
{'type': 'loss', 'content': 0.01835893839597702, 'timestamp': '2025-10-02 01:08:40.932722', 'step': 32032, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:41.000699', 'step': 32032, 'epoch': 3}
{'type': 'loss', 'content': 0.06042473390698433, 'timestamp': '2025-10-02 01:08:41.015990', 'step': 32033, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:41.094844', 'step': 32033, 'epoch': 3}
{'type': 'loss', 'content': 0.005090180318802595, 'timestamp': '2025-10-02 01:08:41.100121', 'step': 32034, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:41.168253', 'step': 32034, 'epoch': 3}
{'type': 'loss', 'content': 0.012543884105980396, 'timestamp': '2025-10-02 01:08:41.180366', 'step': 32035, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:41.260387', 'step': 32035, 'epoch': 3}
{'type': 'loss', 'content': 0.052777837961912155, 'timestamp': '2025-10-02 01:08:41.267694', 'step': 32036, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:41.345881', 'step': 32036, 'epoch': 3}
{'type': 'loss', 'content': 0.015042724087834358, 'timestamp': '2025-10-02 01:08:41.351792', 'step': 32037, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:41.418104', 'step': 32037, 'epoch': 3}
{'type': 'loss', 'content': 0.06550028920173645, 'timestamp': '2025-10-02 01:08:41.423637', 'step': 32038, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:08:41.484393', 'step': 32038, 'epoch': 3}
{'type': 'loss', 'content': 0.00786673929542303, 'timestamp': '2025-10-02 01:08:41.493772', 'step': 32039, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:08:41.574085', 'step': 32039, 'epoch': 3}
{'type': 'loss', 'content': 0.03297712281346321, 'timestamp': '2025-10-02 01:08:41.593844', 'step': 32040, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:41.652804', 'step': 32040, 'epoch': 3}
{'type': 'loss', 'content': 0.005179980304092169, 'timestamp': '2025-10-02 01:08:41.660307', 'step': 32041, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:41.722707', 'step': 32041, 'epoch': 3}
{'type': 'loss', 'content': 0.060185085982084274, 'timestamp': '2025-10-02 01:08:41.728679', 'step': 32042, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:08:41.796983', 'step': 32042, 'epoch': 3}
{'type': 'loss', 'content': 0.023013882339000702, 'timestamp': '2025-10-02 01:08:41.807487', 'step': 32043, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:41.870516', 'step': 32043, 'epoch': 3}
{'type': 'loss', 'content': 0.09977582097053528, 'timestamp': '2025-10-02 01:08:41.878221', 'step': 32044, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:41.943694', 'step': 32044, 'epoch': 3}
{'type': 'loss', 'content': 0.06107757240533829, 'timestamp': '2025-10-02 01:08:41.961500', 'step': 32045, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:08:42.027692', 'step': 32045, 'epoch': 3}
{'type': 'loss', 'content': 0.005804453510791063, 'timestamp': '2025-10-02 01:08:42.037888', 'step': 32046, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:08:42.100325', 'step': 32046, 'epoch': 3}
{'type': 'loss', 'content': 0.0370253250002861, 'timestamp': '2025-10-02 01:08:42.105583', 'step': 32047, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:08:42.167043', 'step': 32047, 'epoch': 3}
{'type': 'loss', 'content': 0.0828242227435112, 'timestamp': '2025-10-02 01:08:42.187152', 'step': 32048, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:08:42.246656', 'step': 32048, 'epoch': 3}
{'type': 'loss', 'content': 0.02481972426176071, 'timestamp': '2025-10-02 01:08:42.252179', 'step': 32049, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:08:42.330759', 'step': 32049, 'epoch': 3}
{'type': 'loss', 'content': 0.00539857754483819, 'timestamp': '2025-10-02 01:08:42.341228', 'step': 32050, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 01:08:42.419444', 'step': 32050, 'epoch': 3}
{'type': 'loss', 'content': 0.0035458337515592575, 'timestamp': '2025-10-02 01:08:42.431899', 'step': 32051, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:42.494818', 'step': 32051, 'epoch': 3}
{'type': 'loss', 'content': 0.0017554470105096698, 'timestamp': '2025-10-02 01:08:42.503616', 'step': 32052, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:42.567793', 'step': 32052, 'epoch': 3}
{'type': 'loss', 'content': 0.04804694652557373, 'timestamp': '2025-10-02 01:08:42.573912', 'step': 32053, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:08:42.635523', 'step': 32053, 'epoch': 3}
{'type': 'loss', 'content': 0.03908621519804001, 'timestamp': '2025-10-02 01:08:42.639148', 'step': 32054, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:08:42.698378', 'step': 32054, 'epoch': 3}
{'type': 'loss', 'content': 0.11167904734611511, 'timestamp': '2025-10-02 01:08:42.703562', 'step': 32055, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:42.764186', 'step': 32055, 'epoch': 3}
{'type': 'loss', 'content': 0.042909957468509674, 'timestamp': '2025-10-02 01:08:42.771399', 'step': 32056, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:42.844560', 'step': 32056, 'epoch': 3}
{'type': 'loss', 'content': 0.015768449753522873, 'timestamp': '2025-10-02 01:08:42.849084', 'step': 32057, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:08:42.926952', 'step': 32057, 'epoch': 3}
{'type': 'loss', 'content': 0.019269391894340515, 'timestamp': '2025-10-02 01:08:42.943370', 'step': 32058, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:08:43.032392', 'step': 32058, 'epoch': 3}
{'type': 'loss', 'content': 0.048649538308382034, 'timestamp': '2025-10-02 01:08:43.042870', 'step': 32059, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:43.103436', 'step': 32059, 'epoch': 3}
{'type': 'loss', 'content': 0.03190295398235321, 'timestamp': '2025-10-02 01:08:43.111283', 'step': 32060, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:08:43.174327', 'step': 32060, 'epoch': 3}
{'type': 'loss', 'content': 0.06869111210107803, 'timestamp': '2025-10-02 01:08:43.185303', 'step': 32061, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:43.247389', 'step': 32061, 'epoch': 3}
{'type': 'loss', 'content': 0.08867810666561127, 'timestamp': '2025-10-02 01:08:43.266516', 'step': 32062, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:43.327749', 'step': 32062, 'epoch': 3}
{'type': 'loss', 'content': 0.07446472346782684, 'timestamp': '2025-10-02 01:08:43.333616', 'step': 32063, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:43.397210', 'step': 32063, 'epoch': 3}
{'type': 'loss', 'content': 0.009698383510112762, 'timestamp': '2025-10-02 01:08:43.406029', 'step': 32064, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:43.466963', 'step': 32064, 'epoch': 3}
{'type': 'loss', 'content': 0.06361273676156998, 'timestamp': '2025-10-02 01:08:43.477224', 'step': 32065, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:08:43.539278', 'step': 32065, 'epoch': 3}
{'type': 'loss', 'content': 0.010054802522063255, 'timestamp': '2025-10-02 01:08:43.544368', 'step': 32066, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:43.604337', 'step': 32066, 'epoch': 3}
{'type': 'loss', 'content': 0.024472368881106377, 'timestamp': '2025-10-02 01:08:43.610205', 'step': 32067, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:43.670664', 'step': 32067, 'epoch': 3}
{'type': 'loss', 'content': 0.023551160469651222, 'timestamp': '2025-10-02 01:08:43.678354', 'step': 32068, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:43.773326', 'step': 32068, 'epoch': 3}
{'type': 'loss', 'content': 0.02803633362054825, 'timestamp': '2025-10-02 01:08:43.792058', 'step': 32069, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:43.876267', 'step': 32069, 'epoch': 3}
{'type': 'loss', 'content': 0.044405821710824966, 'timestamp': '2025-10-02 01:08:43.881112', 'step': 32070, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:43.971885', 'step': 32070, 'epoch': 3}
{'type': 'loss', 'content': 0.07306606322526932, 'timestamp': '2025-10-02 01:08:43.995272', 'step': 32071, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:08:44.072297', 'step': 32071, 'epoch': 3}
{'type': 'loss', 'content': 0.03707170858979225, 'timestamp': '2025-10-02 01:08:44.095074', 'step': 32072, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:08:44.157304', 'step': 32072, 'epoch': 3}
{'type': 'loss', 'content': 0.039541106671094894, 'timestamp': '2025-10-02 01:08:44.173524', 'step': 32073, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:44.265611', 'step': 32073, 'epoch': 3}
{'type': 'loss', 'content': 0.1409239023923874, 'timestamp': '2025-10-02 01:08:44.273115', 'step': 32074, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:08:44.364508', 'step': 32074, 'epoch': 3}
{'type': 'loss', 'content': 0.03655758127570152, 'timestamp': '2025-10-02 01:08:44.368875', 'step': 32075, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:44.428073', 'step': 32075, 'epoch': 3}
{'type': 'loss', 'content': 0.046246159821748734, 'timestamp': '2025-10-02 01:08:44.436659', 'step': 32076, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:08:44.508038', 'step': 32076, 'epoch': 3}
{'type': 'loss', 'content': 0.05652958154678345, 'timestamp': '2025-10-02 01:08:44.511962', 'step': 32077, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:44.595282', 'step': 32077, 'epoch': 3}
{'type': 'loss', 'content': 0.019683504477143288, 'timestamp': '2025-10-02 01:08:44.614913', 'step': 32078, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:44.677279', 'step': 32078, 'epoch': 3}
{'type': 'loss', 'content': 0.01900026574730873, 'timestamp': '2025-10-02 01:08:44.696274', 'step': 32079, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:08:44.777678', 'step': 32079, 'epoch': 3}
{'type': 'loss', 'content': 0.0368242971599102, 'timestamp': '2025-10-02 01:08:44.785074', 'step': 32080, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:44.876912', 'step': 32080, 'epoch': 3}
{'type': 'loss', 'content': 0.03276665508747101, 'timestamp': '2025-10-02 01:08:44.884292', 'step': 32081, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:08:44.955024', 'step': 32081, 'epoch': 3}
{'type': 'loss', 'content': 0.013634869828820229, 'timestamp': '2025-10-02 01:08:44.971461', 'step': 32082, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:08:45.030417', 'step': 32082, 'epoch': 3}
{'type': 'loss', 'content': 0.10863052308559418, 'timestamp': '2025-10-02 01:08:45.036343', 'step': 32083, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:45.105177', 'step': 32083, 'epoch': 3}
{'type': 'loss', 'content': 0.034568365663290024, 'timestamp': '2025-10-02 01:08:45.113577', 'step': 32084, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:45.173470', 'step': 32084, 'epoch': 3}
{'type': 'loss', 'content': 0.0009491569944657385, 'timestamp': '2025-10-02 01:08:45.183762', 'step': 32085, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:45.245227', 'step': 32085, 'epoch': 3}
{'type': 'loss', 'content': 0.044428035616874695, 'timestamp': '2025-10-02 01:08:45.261696', 'step': 32086, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:45.322292', 'step': 32086, 'epoch': 3}
{'type': 'loss', 'content': 0.08143239468336105, 'timestamp': '2025-10-02 01:08:45.326202', 'step': 32087, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:45.398665', 'step': 32087, 'epoch': 3}
{'type': 'loss', 'content': 0.05065714940428734, 'timestamp': '2025-10-02 01:08:45.406386', 'step': 32088, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:45.494266', 'step': 32088, 'epoch': 3}
{'type': 'loss', 'content': 0.05593787878751755, 'timestamp': '2025-10-02 01:08:45.506699', 'step': 32089, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:45.596758', 'step': 32089, 'epoch': 3}
{'type': 'loss', 'content': 0.023110002279281616, 'timestamp': '2025-10-02 01:08:45.611110', 'step': 32090, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:08:45.687066', 'step': 32090, 'epoch': 3}
{'type': 'loss', 'content': 0.02140813134610653, 'timestamp': '2025-10-02 01:08:45.697191', 'step': 32091, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:08:45.755727', 'step': 32091, 'epoch': 3}
{'type': 'loss', 'content': 0.03506539762020111, 'timestamp': '2025-10-02 01:08:45.765774', 'step': 32092, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:08:45.860197', 'step': 32092, 'epoch': 3}
{'type': 'loss', 'content': 0.017296215519309044, 'timestamp': '2025-10-02 01:08:45.875268', 'step': 32093, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:45.957683', 'step': 32093, 'epoch': 3}
{'type': 'loss', 'content': 0.04316667467355728, 'timestamp': '2025-10-02 01:08:45.974838', 'step': 32094, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:08:46.082133', 'step': 32094, 'epoch': 3}
{'type': 'loss', 'content': 0.04446021094918251, 'timestamp': '2025-10-02 01:08:46.094339', 'step': 32095, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:08:46.174687', 'step': 32095, 'epoch': 3}
{'type': 'loss', 'content': 0.0427413135766983, 'timestamp': '2025-10-02 01:08:46.194395', 'step': 32096, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:46.278909', 'step': 32096, 'epoch': 3}
{'type': 'loss', 'content': 0.017281468957662582, 'timestamp': '2025-10-02 01:08:46.283370', 'step': 32097, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:46.363238', 'step': 32097, 'epoch': 3}
{'type': 'loss', 'content': 0.03818703442811966, 'timestamp': '2025-10-02 01:08:46.369376', 'step': 32098, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:46.432204', 'step': 32098, 'epoch': 3}
{'type': 'loss', 'content': 0.12014885246753693, 'timestamp': '2025-10-02 01:08:46.438018', 'step': 32099, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:46.513667', 'step': 32099, 'epoch': 3}
{'type': 'loss', 'content': 0.1033724993467331, 'timestamp': '2025-10-02 01:08:46.524528', 'step': 32100, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:46.613149', 'step': 32100, 'epoch': 3}
{'type': 'loss', 'content': 0.05639506131410599, 'timestamp': '2025-10-02 01:08:46.634637', 'step': 32101, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:46.739049', 'step': 32101, 'epoch': 3}
{'type': 'loss', 'content': 0.03374155983328819, 'timestamp': '2025-10-02 01:08:46.742923', 'step': 32102, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:46.818219', 'step': 32102, 'epoch': 3}
{'type': 'loss', 'content': 0.03498127683997154, 'timestamp': '2025-10-02 01:08:46.822752', 'step': 32103, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:46.895626', 'step': 32103, 'epoch': 3}
{'type': 'loss', 'content': 0.0044793919660151005, 'timestamp': '2025-10-02 01:08:46.905938', 'step': 32104, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:46.979757', 'step': 32104, 'epoch': 3}
{'type': 'loss', 'content': 0.08804899454116821, 'timestamp': '2025-10-02 01:08:47.001302', 'step': 32105, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:47.098451', 'step': 32105, 'epoch': 3}
{'type': 'loss', 'content': 0.06142507493495941, 'timestamp': '2025-10-02 01:08:47.105887', 'step': 32106, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:08:47.190960', 'step': 32106, 'epoch': 3}
{'type': 'loss', 'content': 0.08231549710035324, 'timestamp': '2025-10-02 01:08:47.196512', 'step': 32107, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:08:47.277122', 'step': 32107, 'epoch': 3}
{'type': 'loss', 'content': 0.043849192559719086, 'timestamp': '2025-10-02 01:08:47.288398', 'step': 32108, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:47.347405', 'step': 32108, 'epoch': 3}
{'type': 'loss', 'content': 0.05215431749820709, 'timestamp': '2025-10-02 01:08:47.352458', 'step': 32109, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:47.416546', 'step': 32109, 'epoch': 3}
{'type': 'loss', 'content': 0.039136629551649094, 'timestamp': '2025-10-02 01:08:47.424167', 'step': 32110, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:08:47.508824', 'step': 32110, 'epoch': 3}
{'type': 'loss', 'content': 0.02453259751200676, 'timestamp': '2025-10-02 01:08:47.517904', 'step': 32111, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:47.592272', 'step': 32111, 'epoch': 3}
{'type': 'loss', 'content': 0.0637272372841835, 'timestamp': '2025-10-02 01:08:47.600400', 'step': 32112, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:47.660850', 'step': 32112, 'epoch': 3}
{'type': 'loss', 'content': 0.01733529008924961, 'timestamp': '2025-10-02 01:08:47.673420', 'step': 32113, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:47.748668', 'step': 32113, 'epoch': 3}
{'type': 'loss', 'content': 0.025379694998264313, 'timestamp': '2025-10-02 01:08:47.767916', 'step': 32114, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:47.852758', 'step': 32114, 'epoch': 3}
{'type': 'loss', 'content': 0.07020937651395798, 'timestamp': '2025-10-02 01:08:47.857123', 'step': 32115, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:08:47.930223', 'step': 32115, 'epoch': 3}
{'type': 'loss', 'content': 0.054032113403081894, 'timestamp': '2025-10-02 01:08:47.937262', 'step': 32116, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:08:48.007499', 'step': 32116, 'epoch': 3}
{'type': 'loss', 'content': 0.08939657360315323, 'timestamp': '2025-10-02 01:08:48.011291', 'step': 32117, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:08:48.071344', 'step': 32117, 'epoch': 3}
{'type': 'loss', 'content': 0.0021483758464455605, 'timestamp': '2025-10-02 01:08:48.080719', 'step': 32118, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:48.141424', 'step': 32118, 'epoch': 3}
{'type': 'loss', 'content': 0.037626713514328, 'timestamp': '2025-10-02 01:08:48.147357', 'step': 32119, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:48.208379', 'step': 32119, 'epoch': 3}
{'type': 'loss', 'content': 0.02795858308672905, 'timestamp': '2025-10-02 01:08:48.215698', 'step': 32120, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:48.276242', 'step': 32120, 'epoch': 3}
{'type': 'loss', 'content': 0.05783741548657417, 'timestamp': '2025-10-02 01:08:48.280118', 'step': 32121, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:48.342600', 'step': 32121, 'epoch': 3}
{'type': 'loss', 'content': 0.09001803398132324, 'timestamp': '2025-10-02 01:08:48.351568', 'step': 32122, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:08:48.410547', 'step': 32122, 'epoch': 3}
{'type': 'loss', 'content': 0.04356519505381584, 'timestamp': '2025-10-02 01:08:48.415748', 'step': 32123, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:08:48.476133', 'step': 32123, 'epoch': 3}
{'type': 'loss', 'content': 0.006640241015702486, 'timestamp': '2025-10-02 01:08:48.485871', 'step': 32124, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:48.560135', 'step': 32124, 'epoch': 3}
{'type': 'loss', 'content': 0.13599199056625366, 'timestamp': '2025-10-02 01:08:48.565679', 'step': 32125, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:48.625329', 'step': 32125, 'epoch': 3}
{'type': 'loss', 'content': 0.05909488722681999, 'timestamp': '2025-10-02 01:08:48.640240', 'step': 32126, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:48.700156', 'step': 32126, 'epoch': 3}
{'type': 'loss', 'content': 0.010070611722767353, 'timestamp': '2025-10-02 01:08:48.709715', 'step': 32127, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:48.780170', 'step': 32127, 'epoch': 3}
{'type': 'loss', 'content': 0.09899429976940155, 'timestamp': '2025-10-02 01:08:48.787124', 'step': 32128, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:08:48.854254', 'step': 32128, 'epoch': 3}
{'type': 'loss', 'content': 0.040185101330280304, 'timestamp': '2025-10-02 01:08:48.865198', 'step': 32129, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:48.937023', 'step': 32129, 'epoch': 3}
{'type': 'loss', 'content': 0.05137147381901741, 'timestamp': '2025-10-02 01:08:48.940894', 'step': 32130, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:49.026172', 'step': 32130, 'epoch': 3}
{'type': 'loss', 'content': 0.010507859289646149, 'timestamp': '2025-10-02 01:08:49.033556', 'step': 32131, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:49.104050', 'step': 32131, 'epoch': 3}
{'type': 'loss', 'content': 0.02617812156677246, 'timestamp': '2025-10-02 01:08:49.122611', 'step': 32132, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:08:49.180661', 'step': 32132, 'epoch': 3}
{'type': 'loss', 'content': 0.1268785148859024, 'timestamp': '2025-10-02 01:08:49.184217', 'step': 32133, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:08:49.263334', 'step': 32133, 'epoch': 3}
{'type': 'loss', 'content': 0.05478956922888756, 'timestamp': '2025-10-02 01:08:49.266693', 'step': 32134, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:49.355294', 'step': 32134, 'epoch': 3}
{'type': 'loss', 'content': 0.015872299671173096, 'timestamp': '2025-10-02 01:08:49.359112', 'step': 32135, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:08:49.421959', 'step': 32135, 'epoch': 3}
{'type': 'loss', 'content': 0.0191282220184803, 'timestamp': '2025-10-02 01:08:49.439424', 'step': 32136, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:49.498286', 'step': 32136, 'epoch': 3}
{'type': 'loss', 'content': 0.05376684293150902, 'timestamp': '2025-10-02 01:08:49.505816', 'step': 32137, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:49.565807', 'step': 32137, 'epoch': 3}
{'type': 'loss', 'content': 0.10527236759662628, 'timestamp': '2025-10-02 01:08:49.570852', 'step': 32138, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:08:49.632843', 'step': 32138, 'epoch': 3}
{'type': 'loss', 'content': 0.030060555785894394, 'timestamp': '2025-10-02 01:08:49.642230', 'step': 32139, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:08:49.704066', 'step': 32139, 'epoch': 3}
{'type': 'loss', 'content': 0.05794676020741463, 'timestamp': '2025-10-02 01:08:49.710927', 'step': 32140, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:49.768962', 'step': 32140, 'epoch': 3}
{'type': 'loss', 'content': 0.053966376930475235, 'timestamp': '2025-10-02 01:08:49.773108', 'step': 32141, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:49.834305', 'step': 32141, 'epoch': 3}
{'type': 'loss', 'content': 0.07895078510046005, 'timestamp': '2025-10-02 01:08:49.837707', 'step': 32142, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:08:49.897701', 'step': 32142, 'epoch': 3}
{'type': 'loss', 'content': 0.08568379282951355, 'timestamp': '2025-10-02 01:08:49.912164', 'step': 32143, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:49.985937', 'step': 32143, 'epoch': 3}
{'type': 'loss', 'content': 0.10240048170089722, 'timestamp': '2025-10-02 01:08:49.993135', 'step': 32144, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:08:50.069355', 'step': 32144, 'epoch': 3}
{'type': 'loss', 'content': 0.015181254595518112, 'timestamp': '2025-10-02 01:08:50.073327', 'step': 32145, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:08:50.131043', 'step': 32145, 'epoch': 3}
{'type': 'loss', 'content': 0.08976393193006516, 'timestamp': '2025-10-02 01:08:50.134765', 'step': 32146, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:08:50.194508', 'step': 32146, 'epoch': 3}
{'type': 'loss', 'content': 0.022007212042808533, 'timestamp': '2025-10-02 01:08:50.203852', 'step': 32147, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:08:50.288882', 'step': 32147, 'epoch': 3}
{'type': 'loss', 'content': 0.05439729243516922, 'timestamp': '2025-10-02 01:08:50.296363', 'step': 32148, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 01:08:50.381164', 'step': 32148, 'epoch': 3}
{'type': 'loss', 'content': 0.023839201778173447, 'timestamp': '2025-10-02 01:08:50.394765', 'step': 32149, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:50.454641', 'step': 32149, 'epoch': 3}
{'type': 'loss', 'content': 0.0008999669807963073, 'timestamp': '2025-10-02 01:08:50.462225', 'step': 32150, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:50.522221', 'step': 32150, 'epoch': 3}
{'type': 'loss', 'content': 0.014331908896565437, 'timestamp': '2025-10-02 01:08:50.526158', 'step': 32151, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:08:50.585539', 'step': 32151, 'epoch': 3}
{'type': 'loss', 'content': 0.11897466331720352, 'timestamp': '2025-10-02 01:08:50.594018', 'step': 32152, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:50.664619', 'step': 32152, 'epoch': 3}
{'type': 'loss', 'content': 0.04899139702320099, 'timestamp': '2025-10-02 01:08:50.670333', 'step': 32153, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:08:50.751485', 'step': 32153, 'epoch': 3}
{'type': 'loss', 'content': 0.009023403748869896, 'timestamp': '2025-10-02 01:08:50.761952', 'step': 32154, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:08:50.822373', 'step': 32154, 'epoch': 3}
{'type': 'loss', 'content': 0.012461172416806221, 'timestamp': '2025-10-02 01:08:50.827374', 'step': 32155, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:50.889047', 'step': 32155, 'epoch': 3}
{'type': 'loss', 'content': 0.07253987342119217, 'timestamp': '2025-10-02 01:08:50.897522', 'step': 32156, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:50.959587', 'step': 32156, 'epoch': 3}
{'type': 'loss', 'content': 0.011564860120415688, 'timestamp': '2025-10-02 01:08:50.965274', 'step': 32157, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:51.027569', 'step': 32157, 'epoch': 3}
{'type': 'loss', 'content': 0.017245227470993996, 'timestamp': '2025-10-02 01:08:51.030825', 'step': 32158, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:08:51.103054', 'step': 32158, 'epoch': 3}
{'type': 'loss', 'content': 0.05640486255288124, 'timestamp': '2025-10-02 01:08:51.109256', 'step': 32159, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:08:51.169367', 'step': 32159, 'epoch': 3}
{'type': 'loss', 'content': 0.012380082160234451, 'timestamp': '2025-10-02 01:08:51.176943', 'step': 32160, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:08:51.258117', 'step': 32160, 'epoch': 3}
{'type': 'loss', 'content': 0.00397857092320919, 'timestamp': '2025-10-02 01:08:51.269786', 'step': 32161, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:51.328195', 'step': 32161, 'epoch': 3}
{'type': 'loss', 'content': 0.14110758900642395, 'timestamp': '2025-10-02 01:08:51.332323', 'step': 32162, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:51.401705', 'step': 32162, 'epoch': 3}
{'type': 'loss', 'content': 0.030964035540819168, 'timestamp': '2025-10-02 01:08:51.405185', 'step': 32163, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:51.467608', 'step': 32163, 'epoch': 3}
{'type': 'loss', 'content': 0.1189456656575203, 'timestamp': '2025-10-02 01:08:51.476521', 'step': 32164, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:51.542962', 'step': 32164, 'epoch': 3}
{'type': 'loss', 'content': 0.06760042905807495, 'timestamp': '2025-10-02 01:08:51.547387', 'step': 32165, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:08:51.608155', 'step': 32165, 'epoch': 3}
{'type': 'loss', 'content': 0.02352055162191391, 'timestamp': '2025-10-02 01:08:51.627901', 'step': 32166, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:08:51.692624', 'step': 32166, 'epoch': 3}
{'type': 'loss', 'content': 0.09473612159490585, 'timestamp': '2025-10-02 01:08:51.697409', 'step': 32167, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:08:51.764806', 'step': 32167, 'epoch': 3}
{'type': 'loss', 'content': 0.01441118586808443, 'timestamp': '2025-10-02 01:08:51.775784', 'step': 32168, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:51.838937', 'step': 32168, 'epoch': 3}
{'type': 'loss', 'content': 0.06417684257030487, 'timestamp': '2025-10-02 01:08:51.842449', 'step': 32169, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:51.906939', 'step': 32169, 'epoch': 3}
{'type': 'loss', 'content': 0.02598542347550392, 'timestamp': '2025-10-02 01:08:51.911381', 'step': 32170, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:08:51.969667', 'step': 32170, 'epoch': 3}
{'type': 'loss', 'content': 0.029391087591648102, 'timestamp': '2025-10-02 01:08:51.974626', 'step': 32171, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:08:52.044900', 'step': 32171, 'epoch': 3}
{'type': 'loss', 'content': 0.05987193062901497, 'timestamp': '2025-10-02 01:08:52.053648', 'step': 32172, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:52.113326', 'step': 32172, 'epoch': 3}
{'type': 'loss', 'content': 0.08745867758989334, 'timestamp': '2025-10-02 01:08:52.117667', 'step': 32173, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:52.190189', 'step': 32173, 'epoch': 3}
{'type': 'loss', 'content': 0.07485660165548325, 'timestamp': '2025-10-02 01:08:52.196087', 'step': 32174, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:52.274225', 'step': 32174, 'epoch': 3}
{'type': 'loss', 'content': 0.1190328299999237, 'timestamp': '2025-10-02 01:08:52.289013', 'step': 32175, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:52.360785', 'step': 32175, 'epoch': 3}
{'type': 'loss', 'content': 0.02431352064013481, 'timestamp': '2025-10-02 01:08:52.369476', 'step': 32176, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:52.438831', 'step': 32176, 'epoch': 3}
{'type': 'loss', 'content': 0.04627855122089386, 'timestamp': '2025-10-02 01:08:52.442057', 'step': 32177, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:52.503536', 'step': 32177, 'epoch': 3}
{'type': 'loss', 'content': 0.020058540627360344, 'timestamp': '2025-10-02 01:08:52.516235', 'step': 32178, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:08:52.589436', 'step': 32178, 'epoch': 3}
{'type': 'loss', 'content': 0.04707350581884384, 'timestamp': '2025-10-02 01:08:52.599611', 'step': 32179, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:08:52.668086', 'step': 32179, 'epoch': 3}
{'type': 'loss', 'content': 0.014827389270067215, 'timestamp': '2025-10-02 01:08:52.675492', 'step': 32180, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:52.759765', 'step': 32180, 'epoch': 3}
{'type': 'loss', 'content': 0.011741290800273418, 'timestamp': '2025-10-02 01:08:52.774457', 'step': 32181, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 592], 'flops': 11840071943488.0}, 'timestamp': '2025-10-02 01:08:52.867619', 'step': 32181, 'epoch': 3}
{'type': 'loss', 'content': 0.028047895058989525, 'timestamp': '2025-10-02 01:08:52.883999', 'step': 32182, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:08:52.945516', 'step': 32182, 'epoch': 3}
{'type': 'loss', 'content': 0.05135340616106987, 'timestamp': '2025-10-02 01:08:52.949372', 'step': 32183, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:08:53.030619', 'step': 32183, 'epoch': 3}
{'type': 'loss', 'content': 0.04231896623969078, 'timestamp': '2025-10-02 01:08:53.047532', 'step': 32184, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:53.114905', 'step': 32184, 'epoch': 3}
{'type': 'loss', 'content': 0.0171117652207613, 'timestamp': '2025-10-02 01:08:53.125167', 'step': 32185, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:08:53.208865', 'step': 32185, 'epoch': 3}
{'type': 'loss', 'content': 0.03238480165600777, 'timestamp': '2025-10-02 01:08:53.221890', 'step': 32186, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:53.306743', 'step': 32186, 'epoch': 3}
{'type': 'loss', 'content': 0.1728467494249344, 'timestamp': '2025-10-02 01:08:53.311998', 'step': 32187, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:53.396482', 'step': 32187, 'epoch': 3}
{'type': 'loss', 'content': 0.03424451872706413, 'timestamp': '2025-10-02 01:08:53.411151', 'step': 32188, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:08:53.476826', 'step': 32188, 'epoch': 3}
{'type': 'loss', 'content': 0.041321929544210434, 'timestamp': '2025-10-02 01:08:53.488404', 'step': 32189, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:53.556070', 'step': 32189, 'epoch': 3}
{'type': 'loss', 'content': 0.07167835533618927, 'timestamp': '2025-10-02 01:08:53.558822', 'step': 32190, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:08:53.615497', 'step': 32190, 'epoch': 3}
{'type': 'loss', 'content': 0.0249836016446352, 'timestamp': '2025-10-02 01:08:53.619049', 'step': 32191, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:53.682097', 'step': 32191, 'epoch': 3}
{'type': 'loss', 'content': 0.045185837894678116, 'timestamp': '2025-10-02 01:08:53.695890', 'step': 32192, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:53.772889', 'step': 32192, 'epoch': 3}
{'type': 'loss', 'content': 0.0437435656785965, 'timestamp': '2025-10-02 01:08:53.783332', 'step': 32193, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:53.868215', 'step': 32193, 'epoch': 3}
{'type': 'loss', 'content': 0.041117019951343536, 'timestamp': '2025-10-02 01:08:53.877759', 'step': 32194, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:53.957740', 'step': 32194, 'epoch': 3}
{'type': 'loss', 'content': 0.055817361921072006, 'timestamp': '2025-10-02 01:08:53.963611', 'step': 32195, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:08:54.040414', 'step': 32195, 'epoch': 3}
{'type': 'loss', 'content': 0.022972289472818375, 'timestamp': '2025-10-02 01:08:54.048807', 'step': 32196, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:54.106183', 'step': 32196, 'epoch': 3}
{'type': 'loss', 'content': 0.04247960448265076, 'timestamp': '2025-10-02 01:08:54.110457', 'step': 32197, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:54.168362', 'step': 32197, 'epoch': 3}
{'type': 'loss', 'content': 0.04443855583667755, 'timestamp': '2025-10-02 01:08:54.174472', 'step': 32198, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:54.232413', 'step': 32198, 'epoch': 3}
{'type': 'loss', 'content': 0.03401520103216171, 'timestamp': '2025-10-02 01:08:54.241969', 'step': 32199, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:54.300023', 'step': 32199, 'epoch': 3}
{'type': 'loss', 'content': 0.002882289234548807, 'timestamp': '2025-10-02 01:08:54.306666', 'step': 32200, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:08:54.362931', 'step': 32200, 'epoch': 3}
{'type': 'loss', 'content': 0.07507533580064774, 'timestamp': '2025-10-02 01:08:54.365558', 'step': 32201, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:54.422743', 'step': 32201, 'epoch': 3}
{'type': 'loss', 'content': 0.011511063203215599, 'timestamp': '2025-10-02 01:08:54.426613', 'step': 32202, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:54.483589', 'step': 32202, 'epoch': 3}
{'type': 'loss', 'content': 0.08786127716302872, 'timestamp': '2025-10-02 01:08:54.486656', 'step': 32203, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:54.543596', 'step': 32203, 'epoch': 3}
{'type': 'loss', 'content': 0.012825504876673222, 'timestamp': '2025-10-02 01:08:54.551139', 'step': 32204, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:08:54.610263', 'step': 32204, 'epoch': 3}
{'type': 'loss', 'content': 0.0005744239897467196, 'timestamp': '2025-10-02 01:08:54.620106', 'step': 32205, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:54.680810', 'step': 32205, 'epoch': 3}
{'type': 'loss', 'content': 0.04265141859650612, 'timestamp': '2025-10-02 01:08:54.683797', 'step': 32206, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:54.746387', 'step': 32206, 'epoch': 3}
{'type': 'loss', 'content': 0.03520095348358154, 'timestamp': '2025-10-02 01:08:54.749422', 'step': 32207, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:54.808340', 'step': 32207, 'epoch': 3}
{'type': 'loss', 'content': 0.0013792430981993675, 'timestamp': '2025-10-02 01:08:54.814514', 'step': 32208, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:08:54.881532', 'step': 32208, 'epoch': 3}
{'type': 'loss', 'content': 0.034935567528009415, 'timestamp': '2025-10-02 01:08:54.893311', 'step': 32209, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:54.952871', 'step': 32209, 'epoch': 3}
{'type': 'loss', 'content': 0.010516263544559479, 'timestamp': '2025-10-02 01:08:54.956387', 'step': 32210, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:55.015748', 'step': 32210, 'epoch': 3}
{'type': 'loss', 'content': 0.01748284138739109, 'timestamp': '2025-10-02 01:08:55.025279', 'step': 32211, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:55.093606', 'step': 32211, 'epoch': 3}
{'type': 'loss', 'content': 0.028903333470225334, 'timestamp': '2025-10-02 01:08:55.102930', 'step': 32212, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:55.164830', 'step': 32212, 'epoch': 3}
{'type': 'loss', 'content': 0.06808849424123764, 'timestamp': '2025-10-02 01:08:55.172690', 'step': 32213, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:08:55.247482', 'step': 32213, 'epoch': 3}
{'type': 'loss', 'content': 0.043023962527513504, 'timestamp': '2025-10-02 01:08:55.251675', 'step': 32214, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:55.326041', 'step': 32214, 'epoch': 3}
{'type': 'loss', 'content': 0.025008009746670723, 'timestamp': '2025-10-02 01:08:55.335591', 'step': 32215, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:08:55.406610', 'step': 32215, 'epoch': 3}
{'type': 'loss', 'content': 0.06932947039604187, 'timestamp': '2025-10-02 01:08:55.413874', 'step': 32216, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:55.492124', 'step': 32216, 'epoch': 3}
{'type': 'loss', 'content': 0.022895880043506622, 'timestamp': '2025-10-02 01:08:55.499727', 'step': 32217, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:55.564700', 'step': 32217, 'epoch': 3}
{'type': 'loss', 'content': 0.03395669907331467, 'timestamp': '2025-10-02 01:08:55.571393', 'step': 32218, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:55.634472', 'step': 32218, 'epoch': 3}
{'type': 'loss', 'content': 0.05494002625346184, 'timestamp': '2025-10-02 01:08:55.637519', 'step': 32219, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:55.700263', 'step': 32219, 'epoch': 3}
{'type': 'loss', 'content': 0.030261225998401642, 'timestamp': '2025-10-02 01:08:55.710585', 'step': 32220, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:55.772196', 'step': 32220, 'epoch': 3}
{'type': 'loss', 'content': 0.0026254041586071253, 'timestamp': '2025-10-02 01:08:55.779678', 'step': 32221, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:55.840985', 'step': 32221, 'epoch': 3}
{'type': 'loss', 'content': 0.006103381514549255, 'timestamp': '2025-10-02 01:08:55.848707', 'step': 32222, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:08:55.907537', 'step': 32222, 'epoch': 3}
{'type': 'loss', 'content': 0.06964889913797379, 'timestamp': '2025-10-02 01:08:55.910470', 'step': 32223, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:55.980161', 'step': 32223, 'epoch': 3}
{'type': 'loss', 'content': 0.04886157810688019, 'timestamp': '2025-10-02 01:08:55.987509', 'step': 32224, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:56.052799', 'step': 32224, 'epoch': 3}
{'type': 'loss', 'content': 0.06773197650909424, 'timestamp': '2025-10-02 01:08:56.064235', 'step': 32225, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:56.123922', 'step': 32225, 'epoch': 3}
{'type': 'loss', 'content': 0.05108583718538284, 'timestamp': '2025-10-02 01:08:56.133513', 'step': 32226, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:08:56.206297', 'step': 32226, 'epoch': 3}
{'type': 'loss', 'content': 0.0422249510884285, 'timestamp': '2025-10-02 01:08:56.215723', 'step': 32227, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:08:56.287778', 'step': 32227, 'epoch': 3}
{'type': 'loss', 'content': 0.01655743457376957, 'timestamp': '2025-10-02 01:08:56.299127', 'step': 32228, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:08:56.356720', 'step': 32228, 'epoch': 3}
{'type': 'loss', 'content': 0.04602911323308945, 'timestamp': '2025-10-02 01:08:56.363779', 'step': 32229, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:08:56.429891', 'step': 32229, 'epoch': 3}
{'type': 'loss', 'content': 0.12324243783950806, 'timestamp': '2025-10-02 01:08:56.433313', 'step': 32230, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:56.495965', 'step': 32230, 'epoch': 3}
{'type': 'loss', 'content': 0.0010461668716743588, 'timestamp': '2025-10-02 01:08:56.499314', 'step': 32231, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:56.567141', 'step': 32231, 'epoch': 3}
{'type': 'loss', 'content': 0.031179150566458702, 'timestamp': '2025-10-02 01:08:56.574205', 'step': 32232, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:08:56.631273', 'step': 32232, 'epoch': 3}
{'type': 'loss', 'content': 0.0699954703450203, 'timestamp': '2025-10-02 01:08:56.640825', 'step': 32233, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:08:56.703982', 'step': 32233, 'epoch': 3}
{'type': 'loss', 'content': 0.04093015193939209, 'timestamp': '2025-10-02 01:08:56.714420', 'step': 32234, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:56.778349', 'step': 32234, 'epoch': 3}
{'type': 'loss', 'content': 0.04931601136922836, 'timestamp': '2025-10-02 01:08:56.789964', 'step': 32235, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:08:56.876432', 'step': 32235, 'epoch': 3}
{'type': 'loss', 'content': 0.023794429376721382, 'timestamp': '2025-10-02 01:08:56.886605', 'step': 32236, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:56.944395', 'step': 32236, 'epoch': 3}
{'type': 'loss', 'content': 0.024599792435765266, 'timestamp': '2025-10-02 01:08:56.947847', 'step': 32237, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:57.005659', 'step': 32237, 'epoch': 3}
{'type': 'loss', 'content': 0.10125592350959778, 'timestamp': '2025-10-02 01:08:57.008846', 'step': 32238, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:08:57.066087', 'step': 32238, 'epoch': 3}
{'type': 'loss', 'content': 0.03710144758224487, 'timestamp': '2025-10-02 01:08:57.069860', 'step': 32239, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:08:57.135481', 'step': 32239, 'epoch': 3}
{'type': 'loss', 'content': 0.008333791978657246, 'timestamp': '2025-10-02 01:08:57.146812', 'step': 32240, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:57.209182', 'step': 32240, 'epoch': 3}
{'type': 'loss', 'content': 0.05897584185004234, 'timestamp': '2025-10-02 01:08:57.211553', 'step': 32241, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:08:57.275539', 'step': 32241, 'epoch': 3}
{'type': 'loss', 'content': 0.0021807088050991297, 'timestamp': '2025-10-02 01:08:57.286192', 'step': 32242, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:57.346490', 'step': 32242, 'epoch': 3}
{'type': 'loss', 'content': 0.001716039958409965, 'timestamp': '2025-10-02 01:08:57.348903', 'step': 32243, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:57.413798', 'step': 32243, 'epoch': 3}
{'type': 'loss', 'content': 0.03508560732007027, 'timestamp': '2025-10-02 01:08:57.419933', 'step': 32244, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:08:57.492681', 'step': 32244, 'epoch': 3}
{'type': 'loss', 'content': 0.042138174176216125, 'timestamp': '2025-10-02 01:08:57.504173', 'step': 32245, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:57.573216', 'step': 32245, 'epoch': 3}
{'type': 'loss', 'content': 0.035019177943468094, 'timestamp': '2025-10-02 01:08:57.580924', 'step': 32246, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:08:57.648580', 'step': 32246, 'epoch': 3}
{'type': 'loss', 'content': 0.015019985847175121, 'timestamp': '2025-10-02 01:08:57.659063', 'step': 32247, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:08:57.716339', 'step': 32247, 'epoch': 3}
{'type': 'loss', 'content': 0.032896384596824646, 'timestamp': '2025-10-02 01:08:57.732800', 'step': 32248, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:08:57.812727', 'step': 32248, 'epoch': 3}
{'type': 'loss', 'content': 0.07555566728115082, 'timestamp': '2025-10-02 01:08:57.815742', 'step': 32249, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:57.890690', 'step': 32249, 'epoch': 3}
{'type': 'loss', 'content': 0.0014974466757848859, 'timestamp': '2025-10-02 01:08:57.894716', 'step': 32250, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:08:57.963818', 'step': 32250, 'epoch': 3}
{'type': 'loss', 'content': 0.049739498645067215, 'timestamp': '2025-10-02 01:08:57.974591', 'step': 32251, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:08:58.044181', 'step': 32251, 'epoch': 3}
{'type': 'loss', 'content': 0.01817128248512745, 'timestamp': '2025-10-02 01:08:58.055095', 'step': 32252, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:58.110661', 'step': 32252, 'epoch': 3}
{'type': 'loss', 'content': 0.08269194513559341, 'timestamp': '2025-10-02 01:08:58.114376', 'step': 32253, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:58.173793', 'step': 32253, 'epoch': 3}
{'type': 'loss', 'content': 0.033140089362859726, 'timestamp': '2025-10-02 01:08:58.177253', 'step': 32254, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:58.236014', 'step': 32254, 'epoch': 3}
{'type': 'loss', 'content': 0.026120921596884727, 'timestamp': '2025-10-02 01:08:58.238803', 'step': 32255, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:58.295633', 'step': 32255, 'epoch': 3}
{'type': 'loss', 'content': 0.07336345314979553, 'timestamp': '2025-10-02 01:08:58.302255', 'step': 32256, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:58.358144', 'step': 32256, 'epoch': 3}
{'type': 'loss', 'content': 0.06002967804670334, 'timestamp': '2025-10-02 01:08:58.366548', 'step': 32257, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:58.441835', 'step': 32257, 'epoch': 3}
{'type': 'loss', 'content': 0.01325626578181982, 'timestamp': '2025-10-02 01:08:58.451369', 'step': 32258, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:58.516183', 'step': 32258, 'epoch': 3}
{'type': 'loss', 'content': 0.09370868653059006, 'timestamp': '2025-10-02 01:08:58.525725', 'step': 32259, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:08:58.601985', 'step': 32259, 'epoch': 3}
{'type': 'loss', 'content': 0.0530955046415329, 'timestamp': '2025-10-02 01:08:58.609031', 'step': 32260, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:58.677541', 'step': 32260, 'epoch': 3}
{'type': 'loss', 'content': 0.014611976221203804, 'timestamp': '2025-10-02 01:08:58.683421', 'step': 32261, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:08:58.750609', 'step': 32261, 'epoch': 3}
{'type': 'loss', 'content': 0.09189344942569733, 'timestamp': '2025-10-02 01:08:58.755040', 'step': 32262, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:58.825757', 'step': 32262, 'epoch': 3}
{'type': 'loss', 'content': 0.02030092291533947, 'timestamp': '2025-10-02 01:08:58.829216', 'step': 32263, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:58.893857', 'step': 32263, 'epoch': 3}
{'type': 'loss', 'content': 0.006794247776269913, 'timestamp': '2025-10-02 01:08:58.905942', 'step': 32264, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:08:58.980233', 'step': 32264, 'epoch': 3}
{'type': 'loss', 'content': 0.028413545340299606, 'timestamp': '2025-10-02 01:08:58.987381', 'step': 32265, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:08:59.058895', 'step': 32265, 'epoch': 3}
{'type': 'loss', 'content': 0.011662761680781841, 'timestamp': '2025-10-02 01:08:59.068432', 'step': 32266, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:59.126292', 'step': 32266, 'epoch': 3}
{'type': 'loss', 'content': 0.07084427028894424, 'timestamp': '2025-10-02 01:08:59.132981', 'step': 32267, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:08:59.201772', 'step': 32267, 'epoch': 3}
{'type': 'loss', 'content': 0.02498394437134266, 'timestamp': '2025-10-02 01:08:59.213201', 'step': 32268, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:59.276085', 'step': 32268, 'epoch': 3}
{'type': 'loss', 'content': 0.04955765977501869, 'timestamp': '2025-10-02 01:08:59.279872', 'step': 32269, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:08:59.336195', 'step': 32269, 'epoch': 3}
{'type': 'loss', 'content': 0.07139254361391068, 'timestamp': '2025-10-02 01:08:59.339358', 'step': 32270, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:08:59.398844', 'step': 32270, 'epoch': 3}
{'type': 'loss', 'content': 0.015427305363118649, 'timestamp': '2025-10-02 01:08:59.406191', 'step': 32271, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:08:59.468147', 'step': 32271, 'epoch': 3}
{'type': 'loss', 'content': 0.02276536263525486, 'timestamp': '2025-10-02 01:08:59.476531', 'step': 32272, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:59.532498', 'step': 32272, 'epoch': 3}
{'type': 'loss', 'content': 0.0067429933696985245, 'timestamp': '2025-10-02 01:08:59.535850', 'step': 32273, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:59.595707', 'step': 32273, 'epoch': 3}
{'type': 'loss', 'content': 0.03615541011095047, 'timestamp': '2025-10-02 01:08:59.598110', 'step': 32274, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:08:59.654157', 'step': 32274, 'epoch': 3}
{'type': 'loss', 'content': 0.03122500516474247, 'timestamp': '2025-10-02 01:08:59.660671', 'step': 32275, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:08:59.716167', 'step': 32275, 'epoch': 3}
{'type': 'loss', 'content': 0.009137658402323723, 'timestamp': '2025-10-02 01:08:59.724165', 'step': 32276, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:08:59.784496', 'step': 32276, 'epoch': 3}
{'type': 'loss', 'content': 0.0545131079852581, 'timestamp': '2025-10-02 01:08:59.791804', 'step': 32277, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:08:59.852215', 'step': 32277, 'epoch': 3}
{'type': 'loss', 'content': 0.0002290204429300502, 'timestamp': '2025-10-02 01:08:59.856654', 'step': 32278, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:08:59.915158', 'step': 32278, 'epoch': 3}
{'type': 'loss', 'content': 0.005037405528128147, 'timestamp': '2025-10-02 01:08:59.924536', 'step': 32279, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:08:59.984196', 'step': 32279, 'epoch': 3}
{'type': 'loss', 'content': 0.013813798315823078, 'timestamp': '2025-10-02 01:08:59.995343', 'step': 32280, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:00.062767', 'step': 32280, 'epoch': 3}
{'type': 'loss', 'content': 0.12552568316459656, 'timestamp': '2025-10-02 01:09:00.068727', 'step': 32281, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:00.144367', 'step': 32281, 'epoch': 3}
{'type': 'loss', 'content': 0.03713463246822357, 'timestamp': '2025-10-02 01:09:00.153614', 'step': 32282, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:00.216860', 'step': 32282, 'epoch': 3}
{'type': 'loss', 'content': 0.02220003679394722, 'timestamp': '2025-10-02 01:09:00.220405', 'step': 32283, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:00.283949', 'step': 32283, 'epoch': 3}
{'type': 'loss', 'content': 0.005071606487035751, 'timestamp': '2025-10-02 01:09:00.290129', 'step': 32284, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:00.344594', 'step': 32284, 'epoch': 3}
{'type': 'loss', 'content': 0.0357552208006382, 'timestamp': '2025-10-02 01:09:00.354796', 'step': 32285, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:00.411153', 'step': 32285, 'epoch': 3}
{'type': 'loss', 'content': 0.045853883028030396, 'timestamp': '2025-10-02 01:09:00.417075', 'step': 32286, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:09:00.495065', 'step': 32286, 'epoch': 3}
{'type': 'loss', 'content': 0.020059112459421158, 'timestamp': '2025-10-02 01:09:00.502395', 'step': 32287, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:00.588419', 'step': 32287, 'epoch': 3}
{'type': 'loss', 'content': 0.027455175295472145, 'timestamp': '2025-10-02 01:09:00.601679', 'step': 32288, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:00.659598', 'step': 32288, 'epoch': 3}
{'type': 'loss', 'content': 3.175130404997617e-05, 'timestamp': '2025-10-02 01:09:00.662988', 'step': 32289, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:09:00.734715', 'step': 32289, 'epoch': 3}
{'type': 'loss', 'content': 0.020296478644013405, 'timestamp': '2025-10-02 01:09:00.748747', 'step': 32290, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:00.815020', 'step': 32290, 'epoch': 3}
{'type': 'loss', 'content': 0.07169446349143982, 'timestamp': '2025-10-02 01:09:00.818756', 'step': 32291, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:00.876172', 'step': 32291, 'epoch': 3}
{'type': 'loss', 'content': 0.10088363289833069, 'timestamp': '2025-10-02 01:09:00.883053', 'step': 32292, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:00.952682', 'step': 32292, 'epoch': 3}
{'type': 'loss', 'content': 0.05359533056616783, 'timestamp': '2025-10-02 01:09:00.962109', 'step': 32293, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:09:01.018030', 'step': 32293, 'epoch': 3}
{'type': 'loss', 'content': 0.052776943892240524, 'timestamp': '2025-10-02 01:09:01.021121', 'step': 32294, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:01.085198', 'step': 32294, 'epoch': 3}
{'type': 'loss', 'content': 0.09024136513471603, 'timestamp': '2025-10-02 01:09:01.088439', 'step': 32295, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:01.146477', 'step': 32295, 'epoch': 3}
{'type': 'loss', 'content': 0.060733769088983536, 'timestamp': '2025-10-02 01:09:01.155274', 'step': 32296, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:01.217772', 'step': 32296, 'epoch': 3}
{'type': 'loss', 'content': 0.017450863495469093, 'timestamp': '2025-10-02 01:09:01.223352', 'step': 32297, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:09:01.298264', 'step': 32297, 'epoch': 3}
{'type': 'loss', 'content': 0.04465016722679138, 'timestamp': '2025-10-02 01:09:01.310592', 'step': 32298, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:09:01.382246', 'step': 32298, 'epoch': 3}
{'type': 'loss', 'content': 0.01113972906023264, 'timestamp': '2025-10-02 01:09:01.393115', 'step': 32299, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:01.452645', 'step': 32299, 'epoch': 3}
{'type': 'loss', 'content': 0.05058944597840309, 'timestamp': '2025-10-02 01:09:01.459120', 'step': 32300, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:01.529497', 'step': 32300, 'epoch': 3}
{'type': 'loss', 'content': 0.04278624430298805, 'timestamp': '2025-10-02 01:09:01.540531', 'step': 32301, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:01.600530', 'step': 32301, 'epoch': 3}
{'type': 'loss', 'content': 0.05429590120911598, 'timestamp': '2025-10-02 01:09:01.604851', 'step': 32302, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:01.660618', 'step': 32302, 'epoch': 3}
{'type': 'loss', 'content': 0.042689625173807144, 'timestamp': '2025-10-02 01:09:01.666425', 'step': 32303, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:01.724502', 'step': 32303, 'epoch': 3}
{'type': 'loss', 'content': 0.02267034538090229, 'timestamp': '2025-10-02 01:09:01.733959', 'step': 32304, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:09:01.817036', 'step': 32304, 'epoch': 3}
{'type': 'loss', 'content': 0.021507123485207558, 'timestamp': '2025-10-02 01:09:01.830461', 'step': 32305, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:01.913634', 'step': 32305, 'epoch': 3}
{'type': 'loss', 'content': 0.040670230984687805, 'timestamp': '2025-10-02 01:09:01.923773', 'step': 32306, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:02.011654', 'step': 32306, 'epoch': 3}
{'type': 'loss', 'content': 0.0006685937405563891, 'timestamp': '2025-10-02 01:09:02.021856', 'step': 32307, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:02.092580', 'step': 32307, 'epoch': 3}
{'type': 'loss', 'content': 0.037659719586372375, 'timestamp': '2025-10-02 01:09:02.105467', 'step': 32308, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:02.176915', 'step': 32308, 'epoch': 3}
{'type': 'loss', 'content': 0.006425368599593639, 'timestamp': '2025-10-02 01:09:02.193215', 'step': 32309, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:02.265623', 'step': 32309, 'epoch': 3}
{'type': 'loss', 'content': 0.012193361297249794, 'timestamp': '2025-10-02 01:09:02.277001', 'step': 32310, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:02.354433', 'step': 32310, 'epoch': 3}
{'type': 'loss', 'content': 0.01216546818614006, 'timestamp': '2025-10-02 01:09:02.363842', 'step': 32311, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:02.434541', 'step': 32311, 'epoch': 3}
{'type': 'loss', 'content': 0.04862787574529648, 'timestamp': '2025-10-02 01:09:02.452485', 'step': 32312, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:02.544139', 'step': 32312, 'epoch': 3}
{'type': 'loss', 'content': 0.04359089210629463, 'timestamp': '2025-10-02 01:09:02.551601', 'step': 32313, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:02.621960', 'step': 32313, 'epoch': 3}
{'type': 'loss', 'content': 0.05692340433597565, 'timestamp': '2025-10-02 01:09:02.633586', 'step': 32314, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:02.716135', 'step': 32314, 'epoch': 3}
{'type': 'loss', 'content': 0.01932513155043125, 'timestamp': '2025-10-02 01:09:02.730319', 'step': 32315, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:02.820623', 'step': 32315, 'epoch': 3}
{'type': 'loss', 'content': 0.011863849125802517, 'timestamp': '2025-10-02 01:09:02.839049', 'step': 32316, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:02.930302', 'step': 32316, 'epoch': 3}
{'type': 'loss', 'content': 0.03219937905669212, 'timestamp': '2025-10-02 01:09:02.961336', 'step': 32317, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:03.040069', 'step': 32317, 'epoch': 3}
{'type': 'loss', 'content': 0.014163197949528694, 'timestamp': '2025-10-02 01:09:03.045576', 'step': 32318, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:03.129895', 'step': 32318, 'epoch': 3}
{'type': 'loss', 'content': 0.0659262016415596, 'timestamp': '2025-10-02 01:09:03.142611', 'step': 32319, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:03.227587', 'step': 32319, 'epoch': 3}
{'type': 'loss', 'content': 0.025148164480924606, 'timestamp': '2025-10-02 01:09:03.244502', 'step': 32320, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:09:03.334330', 'step': 32320, 'epoch': 3}
{'type': 'loss', 'content': 0.03365810588002205, 'timestamp': '2025-10-02 01:09:03.345765', 'step': 32321, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:03.420913', 'step': 32321, 'epoch': 3}
{'type': 'loss', 'content': 0.09244999289512634, 'timestamp': '2025-10-02 01:09:03.428223', 'step': 32322, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:03.507310', 'step': 32322, 'epoch': 3}
{'type': 'loss', 'content': 0.05317257344722748, 'timestamp': '2025-10-02 01:09:03.516883', 'step': 32323, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:03.589342', 'step': 32323, 'epoch': 3}
{'type': 'loss', 'content': 0.010067553259432316, 'timestamp': '2025-10-02 01:09:03.599343', 'step': 32324, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:09:03.658149', 'step': 32324, 'epoch': 3}
{'type': 'loss', 'content': 0.05925551801919937, 'timestamp': '2025-10-02 01:09:03.661280', 'step': 32325, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:03.722645', 'step': 32325, 'epoch': 3}
{'type': 'loss', 'content': 0.09023673832416534, 'timestamp': '2025-10-02 01:09:03.726516', 'step': 32326, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:03.790992', 'step': 32326, 'epoch': 3}
{'type': 'loss', 'content': 0.047746315598487854, 'timestamp': '2025-10-02 01:09:03.798348', 'step': 32327, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:03.859271', 'step': 32327, 'epoch': 3}
{'type': 'loss', 'content': 0.025747131556272507, 'timestamp': '2025-10-02 01:09:03.865532', 'step': 32328, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:03.926358', 'step': 32328, 'epoch': 3}
{'type': 'loss', 'content': 0.04907078295946121, 'timestamp': '2025-10-02 01:09:03.932332', 'step': 32329, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:03.993980', 'step': 32329, 'epoch': 3}
{'type': 'loss', 'content': 0.03736105561256409, 'timestamp': '2025-10-02 01:09:04.001670', 'step': 32330, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:04.059221', 'step': 32330, 'epoch': 3}
{'type': 'loss', 'content': 0.011722453869879246, 'timestamp': '2025-10-02 01:09:04.066810', 'step': 32331, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:09:04.121509', 'step': 32331, 'epoch': 3}
{'type': 'loss', 'content': 0.05485409498214722, 'timestamp': '2025-10-02 01:09:04.129276', 'step': 32332, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:04.186794', 'step': 32332, 'epoch': 3}
{'type': 'loss', 'content': 0.05707724019885063, 'timestamp': '2025-10-02 01:09:04.191360', 'step': 32333, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:04.249633', 'step': 32333, 'epoch': 3}
{'type': 'loss', 'content': 0.03369344398379326, 'timestamp': '2025-10-02 01:09:04.252531', 'step': 32334, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:04.309042', 'step': 32334, 'epoch': 3}
{'type': 'loss', 'content': 0.01061930786818266, 'timestamp': '2025-10-02 01:09:04.318400', 'step': 32335, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:04.374366', 'step': 32335, 'epoch': 3}
{'type': 'loss', 'content': 0.02695542387664318, 'timestamp': '2025-10-02 01:09:04.381739', 'step': 32336, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:04.435861', 'step': 32336, 'epoch': 3}
{'type': 'loss', 'content': 0.041998207569122314, 'timestamp': '2025-10-02 01:09:04.438626', 'step': 32337, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:04.493335', 'step': 32337, 'epoch': 3}
{'type': 'loss', 'content': 0.023472564294934273, 'timestamp': '2025-10-02 01:09:04.496600', 'step': 32338, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:04.551614', 'step': 32338, 'epoch': 3}
{'type': 'loss', 'content': 0.004985686857253313, 'timestamp': '2025-10-02 01:09:04.554496', 'step': 32339, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:04.618492', 'step': 32339, 'epoch': 3}
{'type': 'loss', 'content': 0.056392546743154526, 'timestamp': '2025-10-02 01:09:04.631066', 'step': 32340, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:04.694420', 'step': 32340, 'epoch': 3}
{'type': 'loss', 'content': 0.03603142127394676, 'timestamp': '2025-10-02 01:09:04.699214', 'step': 32341, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:04.762093', 'step': 32341, 'epoch': 3}
{'type': 'loss', 'content': 0.08134420216083527, 'timestamp': '2025-10-02 01:09:04.766904', 'step': 32342, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:04.823904', 'step': 32342, 'epoch': 3}
{'type': 'loss', 'content': 0.06185443326830864, 'timestamp': '2025-10-02 01:09:04.826876', 'step': 32343, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:04.884529', 'step': 32343, 'epoch': 3}
{'type': 'loss', 'content': 0.034631624817848206, 'timestamp': '2025-10-02 01:09:04.890452', 'step': 32344, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:04.946472', 'step': 32344, 'epoch': 3}
{'type': 'loss', 'content': 0.022485490888357162, 'timestamp': '2025-10-02 01:09:04.949810', 'step': 32345, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:05.011752', 'step': 32345, 'epoch': 3}
{'type': 'loss', 'content': 0.028912128880620003, 'timestamp': '2025-10-02 01:09:05.017555', 'step': 32346, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:05.081546', 'step': 32346, 'epoch': 3}
{'type': 'loss', 'content': 0.07601288706064224, 'timestamp': '2025-10-02 01:09:05.084789', 'step': 32347, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:05.141571', 'step': 32347, 'epoch': 3}
{'type': 'loss', 'content': 0.001416688784956932, 'timestamp': '2025-10-02 01:09:05.149279', 'step': 32348, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:05.205249', 'step': 32348, 'epoch': 3}
{'type': 'loss', 'content': 0.0211848895996809, 'timestamp': '2025-10-02 01:09:05.220975', 'step': 32349, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:05.303427', 'step': 32349, 'epoch': 3}
{'type': 'loss', 'content': 0.011218268424272537, 'timestamp': '2025-10-02 01:09:05.309559', 'step': 32350, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:09:05.382299', 'step': 32350, 'epoch': 3}
{'type': 'loss', 'content': 0.03411533311009407, 'timestamp': '2025-10-02 01:09:05.392714', 'step': 32351, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:05.461639', 'step': 32351, 'epoch': 3}
{'type': 'loss', 'content': 0.036516252905130386, 'timestamp': '2025-10-02 01:09:05.469298', 'step': 32352, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:05.542027', 'step': 32352, 'epoch': 3}
{'type': 'loss', 'content': 0.007385163102298975, 'timestamp': '2025-10-02 01:09:05.552996', 'step': 32353, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:05.615324', 'step': 32353, 'epoch': 3}
{'type': 'loss', 'content': 0.023603474721312523, 'timestamp': '2025-10-02 01:09:05.623329', 'step': 32354, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:05.696036', 'step': 32354, 'epoch': 3}
{'type': 'loss', 'content': 0.0021490093786269426, 'timestamp': '2025-10-02 01:09:05.703530', 'step': 32355, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:05.770195', 'step': 32355, 'epoch': 3}
{'type': 'loss', 'content': 0.02159302309155464, 'timestamp': '2025-10-02 01:09:05.785260', 'step': 32356, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:05.843278', 'step': 32356, 'epoch': 3}
{'type': 'loss', 'content': 0.03398749604821205, 'timestamp': '2025-10-02 01:09:05.846935', 'step': 32357, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:05.907089', 'step': 32357, 'epoch': 3}
{'type': 'loss', 'content': 0.02374654822051525, 'timestamp': '2025-10-02 01:09:05.910115', 'step': 32358, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:05.972401', 'step': 32358, 'epoch': 3}
{'type': 'loss', 'content': 0.12317980080842972, 'timestamp': '2025-10-02 01:09:05.975748', 'step': 32359, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:06.044283', 'step': 32359, 'epoch': 3}
{'type': 'loss', 'content': 0.07283437252044678, 'timestamp': '2025-10-02 01:09:06.051377', 'step': 32360, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:06.116356', 'step': 32360, 'epoch': 3}
{'type': 'loss', 'content': 0.05956856906414032, 'timestamp': '2025-10-02 01:09:06.127121', 'step': 32361, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:06.204626', 'step': 32361, 'epoch': 3}
{'type': 'loss', 'content': 0.1400163322687149, 'timestamp': '2025-10-02 01:09:06.208311', 'step': 32362, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:06.271946', 'step': 32362, 'epoch': 3}
{'type': 'loss', 'content': 0.019528796896338463, 'timestamp': '2025-10-02 01:09:06.277899', 'step': 32363, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:06.344678', 'step': 32363, 'epoch': 3}
{'type': 'loss', 'content': 0.06709165126085281, 'timestamp': '2025-10-02 01:09:06.356718', 'step': 32364, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:06.416280', 'step': 32364, 'epoch': 3}
{'type': 'loss', 'content': 0.028621915727853775, 'timestamp': '2025-10-02 01:09:06.425774', 'step': 32365, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:06.482485', 'step': 32365, 'epoch': 3}
{'type': 'loss', 'content': 0.008374791592359543, 'timestamp': '2025-10-02 01:09:06.490656', 'step': 32366, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:06.558073', 'step': 32366, 'epoch': 3}
{'type': 'loss', 'content': 0.01594412699341774, 'timestamp': '2025-10-02 01:09:06.561323', 'step': 32367, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:09:06.618614', 'step': 32367, 'epoch': 3}
{'type': 'loss', 'content': 0.037387847900390625, 'timestamp': '2025-10-02 01:09:06.625577', 'step': 32368, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:06.679649', 'step': 32368, 'epoch': 3}
{'type': 'loss', 'content': 0.006060595158487558, 'timestamp': '2025-10-02 01:09:06.689878', 'step': 32369, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:06.744732', 'step': 32369, 'epoch': 3}
{'type': 'loss', 'content': 0.0277358777821064, 'timestamp': '2025-10-02 01:09:06.750795', 'step': 32370, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:06.805101', 'step': 32370, 'epoch': 3}
{'type': 'loss', 'content': 0.03823048993945122, 'timestamp': '2025-10-02 01:09:06.807499', 'step': 32371, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:06.867751', 'step': 32371, 'epoch': 3}
{'type': 'loss', 'content': 0.006803716532886028, 'timestamp': '2025-10-02 01:09:06.878703', 'step': 32372, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:06.938071', 'step': 32372, 'epoch': 3}
{'type': 'loss', 'content': 0.05552460253238678, 'timestamp': '2025-10-02 01:09:06.949041', 'step': 32373, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:07.005398', 'step': 32373, 'epoch': 3}
{'type': 'loss', 'content': 0.004947919864207506, 'timestamp': '2025-10-02 01:09:07.014972', 'step': 32374, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:07.073759', 'step': 32374, 'epoch': 3}
{'type': 'loss', 'content': 0.03902965784072876, 'timestamp': '2025-10-02 01:09:07.075962', 'step': 32375, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:07.134498', 'step': 32375, 'epoch': 3}
{'type': 'loss', 'content': 0.048647765070199966, 'timestamp': '2025-10-02 01:09:07.145460', 'step': 32376, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:07.198495', 'step': 32376, 'epoch': 3}
{'type': 'loss', 'content': 0.04668514057993889, 'timestamp': '2025-10-02 01:09:07.208069', 'step': 32377, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:07.262907', 'step': 32377, 'epoch': 3}
{'type': 'loss', 'content': 0.004553754348307848, 'timestamp': '2025-10-02 01:09:07.272561', 'step': 32378, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:07.326625', 'step': 32378, 'epoch': 3}
{'type': 'loss', 'content': 0.02291570045053959, 'timestamp': '2025-10-02 01:09:07.328961', 'step': 32379, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:07.384459', 'step': 32379, 'epoch': 3}
{'type': 'loss', 'content': 0.05070069059729576, 'timestamp': '2025-10-02 01:09:07.394747', 'step': 32380, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:07.452141', 'step': 32380, 'epoch': 3}
{'type': 'loss', 'content': 0.0005598283605650067, 'timestamp': '2025-10-02 01:09:07.463117', 'step': 32381, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:07.518382', 'step': 32381, 'epoch': 3}
{'type': 'loss', 'content': 0.045249003916978836, 'timestamp': '2025-10-02 01:09:07.521138', 'step': 32382, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:07.577127', 'step': 32382, 'epoch': 3}
{'type': 'loss', 'content': 0.00906060915440321, 'timestamp': '2025-10-02 01:09:07.582962', 'step': 32383, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:07.637219', 'step': 32383, 'epoch': 3}
{'type': 'loss', 'content': 0.010647253133356571, 'timestamp': '2025-10-02 01:09:07.645577', 'step': 32384, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:07.700128', 'step': 32384, 'epoch': 3}
{'type': 'loss', 'content': 0.03715517744421959, 'timestamp': '2025-10-02 01:09:07.704811', 'step': 32385, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:07.759367', 'step': 32385, 'epoch': 3}
{'type': 'loss', 'content': 0.05523596704006195, 'timestamp': '2025-10-02 01:09:07.762453', 'step': 32386, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:07.816517', 'step': 32386, 'epoch': 3}
{'type': 'loss', 'content': 0.044325437396764755, 'timestamp': '2025-10-02 01:09:07.818861', 'step': 32387, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:07.873765', 'step': 32387, 'epoch': 3}
{'type': 'loss', 'content': 0.007104809861630201, 'timestamp': '2025-10-02 01:09:07.879636', 'step': 32388, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:07.933601', 'step': 32388, 'epoch': 3}
{'type': 'loss', 'content': 0.04976080730557442, 'timestamp': '2025-10-02 01:09:07.936405', 'step': 32389, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:09:07.990426', 'step': 32389, 'epoch': 3}
{'type': 'loss', 'content': 0.04894274100661278, 'timestamp': '2025-10-02 01:09:07.994211', 'step': 32390, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:08.052873', 'step': 32390, 'epoch': 3}
{'type': 'loss', 'content': 0.03710617870092392, 'timestamp': '2025-10-02 01:09:08.055876', 'step': 32391, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:08.110531', 'step': 32391, 'epoch': 3}
{'type': 'loss', 'content': 0.057570260018110275, 'timestamp': '2025-10-02 01:09:08.116715', 'step': 32392, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:08.170512', 'step': 32392, 'epoch': 3}
{'type': 'loss', 'content': 0.017903881147503853, 'timestamp': '2025-10-02 01:09:08.176271', 'step': 32393, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:08.231156', 'step': 32393, 'epoch': 3}
{'type': 'loss', 'content': 0.029719410464167595, 'timestamp': '2025-10-02 01:09:08.233767', 'step': 32394, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:08.289033', 'step': 32394, 'epoch': 3}
{'type': 'loss', 'content': 0.05724574252963066, 'timestamp': '2025-10-02 01:09:08.292885', 'step': 32395, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:08.349826', 'step': 32395, 'epoch': 3}
{'type': 'loss', 'content': 0.04238281399011612, 'timestamp': '2025-10-02 01:09:08.360204', 'step': 32396, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:09:08.414995', 'step': 32396, 'epoch': 3}
{'type': 'loss', 'content': 0.13575533032417297, 'timestamp': '2025-10-02 01:09:08.417354', 'step': 32397, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:08.471508', 'step': 32397, 'epoch': 3}
{'type': 'loss', 'content': 0.09808510541915894, 'timestamp': '2025-10-02 01:09:08.474585', 'step': 32398, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:08.529564', 'step': 32398, 'epoch': 3}
{'type': 'loss', 'content': 0.009643311612308025, 'timestamp': '2025-10-02 01:09:08.532596', 'step': 32399, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:08.587061', 'step': 32399, 'epoch': 3}
{'type': 'loss', 'content': 0.022308217361569405, 'timestamp': '2025-10-02 01:09:08.593357', 'step': 32400, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:08.647302', 'step': 32400, 'epoch': 3}
{'type': 'loss', 'content': 0.07327291369438171, 'timestamp': '2025-10-02 01:09:08.650465', 'step': 32401, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:08.706090', 'step': 32401, 'epoch': 3}
{'type': 'loss', 'content': 0.10644295811653137, 'timestamp': '2025-10-02 01:09:08.708586', 'step': 32402, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:08.762605', 'step': 32402, 'epoch': 3}
{'type': 'loss', 'content': 0.048659585416316986, 'timestamp': '2025-10-02 01:09:08.770187', 'step': 32403, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:08.828425', 'step': 32403, 'epoch': 3}
{'type': 'loss', 'content': 0.0052291275933384895, 'timestamp': '2025-10-02 01:09:08.835205', 'step': 32404, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:08.889347', 'step': 32404, 'epoch': 3}
{'type': 'loss', 'content': 0.1257607340812683, 'timestamp': '2025-10-02 01:09:08.891738', 'step': 32405, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:08.945914', 'step': 32405, 'epoch': 3}
{'type': 'loss', 'content': 0.023697685450315475, 'timestamp': '2025-10-02 01:09:08.948640', 'step': 32406, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:09.003681', 'step': 32406, 'epoch': 3}
{'type': 'loss', 'content': 0.007997294887900352, 'timestamp': '2025-10-02 01:09:09.013022', 'step': 32407, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:09.076507', 'step': 32407, 'epoch': 3}
{'type': 'loss', 'content': 0.07009144872426987, 'timestamp': '2025-10-02 01:09:09.086794', 'step': 32408, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:09.140373', 'step': 32408, 'epoch': 3}
{'type': 'loss', 'content': 0.03007485345005989, 'timestamp': '2025-10-02 01:09:09.146952', 'step': 32409, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:09.203047', 'step': 32409, 'epoch': 3}
{'type': 'loss', 'content': 0.10316716134548187, 'timestamp': '2025-10-02 01:09:09.205713', 'step': 32410, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:09:09.268083', 'step': 32410, 'epoch': 3}
{'type': 'loss', 'content': 0.042635150253772736, 'timestamp': '2025-10-02 01:09:09.278570', 'step': 32411, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:09.332664', 'step': 32411, 'epoch': 3}
{'type': 'loss', 'content': 0.04073929041624069, 'timestamp': '2025-10-02 01:09:09.340947', 'step': 32412, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:09.395667', 'step': 32412, 'epoch': 3}
{'type': 'loss', 'content': 0.09971050173044205, 'timestamp': '2025-10-02 01:09:09.399329', 'step': 32413, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:09.454046', 'step': 32413, 'epoch': 3}
{'type': 'loss', 'content': 0.002252948936074972, 'timestamp': '2025-10-02 01:09:09.459963', 'step': 32414, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:09.515122', 'step': 32414, 'epoch': 3}
{'type': 'loss', 'content': 0.04496585205197334, 'timestamp': '2025-10-02 01:09:09.524407', 'step': 32415, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 01:09:09.594826', 'step': 32415, 'epoch': 3}
{'type': 'loss', 'content': 0.020141998305916786, 'timestamp': '2025-10-02 01:09:09.608045', 'step': 32416, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:09.661395', 'step': 32416, 'epoch': 3}
{'type': 'loss', 'content': 0.025174258276820183, 'timestamp': '2025-10-02 01:09:09.669976', 'step': 32417, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:09.725295', 'step': 32417, 'epoch': 3}
{'type': 'loss', 'content': 0.035443615168333054, 'timestamp': '2025-10-02 01:09:09.727836', 'step': 32418, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:09.783631', 'step': 32418, 'epoch': 3}
{'type': 'loss', 'content': 0.016199853271245956, 'timestamp': '2025-10-02 01:09:09.792878', 'step': 32419, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:09:09.847965', 'step': 32419, 'epoch': 3}
{'type': 'loss', 'content': 0.04653142765164375, 'timestamp': '2025-10-02 01:09:09.854775', 'step': 32420, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:09.908328', 'step': 32420, 'epoch': 3}
{'type': 'loss', 'content': 0.010948571376502514, 'timestamp': '2025-10-02 01:09:09.915838', 'step': 32421, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:09.971003', 'step': 32421, 'epoch': 3}
{'type': 'loss', 'content': 0.04495028033852577, 'timestamp': '2025-10-02 01:09:09.973496', 'step': 32422, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:10.028468', 'step': 32422, 'epoch': 3}
{'type': 'loss', 'content': 0.039758965373039246, 'timestamp': '2025-10-02 01:09:10.032019', 'step': 32423, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:10.091449', 'step': 32423, 'epoch': 3}
{'type': 'loss', 'content': 0.007806203793734312, 'timestamp': '2025-10-02 01:09:10.098240', 'step': 32424, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:10.152177', 'step': 32424, 'epoch': 3}
{'type': 'loss', 'content': 0.1520986706018448, 'timestamp': '2025-10-02 01:09:10.154706', 'step': 32425, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:10.209449', 'step': 32425, 'epoch': 3}
{'type': 'loss', 'content': 0.01344413310289383, 'timestamp': '2025-10-02 01:09:10.216971', 'step': 32426, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 01:09:10.292028', 'step': 32426, 'epoch': 3}
{'type': 'loss', 'content': 0.04470308497548103, 'timestamp': '2025-10-02 01:09:10.305467', 'step': 32427, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:10.360287', 'step': 32427, 'epoch': 3}
{'type': 'loss', 'content': 0.05557155981659889, 'timestamp': '2025-10-02 01:09:10.366937', 'step': 32428, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:10.422266', 'step': 32428, 'epoch': 3}
{'type': 'loss', 'content': 0.06182381883263588, 'timestamp': '2025-10-02 01:09:10.424689', 'step': 32429, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:10.480059', 'step': 32429, 'epoch': 3}
{'type': 'loss', 'content': 0.0313522070646286, 'timestamp': '2025-10-02 01:09:10.482402', 'step': 32430, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:10.536882', 'step': 32430, 'epoch': 3}
{'type': 'loss', 'content': 0.011866597458720207, 'timestamp': '2025-10-02 01:09:10.542936', 'step': 32431, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:09:10.597236', 'step': 32431, 'epoch': 3}
{'type': 'loss', 'content': 0.03898384049534798, 'timestamp': '2025-10-02 01:09:10.603583', 'step': 32432, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:10.656948', 'step': 32432, 'epoch': 3}
{'type': 'loss', 'content': 0.08443222939968109, 'timestamp': '2025-10-02 01:09:10.662334', 'step': 32433, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:10.717761', 'step': 32433, 'epoch': 3}
{'type': 'loss', 'content': 0.058837682008743286, 'timestamp': '2025-10-02 01:09:10.720245', 'step': 32434, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:09:10.774127', 'step': 32434, 'epoch': 3}
{'type': 'loss', 'content': 0.024293573573231697, 'timestamp': '2025-10-02 01:09:10.777035', 'step': 32435, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:09:10.844627', 'step': 32435, 'epoch': 3}
{'type': 'loss', 'content': 0.021705305203795433, 'timestamp': '2025-10-02 01:09:10.857307', 'step': 32436, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:10.911675', 'step': 32436, 'epoch': 3}
{'type': 'loss', 'content': 0.0886402502655983, 'timestamp': '2025-10-02 01:09:10.914403', 'step': 32437, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:10.969073', 'step': 32437, 'epoch': 3}
{'type': 'loss', 'content': 0.011393396183848381, 'timestamp': '2025-10-02 01:09:10.976620', 'step': 32438, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:11.031829', 'step': 32438, 'epoch': 3}
{'type': 'loss', 'content': 0.12206901609897614, 'timestamp': '2025-10-02 01:09:11.034767', 'step': 32439, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:11.097202', 'step': 32439, 'epoch': 3}
{'type': 'loss', 'content': 0.013782719150185585, 'timestamp': '2025-10-02 01:09:11.108201', 'step': 32440, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:11.163724', 'step': 32440, 'epoch': 3}
{'type': 'loss', 'content': 0.008767764084041119, 'timestamp': '2025-10-02 01:09:11.167467', 'step': 32441, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:09:11.222498', 'step': 32441, 'epoch': 3}
{'type': 'loss', 'content': 0.027296442538499832, 'timestamp': '2025-10-02 01:09:11.225106', 'step': 32442, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:11.280521', 'step': 32442, 'epoch': 3}
{'type': 'loss', 'content': 0.025445709004998207, 'timestamp': '2025-10-02 01:09:11.286112', 'step': 32443, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:11.340670', 'step': 32443, 'epoch': 3}
{'type': 'loss', 'content': 0.06889884173870087, 'timestamp': '2025-10-02 01:09:11.348944', 'step': 32444, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:11.404762', 'step': 32444, 'epoch': 3}
{'type': 'loss', 'content': 0.06266073882579803, 'timestamp': '2025-10-02 01:09:11.407635', 'step': 32445, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:11.465873', 'step': 32445, 'epoch': 3}
{'type': 'loss', 'content': 0.014813792891800404, 'timestamp': '2025-10-02 01:09:11.475248', 'step': 32446, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:11.531646', 'step': 32446, 'epoch': 3}
{'type': 'loss', 'content': 0.1382276713848114, 'timestamp': '2025-10-02 01:09:11.535510', 'step': 32447, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:11.593609', 'step': 32447, 'epoch': 3}
{'type': 'loss', 'content': 0.07546785473823547, 'timestamp': '2025-10-02 01:09:11.600993', 'step': 32448, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:11.657193', 'step': 32448, 'epoch': 3}
{'type': 'loss', 'content': 0.046046871691942215, 'timestamp': '2025-10-02 01:09:11.663026', 'step': 32449, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:09:11.725114', 'step': 32449, 'epoch': 3}
{'type': 'loss', 'content': 0.014273321256041527, 'timestamp': '2025-10-02 01:09:11.735588', 'step': 32450, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:11.794269', 'step': 32450, 'epoch': 3}
{'type': 'loss', 'content': 0.018247587606310844, 'timestamp': '2025-10-02 01:09:11.803798', 'step': 32451, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:09:11.867352', 'step': 32451, 'epoch': 3}
{'type': 'loss', 'content': 0.019624784588813782, 'timestamp': '2025-10-02 01:09:11.878828', 'step': 32452, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:09:11.935944', 'step': 32452, 'epoch': 3}
{'type': 'loss', 'content': 0.04390161484479904, 'timestamp': '2025-10-02 01:09:11.939259', 'step': 32453, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:11.995335', 'step': 32453, 'epoch': 3}
{'type': 'loss', 'content': 0.02650086022913456, 'timestamp': '2025-10-02 01:09:11.998278', 'step': 32454, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:12.056383', 'step': 32454, 'epoch': 3}
{'type': 'loss', 'content': 0.019534815102815628, 'timestamp': '2025-10-02 01:09:12.061572', 'step': 32455, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:12.123504', 'step': 32455, 'epoch': 3}
{'type': 'loss', 'content': 0.06098562106490135, 'timestamp': '2025-10-02 01:09:12.132808', 'step': 32456, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:12.192607', 'step': 32456, 'epoch': 3}
{'type': 'loss', 'content': 0.07695251703262329, 'timestamp': '2025-10-02 01:09:12.198881', 'step': 32457, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:12.255220', 'step': 32457, 'epoch': 3}
{'type': 'loss', 'content': 0.017692575231194496, 'timestamp': '2025-10-02 01:09:12.264560', 'step': 32458, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:12.321716', 'step': 32458, 'epoch': 3}
{'type': 'loss', 'content': 0.11091576516628265, 'timestamp': '2025-10-02 01:09:12.324916', 'step': 32459, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:12.384819', 'step': 32459, 'epoch': 3}
{'type': 'loss', 'content': 0.002090688096359372, 'timestamp': '2025-10-02 01:09:12.392539', 'step': 32460, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:12.448555', 'step': 32460, 'epoch': 3}
{'type': 'loss', 'content': 0.009109201841056347, 'timestamp': '2025-10-02 01:09:12.454576', 'step': 32461, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:12.511875', 'step': 32461, 'epoch': 3}
{'type': 'loss', 'content': 0.03702922165393829, 'timestamp': '2025-10-02 01:09:12.514346', 'step': 32462, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:12.570461', 'step': 32462, 'epoch': 3}
{'type': 'loss', 'content': 0.025237029418349266, 'timestamp': '2025-10-02 01:09:12.576572', 'step': 32463, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:12.634317', 'step': 32463, 'epoch': 3}
{'type': 'loss', 'content': 0.06162129342556, 'timestamp': '2025-10-02 01:09:12.641033', 'step': 32464, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:12.696509', 'step': 32464, 'epoch': 3}
{'type': 'loss', 'content': 0.027116967365145683, 'timestamp': '2025-10-02 01:09:12.702514', 'step': 32465, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:12.760736', 'step': 32465, 'epoch': 3}
{'type': 'loss', 'content': 0.00024208136892411858, 'timestamp': '2025-10-02 01:09:12.768282', 'step': 32466, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:12.823757', 'step': 32466, 'epoch': 3}
{'type': 'loss', 'content': 0.015408650040626526, 'timestamp': '2025-10-02 01:09:12.826194', 'step': 32467, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:09:12.891000', 'step': 32467, 'epoch': 3}
{'type': 'loss', 'content': 0.03372599184513092, 'timestamp': '2025-10-02 01:09:12.902254', 'step': 32468, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:12.957134', 'step': 32468, 'epoch': 3}
{'type': 'loss', 'content': 0.041628722101449966, 'timestamp': '2025-10-02 01:09:12.959777', 'step': 32469, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-10-02 01:09:13.041622', 'step': 32469, 'epoch': 3}
{'type': 'loss', 'content': 0.026950713247060776, 'timestamp': '2025-10-02 01:09:13.056395', 'step': 32470, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:13.133267', 'step': 32470, 'epoch': 3}
{'type': 'loss', 'content': 0.07554477453231812, 'timestamp': '2025-10-02 01:09:13.135683', 'step': 32471, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:09:13.193261', 'step': 32471, 'epoch': 3}
{'type': 'loss', 'content': 0.04196079075336456, 'timestamp': '2025-10-02 01:09:13.199212', 'step': 32472, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:13.253040', 'step': 32472, 'epoch': 3}
{'type': 'loss', 'content': 0.06268814206123352, 'timestamp': '2025-10-02 01:09:13.255402', 'step': 32473, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:13.309744', 'step': 32473, 'epoch': 3}
{'type': 'loss', 'content': 0.08326026052236557, 'timestamp': '2025-10-02 01:09:13.312976', 'step': 32474, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:13.367233', 'step': 32474, 'epoch': 3}
{'type': 'loss', 'content': 0.06277038902044296, 'timestamp': '2025-10-02 01:09:13.370378', 'step': 32475, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:13.423947', 'step': 32475, 'epoch': 3}
{'type': 'loss', 'content': 0.025782985612750053, 'timestamp': '2025-10-02 01:09:13.429929', 'step': 32476, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:13.483654', 'step': 32476, 'epoch': 3}
{'type': 'loss', 'content': 0.03830565884709358, 'timestamp': '2025-10-02 01:09:13.486305', 'step': 32477, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:13.540679', 'step': 32477, 'epoch': 3}
{'type': 'loss', 'content': 0.0072418986819684505, 'timestamp': '2025-10-02 01:09:13.546702', 'step': 32478, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:13.600944', 'step': 32478, 'epoch': 3}
{'type': 'loss', 'content': 0.06933131814002991, 'timestamp': '2025-10-02 01:09:13.603619', 'step': 32479, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:13.660072', 'step': 32479, 'epoch': 3}
{'type': 'loss', 'content': 0.026927966624498367, 'timestamp': '2025-10-02 01:09:13.666704', 'step': 32480, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:13.721746', 'step': 32480, 'epoch': 3}
{'type': 'loss', 'content': 0.009236359968781471, 'timestamp': '2025-10-02 01:09:13.724357', 'step': 32481, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:13.779145', 'step': 32481, 'epoch': 3}
{'type': 'loss', 'content': 0.015437299385666847, 'timestamp': '2025-10-02 01:09:13.782009', 'step': 32482, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:13.837090', 'step': 32482, 'epoch': 3}
{'type': 'loss', 'content': 0.03214464709162712, 'timestamp': '2025-10-02 01:09:13.839924', 'step': 32483, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:13.894353', 'step': 32483, 'epoch': 3}
{'type': 'loss', 'content': 0.036339033395051956, 'timestamp': '2025-10-02 01:09:13.901315', 'step': 32484, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:13.957169', 'step': 32484, 'epoch': 3}
{'type': 'loss', 'content': 0.012156451120972633, 'timestamp': '2025-10-02 01:09:13.963127', 'step': 32485, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:14.017399', 'step': 32485, 'epoch': 3}
{'type': 'loss', 'content': 0.05362223833799362, 'timestamp': '2025-10-02 01:09:14.019912', 'step': 32486, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:14.074431', 'step': 32486, 'epoch': 3}
{'type': 'loss', 'content': 0.018564710393548012, 'timestamp': '2025-10-02 01:09:14.077748', 'step': 32487, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:14.133951', 'step': 32487, 'epoch': 3}
{'type': 'loss', 'content': 0.03299008309841156, 'timestamp': '2025-10-02 01:09:14.140046', 'step': 32488, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:14.193711', 'step': 32488, 'epoch': 3}
{'type': 'loss', 'content': 0.07499029487371445, 'timestamp': '2025-10-02 01:09:14.195906', 'step': 32489, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:14.250436', 'step': 32489, 'epoch': 3}
{'type': 'loss', 'content': 0.00397074781358242, 'timestamp': '2025-10-02 01:09:14.252886', 'step': 32490, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:14.306705', 'step': 32490, 'epoch': 3}
{'type': 'loss', 'content': 0.02924654632806778, 'timestamp': '2025-10-02 01:09:14.312600', 'step': 32491, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:09:14.367298', 'step': 32491, 'epoch': 3}
{'type': 'loss', 'content': 0.08315017074346542, 'timestamp': '2025-10-02 01:09:14.373513', 'step': 32492, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:14.426299', 'step': 32492, 'epoch': 3}
{'type': 'loss', 'content': 0.06590546667575836, 'timestamp': '2025-10-02 01:09:14.428690', 'step': 32493, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:09:14.483190', 'step': 32493, 'epoch': 3}
{'type': 'loss', 'content': 0.07050614804029465, 'timestamp': '2025-10-02 01:09:14.485757', 'step': 32494, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:09:14.554858', 'step': 32494, 'epoch': 3}
{'type': 'loss', 'content': 0.026636512950062752, 'timestamp': '2025-10-02 01:09:14.567177', 'step': 32495, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:14.621668', 'step': 32495, 'epoch': 3}
{'type': 'loss', 'content': 0.018270744010806084, 'timestamp': '2025-10-02 01:09:14.628233', 'step': 32496, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:14.681907', 'step': 32496, 'epoch': 3}
{'type': 'loss', 'content': 0.04153675585985184, 'timestamp': '2025-10-02 01:09:14.684579', 'step': 32497, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:14.738723', 'step': 32497, 'epoch': 3}
{'type': 'loss', 'content': 0.041336655616760254, 'timestamp': '2025-10-02 01:09:14.744619', 'step': 32498, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:14.799150', 'step': 32498, 'epoch': 3}
{'type': 'loss', 'content': 0.006664665415883064, 'timestamp': '2025-10-02 01:09:14.801466', 'step': 32499, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:09:14.863110', 'step': 32499, 'epoch': 3}
{'type': 'loss', 'content': 0.04965091496706009, 'timestamp': '2025-10-02 01:09:14.874405', 'step': 32500, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 32500', 'timestamp': '2025-10-02 01:09:15.274500', 'step': 32500, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:15.330465', 'step': 32500, 'epoch': 3}
{'type': 'loss', 'content': 0.13984625041484833, 'timestamp': '2025-10-02 01:09:15.332633', 'step': 32501, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:15.388172', 'step': 32501, 'epoch': 3}
{'type': 'loss', 'content': 0.05053536593914032, 'timestamp': '2025-10-02 01:09:15.390513', 'step': 32502, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:15.444514', 'step': 32502, 'epoch': 3}
{'type': 'loss', 'content': 0.08332226425409317, 'timestamp': '2025-10-02 01:09:15.447161', 'step': 32503, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:15.501797', 'step': 32503, 'epoch': 3}
{'type': 'loss', 'content': 0.04989504814147949, 'timestamp': '2025-10-02 01:09:15.507949', 'step': 32504, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:15.563589', 'step': 32504, 'epoch': 3}
{'type': 'loss', 'content': 0.0007021779892966151, 'timestamp': '2025-10-02 01:09:15.569230', 'step': 32505, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:15.627492', 'step': 32505, 'epoch': 3}
{'type': 'loss', 'content': 0.06483336538076401, 'timestamp': '2025-10-02 01:09:15.637674', 'step': 32506, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:15.692216', 'step': 32506, 'epoch': 3}
{'type': 'loss', 'content': 0.011771302670240402, 'timestamp': '2025-10-02 01:09:15.695059', 'step': 32507, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:15.749513', 'step': 32507, 'epoch': 3}
{'type': 'loss', 'content': 0.0178648941218853, 'timestamp': '2025-10-02 01:09:15.755304', 'step': 32508, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:15.808420', 'step': 32508, 'epoch': 3}
{'type': 'loss', 'content': 0.10740383714437485, 'timestamp': '2025-10-02 01:09:15.810618', 'step': 32509, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:15.865090', 'step': 32509, 'epoch': 3}
{'type': 'loss', 'content': 0.047076255083084106, 'timestamp': '2025-10-02 01:09:15.867634', 'step': 32510, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:15.923812', 'step': 32510, 'epoch': 3}
{'type': 'loss', 'content': 0.03678770363330841, 'timestamp': '2025-10-02 01:09:15.933423', 'step': 32511, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:15.988513', 'step': 32511, 'epoch': 3}
{'type': 'loss', 'content': 0.02410208247601986, 'timestamp': '2025-10-02 01:09:15.994361', 'step': 32512, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:16.048653', 'step': 32512, 'epoch': 3}
{'type': 'loss', 'content': 0.025149976834654808, 'timestamp': '2025-10-02 01:09:16.051081', 'step': 32513, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:16.107141', 'step': 32513, 'epoch': 3}
{'type': 'loss', 'content': 0.07280536741018295, 'timestamp': '2025-10-02 01:09:16.113152', 'step': 32514, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:16.169216', 'step': 32514, 'epoch': 3}
{'type': 'loss', 'content': 0.024918129667639732, 'timestamp': '2025-10-02 01:09:16.172220', 'step': 32515, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:16.229682', 'step': 32515, 'epoch': 3}
{'type': 'loss', 'content': 0.03731779009103775, 'timestamp': '2025-10-02 01:09:16.239788', 'step': 32516, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:16.297171', 'step': 32516, 'epoch': 3}
{'type': 'loss', 'content': 0.018050335347652435, 'timestamp': '2025-10-02 01:09:16.308124', 'step': 32517, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:09:16.370820', 'step': 32517, 'epoch': 3}
{'type': 'loss', 'content': 0.027938099578022957, 'timestamp': '2025-10-02 01:09:16.381776', 'step': 32518, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:09:16.435809', 'step': 32518, 'epoch': 3}
{'type': 'loss', 'content': 0.04466956481337547, 'timestamp': '2025-10-02 01:09:16.438245', 'step': 32519, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:16.493044', 'step': 32519, 'epoch': 3}
{'type': 'loss', 'content': 0.028659388422966003, 'timestamp': '2025-10-02 01:09:16.498928', 'step': 32520, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:16.552596', 'step': 32520, 'epoch': 3}
{'type': 'loss', 'content': 0.097208172082901, 'timestamp': '2025-10-02 01:09:16.554922', 'step': 32521, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:16.608812', 'step': 32521, 'epoch': 3}
{'type': 'loss', 'content': 0.059752997010946274, 'timestamp': '2025-10-02 01:09:16.618134', 'step': 32522, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:09:16.672317', 'step': 32522, 'epoch': 3}
{'type': 'loss', 'content': 0.17850905656814575, 'timestamp': '2025-10-02 01:09:16.675488', 'step': 32523, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:16.731032', 'step': 32523, 'epoch': 3}
{'type': 'loss', 'content': 0.06230948865413666, 'timestamp': '2025-10-02 01:09:16.737506', 'step': 32524, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:16.793308', 'step': 32524, 'epoch': 3}
{'type': 'loss', 'content': 0.021856950595974922, 'timestamp': '2025-10-02 01:09:16.799367', 'step': 32525, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:16.854324', 'step': 32525, 'epoch': 3}
{'type': 'loss', 'content': 0.03830098360776901, 'timestamp': '2025-10-02 01:09:16.863697', 'step': 32526, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:16.917681', 'step': 32526, 'epoch': 3}
{'type': 'loss', 'content': 0.05470193549990654, 'timestamp': '2025-10-02 01:09:16.920134', 'step': 32527, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:16.975043', 'step': 32527, 'epoch': 3}
{'type': 'loss', 'content': 0.03651406615972519, 'timestamp': '2025-10-02 01:09:16.981172', 'step': 32528, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:17.034765', 'step': 32528, 'epoch': 3}
{'type': 'loss', 'content': 0.00619507348164916, 'timestamp': '2025-10-02 01:09:17.037247', 'step': 32529, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:17.091879', 'step': 32529, 'epoch': 3}
{'type': 'loss', 'content': 0.04315482825040817, 'timestamp': '2025-10-02 01:09:17.096053', 'step': 32530, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:17.153302', 'step': 32530, 'epoch': 3}
{'type': 'loss', 'content': 0.04117240011692047, 'timestamp': '2025-10-02 01:09:17.160887', 'step': 32531, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:17.217115', 'step': 32531, 'epoch': 3}
{'type': 'loss', 'content': 0.050866611301898956, 'timestamp': '2025-10-02 01:09:17.223239', 'step': 32532, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:09:17.276648', 'step': 32532, 'epoch': 3}
{'type': 'loss', 'content': 0.13825972378253937, 'timestamp': '2025-10-02 01:09:17.279362', 'step': 32533, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:17.333034', 'step': 32533, 'epoch': 3}
{'type': 'loss', 'content': 0.07199538499116898, 'timestamp': '2025-10-02 01:09:17.335547', 'step': 32534, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:17.390072', 'step': 32534, 'epoch': 3}
{'type': 'loss', 'content': 0.0059225657023489475, 'timestamp': '2025-10-02 01:09:17.399416', 'step': 32535, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:17.453358', 'step': 32535, 'epoch': 3}
{'type': 'loss', 'content': 0.06484679132699966, 'timestamp': '2025-10-02 01:09:17.459180', 'step': 32536, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:17.513177', 'step': 32536, 'epoch': 3}
{'type': 'loss', 'content': 0.03325517848134041, 'timestamp': '2025-10-02 01:09:17.523358', 'step': 32537, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:17.578175', 'step': 32537, 'epoch': 3}
{'type': 'loss', 'content': 0.03995438292622566, 'timestamp': '2025-10-02 01:09:17.580693', 'step': 32538, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:17.635193', 'step': 32538, 'epoch': 3}
{'type': 'loss', 'content': 0.003172538010403514, 'timestamp': '2025-10-02 01:09:17.640877', 'step': 32539, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:09:17.710786', 'step': 32539, 'epoch': 3}
{'type': 'loss', 'content': 0.0011149682104587555, 'timestamp': '2025-10-02 01:09:17.723889', 'step': 32540, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:17.778057', 'step': 32540, 'epoch': 3}
{'type': 'loss', 'content': 0.014990132302045822, 'timestamp': '2025-10-02 01:09:17.785562', 'step': 32541, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:09:17.848435', 'step': 32541, 'epoch': 3}
{'type': 'loss', 'content': 0.01954667456448078, 'timestamp': '2025-10-02 01:09:17.859164', 'step': 32542, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:17.913596', 'step': 32542, 'epoch': 3}
{'type': 'loss', 'content': 0.05506240203976631, 'timestamp': '2025-10-02 01:09:17.921123', 'step': 32543, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:17.975394', 'step': 32543, 'epoch': 3}
{'type': 'loss', 'content': 0.03513627126812935, 'timestamp': '2025-10-02 01:09:17.981270', 'step': 32544, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:18.035583', 'step': 32544, 'epoch': 3}
{'type': 'loss', 'content': 0.030753737315535545, 'timestamp': '2025-10-02 01:09:18.045859', 'step': 32545, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:18.100392', 'step': 32545, 'epoch': 3}
{'type': 'loss', 'content': 0.06098048388957977, 'timestamp': '2025-10-02 01:09:18.103526', 'step': 32546, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:18.161431', 'step': 32546, 'epoch': 3}
{'type': 'loss', 'content': 0.008211294189095497, 'timestamp': '2025-10-02 01:09:18.170903', 'step': 32547, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:09:18.230179', 'step': 32547, 'epoch': 3}
{'type': 'loss', 'content': 0.014453520067036152, 'timestamp': '2025-10-02 01:09:18.236198', 'step': 32548, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:18.289715', 'step': 32548, 'epoch': 3}
{'type': 'loss', 'content': 0.024235501885414124, 'timestamp': '2025-10-02 01:09:18.292717', 'step': 32549, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:18.346628', 'step': 32549, 'epoch': 3}
{'type': 'loss', 'content': 0.05153071880340576, 'timestamp': '2025-10-02 01:09:18.349114', 'step': 32550, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:18.403061', 'step': 32550, 'epoch': 3}
{'type': 'loss', 'content': 0.10101386159658432, 'timestamp': '2025-10-02 01:09:18.405828', 'step': 32551, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:18.460293', 'step': 32551, 'epoch': 3}
{'type': 'loss', 'content': 0.13945099711418152, 'timestamp': '2025-10-02 01:09:18.466334', 'step': 32552, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:18.520329', 'step': 32552, 'epoch': 3}
{'type': 'loss', 'content': 0.021516678854823112, 'timestamp': '2025-10-02 01:09:18.526359', 'step': 32553, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:18.582786', 'step': 32553, 'epoch': 3}
{'type': 'loss', 'content': 0.02165030688047409, 'timestamp': '2025-10-02 01:09:18.592278', 'step': 32554, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:18.646433', 'step': 32554, 'epoch': 3}
{'type': 'loss', 'content': 0.05407999828457832, 'timestamp': '2025-10-02 01:09:18.648789', 'step': 32555, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:18.702992', 'step': 32555, 'epoch': 3}
{'type': 'loss', 'content': 0.025396155193448067, 'timestamp': '2025-10-02 01:09:18.708854', 'step': 32556, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:18.762002', 'step': 32556, 'epoch': 3}
{'type': 'loss', 'content': 0.02993045002222061, 'timestamp': '2025-10-02 01:09:18.769898', 'step': 32557, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:18.827075', 'step': 32557, 'epoch': 3}
{'type': 'loss', 'content': 0.018537811934947968, 'timestamp': '2025-10-02 01:09:18.829825', 'step': 32558, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:18.884859', 'step': 32558, 'epoch': 3}
{'type': 'loss', 'content': 0.031602293252944946, 'timestamp': '2025-10-02 01:09:18.892519', 'step': 32559, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:18.946253', 'step': 32559, 'epoch': 3}
{'type': 'loss', 'content': 0.011003546416759491, 'timestamp': '2025-10-02 01:09:18.952039', 'step': 32560, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:19.005622', 'step': 32560, 'epoch': 3}
{'type': 'loss', 'content': 0.04443198814988136, 'timestamp': '2025-10-02 01:09:19.011386', 'step': 32561, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:19.065919', 'step': 32561, 'epoch': 3}
{'type': 'loss', 'content': 0.06147177144885063, 'timestamp': '2025-10-02 01:09:19.068358', 'step': 32562, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:19.122927', 'step': 32562, 'epoch': 3}
{'type': 'loss', 'content': 0.06794685870409012, 'timestamp': '2025-10-02 01:09:19.126635', 'step': 32563, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:19.184562', 'step': 32563, 'epoch': 3}
{'type': 'loss', 'content': 0.053545158356428146, 'timestamp': '2025-10-02 01:09:19.190476', 'step': 32564, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:19.243929', 'step': 32564, 'epoch': 3}
{'type': 'loss', 'content': 0.03353516757488251, 'timestamp': '2025-10-02 01:09:19.251524', 'step': 32565, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:19.307221', 'step': 32565, 'epoch': 3}
{'type': 'loss', 'content': 0.04133615270256996, 'timestamp': '2025-10-02 01:09:19.309960', 'step': 32566, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:09:19.372373', 'step': 32566, 'epoch': 3}
{'type': 'loss', 'content': 0.017981240525841713, 'timestamp': '2025-10-02 01:09:19.382830', 'step': 32567, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:09:19.444238', 'step': 32567, 'epoch': 3}
{'type': 'loss', 'content': 0.01619820110499859, 'timestamp': '2025-10-02 01:09:19.455453', 'step': 32568, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:19.508522', 'step': 32568, 'epoch': 3}
{'type': 'loss', 'content': 0.0665382593870163, 'timestamp': '2025-10-02 01:09:19.510936', 'step': 32569, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:19.565616', 'step': 32569, 'epoch': 3}
{'type': 'loss', 'content': 0.019699908792972565, 'timestamp': '2025-10-02 01:09:19.568137', 'step': 32570, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:19.622550', 'step': 32570, 'epoch': 3}
{'type': 'loss', 'content': 0.009422678500413895, 'timestamp': '2025-10-02 01:09:19.630385', 'step': 32571, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:19.684901', 'step': 32571, 'epoch': 3}
{'type': 'loss', 'content': 0.006157297641038895, 'timestamp': '2025-10-02 01:09:19.693159', 'step': 32572, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:19.747985', 'step': 32572, 'epoch': 3}
{'type': 'loss', 'content': 0.06382781267166138, 'timestamp': '2025-10-02 01:09:19.750800', 'step': 32573, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:19.805297', 'step': 32573, 'epoch': 3}
{'type': 'loss', 'content': 0.0401780903339386, 'timestamp': '2025-10-02 01:09:19.812975', 'step': 32574, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:19.867266', 'step': 32574, 'epoch': 3}
{'type': 'loss', 'content': 0.06194976717233658, 'timestamp': '2025-10-02 01:09:19.875016', 'step': 32575, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:19.930307', 'step': 32575, 'epoch': 3}
{'type': 'loss', 'content': 0.000882348045706749, 'timestamp': '2025-10-02 01:09:19.940499', 'step': 32576, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:19.994581', 'step': 32576, 'epoch': 3}
{'type': 'loss', 'content': 0.03626742959022522, 'timestamp': '2025-10-02 01:09:20.002302', 'step': 32577, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:20.056427', 'step': 32577, 'epoch': 3}
{'type': 'loss', 'content': 0.03856208547949791, 'timestamp': '2025-10-02 01:09:20.059147', 'step': 32578, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:20.113680', 'step': 32578, 'epoch': 3}
{'type': 'loss', 'content': 0.026914352551102638, 'timestamp': '2025-10-02 01:09:20.117073', 'step': 32579, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:20.173987', 'step': 32579, 'epoch': 3}
{'type': 'loss', 'content': 0.01512809656560421, 'timestamp': '2025-10-02 01:09:20.182402', 'step': 32580, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:20.237018', 'step': 32580, 'epoch': 3}
{'type': 'loss', 'content': 0.05313140153884888, 'timestamp': '2025-10-02 01:09:20.247276', 'step': 32581, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:20.302597', 'step': 32581, 'epoch': 3}
{'type': 'loss', 'content': 0.0417911522090435, 'timestamp': '2025-10-02 01:09:20.305408', 'step': 32582, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:20.359360', 'step': 32582, 'epoch': 3}
{'type': 'loss', 'content': 0.028053954243659973, 'timestamp': '2025-10-02 01:09:20.362387', 'step': 32583, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:20.418066', 'step': 32583, 'epoch': 3}
{'type': 'loss', 'content': 0.02067108266055584, 'timestamp': '2025-10-02 01:09:20.424110', 'step': 32584, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:20.478344', 'step': 32584, 'epoch': 3}
{'type': 'loss', 'content': 0.02351502887904644, 'timestamp': '2025-10-02 01:09:20.481632', 'step': 32585, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:20.536504', 'step': 32585, 'epoch': 3}
{'type': 'loss', 'content': 0.03460673615336418, 'timestamp': '2025-10-02 01:09:20.539055', 'step': 32586, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:20.595171', 'step': 32586, 'epoch': 3}
{'type': 'loss', 'content': 0.019081037491559982, 'timestamp': '2025-10-02 01:09:20.604653', 'step': 32587, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:20.660354', 'step': 32587, 'epoch': 3}
{'type': 'loss', 'content': 0.03703458607196808, 'timestamp': '2025-10-02 01:09:20.667142', 'step': 32588, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:20.723101', 'step': 32588, 'epoch': 3}
{'type': 'loss', 'content': 0.059897199273109436, 'timestamp': '2025-10-02 01:09:20.733375', 'step': 32589, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:20.790825', 'step': 32589, 'epoch': 3}
{'type': 'loss', 'content': 0.021485397592186928, 'timestamp': '2025-10-02 01:09:20.793938', 'step': 32590, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:09:20.858189', 'step': 32590, 'epoch': 3}
{'type': 'loss', 'content': 0.06412611156702042, 'timestamp': '2025-10-02 01:09:20.868692', 'step': 32591, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:20.925997', 'step': 32591, 'epoch': 3}
{'type': 'loss', 'content': 0.03649856150150299, 'timestamp': '2025-10-02 01:09:20.932980', 'step': 32592, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:20.988726', 'step': 32592, 'epoch': 3}
{'type': 'loss', 'content': 0.02949993684887886, 'timestamp': '2025-10-02 01:09:20.998236', 'step': 32593, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:21.054455', 'step': 32593, 'epoch': 3}
{'type': 'loss', 'content': 0.01981496252119541, 'timestamp': '2025-10-02 01:09:21.060510', 'step': 32594, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:21.117126', 'step': 32594, 'epoch': 3}
{'type': 'loss', 'content': 0.049136240035295486, 'timestamp': '2025-10-02 01:09:21.119890', 'step': 32595, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:21.183429', 'step': 32595, 'epoch': 3}
{'type': 'loss', 'content': 0.044713329523801804, 'timestamp': '2025-10-02 01:09:21.190583', 'step': 32596, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:21.246180', 'step': 32596, 'epoch': 3}
{'type': 'loss', 'content': 0.035675033926963806, 'timestamp': '2025-10-02 01:09:21.249641', 'step': 32597, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:21.307873', 'step': 32597, 'epoch': 3}
{'type': 'loss', 'content': 0.00032415895839221776, 'timestamp': '2025-10-02 01:09:21.315677', 'step': 32598, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:09:21.386655', 'step': 32598, 'epoch': 3}
{'type': 'loss', 'content': 0.025530004873871803, 'timestamp': '2025-10-02 01:09:21.398983', 'step': 32599, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:21.454672', 'step': 32599, 'epoch': 3}
{'type': 'loss', 'content': 0.03502511605620384, 'timestamp': '2025-10-02 01:09:21.461186', 'step': 32600, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:21.515242', 'step': 32600, 'epoch': 3}
{'type': 'loss', 'content': 0.025867490097880363, 'timestamp': '2025-10-02 01:09:21.518675', 'step': 32601, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:21.572628', 'step': 32601, 'epoch': 3}
{'type': 'loss', 'content': 0.010400191880762577, 'timestamp': '2025-10-02 01:09:21.575418', 'step': 32602, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:21.631543', 'step': 32602, 'epoch': 3}
{'type': 'loss', 'content': 0.0298110693693161, 'timestamp': '2025-10-02 01:09:21.634962', 'step': 32603, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:21.691559', 'step': 32603, 'epoch': 3}
{'type': 'loss', 'content': 0.04216961935162544, 'timestamp': '2025-10-02 01:09:21.699325', 'step': 32604, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:21.757320', 'step': 32604, 'epoch': 3}
{'type': 'loss', 'content': 0.030835554003715515, 'timestamp': '2025-10-02 01:09:21.768295', 'step': 32605, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:21.823141', 'step': 32605, 'epoch': 3}
{'type': 'loss', 'content': 0.03448330610990524, 'timestamp': '2025-10-02 01:09:21.825357', 'step': 32606, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:21.879610', 'step': 32606, 'epoch': 3}
{'type': 'loss', 'content': 0.01723727211356163, 'timestamp': '2025-10-02 01:09:21.882483', 'step': 32607, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:21.937278', 'step': 32607, 'epoch': 3}
{'type': 'loss', 'content': 0.04093555733561516, 'timestamp': '2025-10-02 01:09:21.943265', 'step': 32608, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:21.996577', 'step': 32608, 'epoch': 3}
{'type': 'loss', 'content': 0.050708550959825516, 'timestamp': '2025-10-02 01:09:21.998889', 'step': 32609, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:22.052917', 'step': 32609, 'epoch': 3}
{'type': 'loss', 'content': 0.0027652455028146505, 'timestamp': '2025-10-02 01:09:22.055484', 'step': 32610, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:22.109051', 'step': 32610, 'epoch': 3}
{'type': 'loss', 'content': 0.03782671317458153, 'timestamp': '2025-10-02 01:09:22.112658', 'step': 32611, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:22.166415', 'step': 32611, 'epoch': 3}
{'type': 'loss', 'content': 0.03661849722266197, 'timestamp': '2025-10-02 01:09:22.172217', 'step': 32612, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:22.225334', 'step': 32612, 'epoch': 3}
{'type': 'loss', 'content': 0.07339401543140411, 'timestamp': '2025-10-02 01:09:22.235020', 'step': 32613, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:22.294007', 'step': 32613, 'epoch': 3}
{'type': 'loss', 'content': 0.06914535909891129, 'timestamp': '2025-10-02 01:09:22.304191', 'step': 32614, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:22.360016', 'step': 32614, 'epoch': 3}
{'type': 'loss', 'content': 0.032340217381715775, 'timestamp': '2025-10-02 01:09:22.369338', 'step': 32615, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:22.423872', 'step': 32615, 'epoch': 3}
{'type': 'loss', 'content': 0.05893555283546448, 'timestamp': '2025-10-02 01:09:22.429715', 'step': 32616, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:22.482872', 'step': 32616, 'epoch': 3}
{'type': 'loss', 'content': 0.059348393231630325, 'timestamp': '2025-10-02 01:09:22.488634', 'step': 32617, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:22.542232', 'step': 32617, 'epoch': 3}
{'type': 'loss', 'content': 0.06707798689603806, 'timestamp': '2025-10-02 01:09:22.544343', 'step': 32618, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:22.598665', 'step': 32618, 'epoch': 3}
{'type': 'loss', 'content': 0.032298676669597626, 'timestamp': '2025-10-02 01:09:22.601000', 'step': 32619, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:22.654397', 'step': 32619, 'epoch': 3}
{'type': 'loss', 'content': 0.04570009559392929, 'timestamp': '2025-10-02 01:09:22.659719', 'step': 32620, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 01:09:22.731503', 'step': 32620, 'epoch': 3}
{'type': 'loss', 'content': 0.0007052799919620156, 'timestamp': '2025-10-02 01:09:22.745952', 'step': 32621, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:22.804047', 'step': 32621, 'epoch': 3}
{'type': 'loss', 'content': 0.07734690606594086, 'timestamp': '2025-10-02 01:09:22.806269', 'step': 32622, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:22.860307', 'step': 32622, 'epoch': 3}
{'type': 'loss', 'content': 0.05645610764622688, 'timestamp': '2025-10-02 01:09:22.867636', 'step': 32623, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:22.922192', 'step': 32623, 'epoch': 3}
{'type': 'loss', 'content': 0.026130618527531624, 'timestamp': '2025-10-02 01:09:22.927883', 'step': 32624, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:22.982692', 'step': 32624, 'epoch': 3}
{'type': 'loss', 'content': 0.01697252318263054, 'timestamp': '2025-10-02 01:09:22.993070', 'step': 32625, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:23.052217', 'step': 32625, 'epoch': 3}
{'type': 'loss', 'content': 0.006982635241001844, 'timestamp': '2025-10-02 01:09:23.062523', 'step': 32626, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:23.117171', 'step': 32626, 'epoch': 3}
{'type': 'loss', 'content': 0.043111126869916916, 'timestamp': '2025-10-02 01:09:23.119637', 'step': 32627, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:23.173180', 'step': 32627, 'epoch': 3}
{'type': 'loss', 'content': 0.024546364322304726, 'timestamp': '2025-10-02 01:09:23.179108', 'step': 32628, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:23.232159', 'step': 32628, 'epoch': 3}
{'type': 'loss', 'content': 0.0654512569308281, 'timestamp': '2025-10-02 01:09:23.234212', 'step': 32629, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:23.289206', 'step': 32629, 'epoch': 3}
{'type': 'loss', 'content': 0.0613056905567646, 'timestamp': '2025-10-02 01:09:23.291325', 'step': 32630, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:23.345334', 'step': 32630, 'epoch': 3}
{'type': 'loss', 'content': 0.057693786919116974, 'timestamp': '2025-10-02 01:09:23.347603', 'step': 32631, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:23.402485', 'step': 32631, 'epoch': 3}
{'type': 'loss', 'content': 0.021069355309009552, 'timestamp': '2025-10-02 01:09:23.408275', 'step': 32632, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:23.465046', 'step': 32632, 'epoch': 3}
{'type': 'loss', 'content': 0.0152396559715271, 'timestamp': '2025-10-02 01:09:23.476140', 'step': 32633, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:23.532271', 'step': 32633, 'epoch': 3}
{'type': 'loss', 'content': 0.0015380907570943236, 'timestamp': '2025-10-02 01:09:23.534468', 'step': 32634, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:23.587894', 'step': 32634, 'epoch': 3}
{'type': 'loss', 'content': 0.007157045882195234, 'timestamp': '2025-10-02 01:09:23.595416', 'step': 32635, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:23.650520', 'step': 32635, 'epoch': 3}
{'type': 'loss', 'content': 0.026431679725646973, 'timestamp': '2025-10-02 01:09:23.656241', 'step': 32636, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:23.709418', 'step': 32636, 'epoch': 3}
{'type': 'loss', 'content': 0.016257092356681824, 'timestamp': '2025-10-02 01:09:23.711856', 'step': 32637, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:23.766961', 'step': 32637, 'epoch': 3}
{'type': 'loss', 'content': 0.019991397857666016, 'timestamp': '2025-10-02 01:09:23.773001', 'step': 32638, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:09:23.834052', 'step': 32638, 'epoch': 3}
{'type': 'loss', 'content': 0.023704087361693382, 'timestamp': '2025-10-02 01:09:23.844593', 'step': 32639, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:09:23.899275', 'step': 32639, 'epoch': 3}
{'type': 'loss', 'content': 0.07574998587369919, 'timestamp': '2025-10-02 01:09:23.904908', 'step': 32640, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:23.958453', 'step': 32640, 'epoch': 3}
{'type': 'loss', 'content': 0.08349675685167313, 'timestamp': '2025-10-02 01:09:23.960389', 'step': 32641, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:24.014809', 'step': 32641, 'epoch': 3}
{'type': 'loss', 'content': 0.0015114003326743841, 'timestamp': '2025-10-02 01:09:24.022363', 'step': 32642, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:24.077405', 'step': 32642, 'epoch': 3}
{'type': 'loss', 'content': 0.05242791026830673, 'timestamp': '2025-10-02 01:09:24.079785', 'step': 32643, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:24.134200', 'step': 32643, 'epoch': 3}
{'type': 'loss', 'content': 0.05572288855910301, 'timestamp': '2025-10-02 01:09:24.142574', 'step': 32644, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:24.196725', 'step': 32644, 'epoch': 3}
{'type': 'loss', 'content': 0.05714055523276329, 'timestamp': '2025-10-02 01:09:24.198809', 'step': 32645, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:24.252721', 'step': 32645, 'epoch': 3}
{'type': 'loss', 'content': 0.03491632267832756, 'timestamp': '2025-10-02 01:09:24.256904', 'step': 32646, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:24.311615', 'step': 32646, 'epoch': 3}
{'type': 'loss', 'content': 0.05656568706035614, 'timestamp': '2025-10-02 01:09:24.313862', 'step': 32647, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:09:24.367711', 'step': 32647, 'epoch': 3}
{'type': 'loss', 'content': 0.051479652523994446, 'timestamp': '2025-10-02 01:09:24.373740', 'step': 32648, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:09:24.426863', 'step': 32648, 'epoch': 3}
{'type': 'loss', 'content': 0.022708291187882423, 'timestamp': '2025-10-02 01:09:24.428847', 'step': 32649, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:24.483239', 'step': 32649, 'epoch': 3}
{'type': 'loss', 'content': 0.02841670997440815, 'timestamp': '2025-10-02 01:09:24.485314', 'step': 32650, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:24.539685', 'step': 32650, 'epoch': 3}
{'type': 'loss', 'content': 0.016227057203650475, 'timestamp': '2025-10-02 01:09:24.542721', 'step': 32651, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:24.597807', 'step': 32651, 'epoch': 3}
{'type': 'loss', 'content': 0.009468214586377144, 'timestamp': '2025-10-02 01:09:24.606208', 'step': 32652, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:24.660138', 'step': 32652, 'epoch': 3}
{'type': 'loss', 'content': 0.007564184255897999, 'timestamp': '2025-10-02 01:09:24.665912', 'step': 32653, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:24.720813', 'step': 32653, 'epoch': 3}
{'type': 'loss', 'content': 0.061994414776563644, 'timestamp': '2025-10-02 01:09:24.722971', 'step': 32654, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:24.778171', 'step': 32654, 'epoch': 3}
{'type': 'loss', 'content': 0.03360090032219887, 'timestamp': '2025-10-02 01:09:24.780647', 'step': 32655, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:24.835178', 'step': 32655, 'epoch': 3}
{'type': 'loss', 'content': 0.05376816540956497, 'timestamp': '2025-10-02 01:09:24.843325', 'step': 32656, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:24.897183', 'step': 32656, 'epoch': 3}
{'type': 'loss', 'content': 0.09293916076421738, 'timestamp': '2025-10-02 01:09:24.899764', 'step': 32657, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:24.954288', 'step': 32657, 'epoch': 3}
{'type': 'loss', 'content': 0.0022640719544142485, 'timestamp': '2025-10-02 01:09:24.961580', 'step': 32658, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:25.040898', 'step': 32658, 'epoch': 3}
{'type': 'loss', 'content': 0.06165454164147377, 'timestamp': '2025-10-02 01:09:25.047588', 'step': 32659, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:25.110939', 'step': 32659, 'epoch': 3}
{'type': 'loss', 'content': 0.03941492363810539, 'timestamp': '2025-10-02 01:09:25.117909', 'step': 32660, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:25.186271', 'step': 32660, 'epoch': 3}
{'type': 'loss', 'content': 0.0545470230281353, 'timestamp': '2025-10-02 01:09:25.192236', 'step': 32661, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:25.248267', 'step': 32661, 'epoch': 3}
{'type': 'loss', 'content': 0.02502872608602047, 'timestamp': '2025-10-02 01:09:25.255885', 'step': 32662, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:09:25.323613', 'step': 32662, 'epoch': 3}
{'type': 'loss', 'content': 0.020093465223908424, 'timestamp': '2025-10-02 01:09:25.334594', 'step': 32663, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:25.410447', 'step': 32663, 'epoch': 3}
{'type': 'loss', 'content': 0.013630388304591179, 'timestamp': '2025-10-02 01:09:25.421557', 'step': 32664, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:25.483424', 'step': 32664, 'epoch': 3}
{'type': 'loss', 'content': 0.06415212154388428, 'timestamp': '2025-10-02 01:09:25.486439', 'step': 32665, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:25.543899', 'step': 32665, 'epoch': 3}
{'type': 'loss', 'content': 0.013192188926041126, 'timestamp': '2025-10-02 01:09:25.554109', 'step': 32666, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:25.612814', 'step': 32666, 'epoch': 3}
{'type': 'loss', 'content': 0.08610516041517258, 'timestamp': '2025-10-02 01:09:25.622327', 'step': 32667, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:25.688852', 'step': 32667, 'epoch': 3}
{'type': 'loss', 'content': 0.08159488439559937, 'timestamp': '2025-10-02 01:09:25.695908', 'step': 32668, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:25.767133', 'step': 32668, 'epoch': 3}
{'type': 'loss', 'content': 0.035735227167606354, 'timestamp': '2025-10-02 01:09:25.778249', 'step': 32669, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:25.834378', 'step': 32669, 'epoch': 3}
{'type': 'loss', 'content': 0.055683013051748276, 'timestamp': '2025-10-02 01:09:25.842212', 'step': 32670, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:09:25.911441', 'step': 32670, 'epoch': 3}
{'type': 'loss', 'content': 0.04830123856663704, 'timestamp': '2025-10-02 01:09:25.922114', 'step': 32671, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:25.985272', 'step': 32671, 'epoch': 3}
{'type': 'loss', 'content': 0.0157216414809227, 'timestamp': '2025-10-02 01:09:25.996746', 'step': 32672, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:26.053069', 'step': 32672, 'epoch': 3}
{'type': 'loss', 'content': 0.048103153705596924, 'timestamp': '2025-10-02 01:09:26.055366', 'step': 32673, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:26.109339', 'step': 32673, 'epoch': 3}
{'type': 'loss', 'content': 0.07356835156679153, 'timestamp': '2025-10-02 01:09:26.111761', 'step': 32674, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:26.165956', 'step': 32674, 'epoch': 3}
{'type': 'loss', 'content': 0.016539238393306732, 'timestamp': '2025-10-02 01:09:26.168685', 'step': 32675, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:26.223538', 'step': 32675, 'epoch': 3}
{'type': 'loss', 'content': 0.006122994236648083, 'timestamp': '2025-10-02 01:09:26.232034', 'step': 32676, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:26.285509', 'step': 32676, 'epoch': 3}
{'type': 'loss', 'content': 0.0340237133204937, 'timestamp': '2025-10-02 01:09:26.287801', 'step': 32677, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:26.342544', 'step': 32677, 'epoch': 3}
{'type': 'loss', 'content': 0.07035548239946365, 'timestamp': '2025-10-02 01:09:26.345107', 'step': 32678, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:26.399964', 'step': 32678, 'epoch': 3}
{'type': 'loss', 'content': 0.015618899837136269, 'timestamp': '2025-10-02 01:09:26.409350', 'step': 32679, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:26.464228', 'step': 32679, 'epoch': 3}
{'type': 'loss', 'content': 0.08636760711669922, 'timestamp': '2025-10-02 01:09:26.469652', 'step': 32680, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:26.524396', 'step': 32680, 'epoch': 3}
{'type': 'loss', 'content': 0.08951157331466675, 'timestamp': '2025-10-02 01:09:26.526819', 'step': 32681, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:26.581731', 'step': 32681, 'epoch': 3}
{'type': 'loss', 'content': 0.015353446826338768, 'timestamp': '2025-10-02 01:09:26.584554', 'step': 32682, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:26.641319', 'step': 32682, 'epoch': 3}
{'type': 'loss', 'content': 0.000815445149783045, 'timestamp': '2025-10-02 01:09:26.650897', 'step': 32683, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:26.709266', 'step': 32683, 'epoch': 3}
{'type': 'loss', 'content': 0.02850869856774807, 'timestamp': '2025-10-02 01:09:26.715405', 'step': 32684, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:26.768653', 'step': 32684, 'epoch': 3}
{'type': 'loss', 'content': 0.07250364869832993, 'timestamp': '2025-10-02 01:09:26.771019', 'step': 32685, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:26.825417', 'step': 32685, 'epoch': 3}
{'type': 'loss', 'content': 0.07870976626873016, 'timestamp': '2025-10-02 01:09:26.828285', 'step': 32686, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:26.882410', 'step': 32686, 'epoch': 3}
{'type': 'loss', 'content': 0.01601249724626541, 'timestamp': '2025-10-02 01:09:26.888393', 'step': 32687, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:26.942390', 'step': 32687, 'epoch': 3}
{'type': 'loss', 'content': 0.05237613990902901, 'timestamp': '2025-10-02 01:09:26.947856', 'step': 32688, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:27.001877', 'step': 32688, 'epoch': 3}
{'type': 'loss', 'content': 0.000559747451916337, 'timestamp': '2025-10-02 01:09:27.004408', 'step': 32689, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:27.059623', 'step': 32689, 'epoch': 3}
{'type': 'loss', 'content': 0.10370376706123352, 'timestamp': '2025-10-02 01:09:27.062026', 'step': 32690, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:27.116800', 'step': 32690, 'epoch': 3}
{'type': 'loss', 'content': 0.023578157648444176, 'timestamp': '2025-10-02 01:09:27.120156', 'step': 32691, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:27.175238', 'step': 32691, 'epoch': 3}
{'type': 'loss', 'content': 0.02292768843472004, 'timestamp': '2025-10-02 01:09:27.181256', 'step': 32692, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:27.235135', 'step': 32692, 'epoch': 3}
{'type': 'loss', 'content': 0.031912095844745636, 'timestamp': '2025-10-02 01:09:27.237659', 'step': 32693, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:27.292965', 'step': 32693, 'epoch': 3}
{'type': 'loss', 'content': 0.04523133486509323, 'timestamp': '2025-10-02 01:09:27.302353', 'step': 32694, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:27.356428', 'step': 32694, 'epoch': 3}
{'type': 'loss', 'content': 0.05332193896174431, 'timestamp': '2025-10-02 01:09:27.363707', 'step': 32695, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:27.418783', 'step': 32695, 'epoch': 3}
{'type': 'loss', 'content': 0.06784051656723022, 'timestamp': '2025-10-02 01:09:27.424854', 'step': 32696, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:27.478427', 'step': 32696, 'epoch': 3}
{'type': 'loss', 'content': 0.08844897150993347, 'timestamp': '2025-10-02 01:09:27.481013', 'step': 32697, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:27.535405', 'step': 32697, 'epoch': 3}
{'type': 'loss', 'content': 0.03228051960468292, 'timestamp': '2025-10-02 01:09:27.538175', 'step': 32698, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:09:27.600229', 'step': 32698, 'epoch': 3}
{'type': 'loss', 'content': 0.02919258549809456, 'timestamp': '2025-10-02 01:09:27.610815', 'step': 32699, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:27.666573', 'step': 32699, 'epoch': 3}
{'type': 'loss', 'content': 0.017329024150967598, 'timestamp': '2025-10-02 01:09:27.676476', 'step': 32700, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:27.730270', 'step': 32700, 'epoch': 3}
{'type': 'loss', 'content': 0.015755876898765564, 'timestamp': '2025-10-02 01:09:27.733059', 'step': 32701, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:27.787127', 'step': 32701, 'epoch': 3}
{'type': 'loss', 'content': 0.04174131900072098, 'timestamp': '2025-10-02 01:09:27.794481', 'step': 32702, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:27.849138', 'step': 32702, 'epoch': 3}
{'type': 'loss', 'content': 0.017756087705492973, 'timestamp': '2025-10-02 01:09:27.851700', 'step': 32703, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:27.905920', 'step': 32703, 'epoch': 3}
{'type': 'loss', 'content': 0.04659390076994896, 'timestamp': '2025-10-02 01:09:27.911861', 'step': 32704, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:27.965668', 'step': 32704, 'epoch': 3}
{'type': 'loss', 'content': 0.04176625609397888, 'timestamp': '2025-10-02 01:09:27.974993', 'step': 32705, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:28.032241', 'step': 32705, 'epoch': 3}
{'type': 'loss', 'content': 0.1325976699590683, 'timestamp': '2025-10-02 01:09:28.034488', 'step': 32706, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:28.088312', 'step': 32706, 'epoch': 3}
{'type': 'loss', 'content': 0.07383731752634048, 'timestamp': '2025-10-02 01:09:28.090712', 'step': 32707, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:28.146395', 'step': 32707, 'epoch': 3}
{'type': 'loss', 'content': 0.007085599936544895, 'timestamp': '2025-10-02 01:09:28.152934', 'step': 32708, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:28.206849', 'step': 32708, 'epoch': 3}
{'type': 'loss', 'content': 0.07173921167850494, 'timestamp': '2025-10-02 01:09:28.209576', 'step': 32709, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:28.263916', 'step': 32709, 'epoch': 3}
{'type': 'loss', 'content': 0.02900776080787182, 'timestamp': '2025-10-02 01:09:28.266541', 'step': 32710, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:28.323131', 'step': 32710, 'epoch': 3}
{'type': 'loss', 'content': 0.01938757672905922, 'timestamp': '2025-10-02 01:09:28.332687', 'step': 32711, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:09:28.395242', 'step': 32711, 'epoch': 3}
{'type': 'loss', 'content': 0.023564690724015236, 'timestamp': '2025-10-02 01:09:28.406486', 'step': 32712, 'epoch': 3}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 01:09:55.008705', 'step': 32712, 'epoch': 3}
{'type': 'pplx', 'content': 95.84936566267578, 'timestamp': '2025-10-02 01:09:55.012440', 'step': 32712, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:55.068325', 'step': 32712, 'epoch': 3}
{'type': 'loss', 'content': 0.045090775936841965, 'timestamp': '2025-10-02 01:09:55.072727', 'step': 32713, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:55.128347', 'step': 32713, 'epoch': 3}
{'type': 'loss', 'content': 0.018844464793801308, 'timestamp': '2025-10-02 01:09:55.130988', 'step': 32714, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:55.186626', 'step': 32714, 'epoch': 3}
{'type': 'loss', 'content': 0.06464557349681854, 'timestamp': '2025-10-02 01:09:55.189159', 'step': 32715, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:55.243628', 'step': 32715, 'epoch': 3}
{'type': 'loss', 'content': 0.06201545149087906, 'timestamp': '2025-10-02 01:09:55.250155', 'step': 32716, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:55.303894', 'step': 32716, 'epoch': 3}
{'type': 'loss', 'content': 0.0544593408703804, 'timestamp': '2025-10-02 01:09:55.309769', 'step': 32717, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:55.365411', 'step': 32717, 'epoch': 3}
{'type': 'loss', 'content': 0.04912830889225006, 'timestamp': '2025-10-02 01:09:55.367665', 'step': 32718, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:55.422250', 'step': 32718, 'epoch': 3}
{'type': 'loss', 'content': 0.015487732365727425, 'timestamp': '2025-10-02 01:09:55.424470', 'step': 32719, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:55.478757', 'step': 32719, 'epoch': 3}
{'type': 'loss', 'content': 0.001625193515792489, 'timestamp': '2025-10-02 01:09:55.484862', 'step': 32720, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:55.539488', 'step': 32720, 'epoch': 3}
{'type': 'loss', 'content': 0.03512909263372421, 'timestamp': '2025-10-02 01:09:55.541749', 'step': 32721, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:55.598903', 'step': 32721, 'epoch': 3}
{'type': 'loss', 'content': 0.008634286932647228, 'timestamp': '2025-10-02 01:09:55.608461', 'step': 32722, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:55.663775', 'step': 32722, 'epoch': 3}
{'type': 'loss', 'content': 0.0369064025580883, 'timestamp': '2025-10-02 01:09:55.666534', 'step': 32723, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:55.725676', 'step': 32723, 'epoch': 3}
{'type': 'loss', 'content': 0.043456368148326874, 'timestamp': '2025-10-02 01:09:55.736581', 'step': 32724, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:55.790285', 'step': 32724, 'epoch': 3}
{'type': 'loss', 'content': 0.04034417122602463, 'timestamp': '2025-10-02 01:09:55.792710', 'step': 32725, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:55.846047', 'step': 32725, 'epoch': 3}
{'type': 'loss', 'content': 0.03777065500617027, 'timestamp': '2025-10-02 01:09:55.848350', 'step': 32726, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:55.904298', 'step': 32726, 'epoch': 3}
{'type': 'loss', 'content': 0.014783259481191635, 'timestamp': '2025-10-02 01:09:55.911747', 'step': 32727, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:55.966239', 'step': 32727, 'epoch': 3}
{'type': 'loss', 'content': 0.11485365778207779, 'timestamp': '2025-10-02 01:09:55.972338', 'step': 32728, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:09:56.032322', 'step': 32728, 'epoch': 3}
{'type': 'loss', 'content': 0.04474526643753052, 'timestamp': '2025-10-02 01:09:56.043848', 'step': 32729, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:56.098491', 'step': 32729, 'epoch': 3}
{'type': 'loss', 'content': 0.011518039740622044, 'timestamp': '2025-10-02 01:09:56.101110', 'step': 32730, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:56.155093', 'step': 32730, 'epoch': 3}
{'type': 'loss', 'content': 0.04834020510315895, 'timestamp': '2025-10-02 01:09:56.157677', 'step': 32731, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:56.212741', 'step': 32731, 'epoch': 3}
{'type': 'loss', 'content': 0.013363763689994812, 'timestamp': '2025-10-02 01:09:56.219566', 'step': 32732, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:56.273925', 'step': 32732, 'epoch': 3}
{'type': 'loss', 'content': 0.07518511265516281, 'timestamp': '2025-10-02 01:09:56.276121', 'step': 32733, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:56.330385', 'step': 32733, 'epoch': 3}
{'type': 'loss', 'content': 0.03128684684634209, 'timestamp': '2025-10-02 01:09:56.332611', 'step': 32734, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:56.388224', 'step': 32734, 'epoch': 3}
{'type': 'loss', 'content': 0.035017941147089005, 'timestamp': '2025-10-02 01:09:56.390368', 'step': 32735, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:56.444064', 'step': 32735, 'epoch': 3}
{'type': 'loss', 'content': 0.04033208265900612, 'timestamp': '2025-10-02 01:09:56.450037', 'step': 32736, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:56.503499', 'step': 32736, 'epoch': 3}
{'type': 'loss', 'content': 0.016638224944472313, 'timestamp': '2025-10-02 01:09:56.505763', 'step': 32737, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:56.559804', 'step': 32737, 'epoch': 3}
{'type': 'loss', 'content': 0.03157389536499977, 'timestamp': '2025-10-02 01:09:56.562242', 'step': 32738, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:09:56.617743', 'step': 32738, 'epoch': 3}
{'type': 'loss', 'content': 0.02092457003891468, 'timestamp': '2025-10-02 01:09:56.620239', 'step': 32739, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:56.675931', 'step': 32739, 'epoch': 3}
{'type': 'loss', 'content': 0.05127369239926338, 'timestamp': '2025-10-02 01:09:56.681686', 'step': 32740, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:09:56.734893', 'step': 32740, 'epoch': 3}
{'type': 'loss', 'content': 0.09359085559844971, 'timestamp': '2025-10-02 01:09:56.737313', 'step': 32741, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:56.790017', 'step': 32741, 'epoch': 3}
{'type': 'loss', 'content': 0.05104744806885719, 'timestamp': '2025-10-02 01:09:56.792051', 'step': 32742, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:56.850259', 'step': 32742, 'epoch': 3}
{'type': 'loss', 'content': 0.02389601804316044, 'timestamp': '2025-10-02 01:09:56.860406', 'step': 32743, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:09:56.915863', 'step': 32743, 'epoch': 3}
{'type': 'loss', 'content': 0.05866310000419617, 'timestamp': '2025-10-02 01:09:56.922009', 'step': 32744, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:09:56.982966', 'step': 32744, 'epoch': 3}
{'type': 'loss', 'content': 0.03933119401335716, 'timestamp': '2025-10-02 01:09:56.994299', 'step': 32745, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:57.050885', 'step': 32745, 'epoch': 3}
{'type': 'loss', 'content': 0.09029138833284378, 'timestamp': '2025-10-02 01:09:57.059825', 'step': 32746, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:57.115903', 'step': 32746, 'epoch': 3}
{'type': 'loss', 'content': 0.02121049165725708, 'timestamp': '2025-10-02 01:09:57.121554', 'step': 32747, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:57.178314', 'step': 32747, 'epoch': 3}
{'type': 'loss', 'content': 0.028231138363480568, 'timestamp': '2025-10-02 01:09:57.185247', 'step': 32748, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:57.241910', 'step': 32748, 'epoch': 3}
{'type': 'loss', 'content': 0.06631111353635788, 'timestamp': '2025-10-02 01:09:57.245114', 'step': 32749, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:57.300743', 'step': 32749, 'epoch': 3}
{'type': 'loss', 'content': 0.01380025502294302, 'timestamp': '2025-10-02 01:09:57.308209', 'step': 32750, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:09:57.365097', 'step': 32750, 'epoch': 3}
{'type': 'loss', 'content': 0.03406061977148056, 'timestamp': '2025-10-02 01:09:57.368474', 'step': 32751, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:57.425610', 'step': 32751, 'epoch': 3}
{'type': 'loss', 'content': 0.0011368182022124529, 'timestamp': '2025-10-02 01:09:57.435732', 'step': 32752, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:57.491353', 'step': 32752, 'epoch': 3}
{'type': 'loss', 'content': 0.03984048217535019, 'timestamp': '2025-10-02 01:09:57.500926', 'step': 32753, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:57.556457', 'step': 32753, 'epoch': 3}
{'type': 'loss', 'content': 0.058304134756326675, 'timestamp': '2025-10-02 01:09:57.562436', 'step': 32754, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:57.619348', 'step': 32754, 'epoch': 3}
{'type': 'loss', 'content': 0.042065542191267014, 'timestamp': '2025-10-02 01:09:57.628693', 'step': 32755, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:57.682871', 'step': 32755, 'epoch': 3}
{'type': 'loss', 'content': 0.03833921626210213, 'timestamp': '2025-10-02 01:09:57.691451', 'step': 32756, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:57.746523', 'step': 32756, 'epoch': 3}
{'type': 'loss', 'content': 0.06948929280042648, 'timestamp': '2025-10-02 01:09:57.749941', 'step': 32757, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:57.805357', 'step': 32757, 'epoch': 3}
{'type': 'loss', 'content': 0.0663096159696579, 'timestamp': '2025-10-02 01:09:57.808511', 'step': 32758, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:57.863870', 'step': 32758, 'epoch': 3}
{'type': 'loss', 'content': 0.07489507645368576, 'timestamp': '2025-10-02 01:09:57.867118', 'step': 32759, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:57.924121', 'step': 32759, 'epoch': 3}
{'type': 'loss', 'content': 0.040563348680734634, 'timestamp': '2025-10-02 01:09:57.930825', 'step': 32760, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:09:57.985854', 'step': 32760, 'epoch': 3}
{'type': 'loss', 'content': 0.04107850417494774, 'timestamp': '2025-10-02 01:09:57.989332', 'step': 32761, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:09:58.051705', 'step': 32761, 'epoch': 3}
{'type': 'loss', 'content': 0.026263663545250893, 'timestamp': '2025-10-02 01:09:58.062155', 'step': 32762, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:58.117895', 'step': 32762, 'epoch': 3}
{'type': 'loss', 'content': 0.017108481377363205, 'timestamp': '2025-10-02 01:09:58.127424', 'step': 32763, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:58.181515', 'step': 32763, 'epoch': 3}
{'type': 'loss', 'content': 0.044404536485672, 'timestamp': '2025-10-02 01:09:58.188093', 'step': 32764, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:58.242767', 'step': 32764, 'epoch': 3}
{'type': 'loss', 'content': 0.018248362466692924, 'timestamp': '2025-10-02 01:09:58.245453', 'step': 32765, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:58.299202', 'step': 32765, 'epoch': 3}
{'type': 'loss', 'content': 0.05713052302598953, 'timestamp': '2025-10-02 01:09:58.301650', 'step': 32766, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:09:58.356732', 'step': 32766, 'epoch': 3}
{'type': 'loss', 'content': 0.07498529553413391, 'timestamp': '2025-10-02 01:09:58.359465', 'step': 32767, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:09:58.413611', 'step': 32767, 'epoch': 3}
{'type': 'loss', 'content': 0.016232997179031372, 'timestamp': '2025-10-02 01:09:58.419547', 'step': 32768, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:09:58.473603', 'step': 32768, 'epoch': 3}
{'type': 'loss', 'content': 0.18218059837818146, 'timestamp': '2025-10-02 01:09:58.475806', 'step': 32769, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:58.529626', 'step': 32769, 'epoch': 3}
{'type': 'loss', 'content': 0.01936924085021019, 'timestamp': '2025-10-02 01:09:58.532379', 'step': 32770, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:58.586414', 'step': 32770, 'epoch': 3}
{'type': 'loss', 'content': 0.018678558990359306, 'timestamp': '2025-10-02 01:09:58.589029', 'step': 32771, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:09:58.644083', 'step': 32771, 'epoch': 3}
{'type': 'loss', 'content': 0.07737675309181213, 'timestamp': '2025-10-02 01:09:58.650903', 'step': 32772, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:58.705125', 'step': 32772, 'epoch': 3}
{'type': 'loss', 'content': 0.030317522585392, 'timestamp': '2025-10-02 01:09:58.715350', 'step': 32773, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:58.772233', 'step': 32773, 'epoch': 3}
{'type': 'loss', 'content': 0.08664385974407196, 'timestamp': '2025-10-02 01:09:58.774802', 'step': 32774, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:58.830346', 'step': 32774, 'epoch': 3}
{'type': 'loss', 'content': 0.10600872337818146, 'timestamp': '2025-10-02 01:09:58.832990', 'step': 32775, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:09:58.887771', 'step': 32775, 'epoch': 3}
{'type': 'loss', 'content': 0.05750054121017456, 'timestamp': '2025-10-02 01:09:58.893719', 'step': 32776, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:09:58.955120', 'step': 32776, 'epoch': 3}
{'type': 'loss', 'content': 0.015720326453447342, 'timestamp': '2025-10-02 01:09:58.966864', 'step': 32777, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:59.022450', 'step': 32777, 'epoch': 3}
{'type': 'loss', 'content': 0.02857951447367668, 'timestamp': '2025-10-02 01:09:59.025114', 'step': 32778, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:59.080131', 'step': 32778, 'epoch': 3}
{'type': 'loss', 'content': 0.02508038841187954, 'timestamp': '2025-10-02 01:09:59.087727', 'step': 32779, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:09:59.143119', 'step': 32779, 'epoch': 3}
{'type': 'loss', 'content': 0.046509165316820145, 'timestamp': '2025-10-02 01:09:59.149190', 'step': 32780, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:59.204073', 'step': 32780, 'epoch': 3}
{'type': 'loss', 'content': 0.0037323604337871075, 'timestamp': '2025-10-02 01:09:59.211606', 'step': 32781, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:59.265724', 'step': 32781, 'epoch': 3}
{'type': 'loss', 'content': 0.06718281656503677, 'timestamp': '2025-10-02 01:09:59.268339', 'step': 32782, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:09:59.323311', 'step': 32782, 'epoch': 3}
{'type': 'loss', 'content': 0.013924941420555115, 'timestamp': '2025-10-02 01:09:59.330805', 'step': 32783, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:59.384440', 'step': 32783, 'epoch': 3}
{'type': 'loss', 'content': 0.08491972088813782, 'timestamp': '2025-10-02 01:09:59.390430', 'step': 32784, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:09:59.444498', 'step': 32784, 'epoch': 3}
{'type': 'loss', 'content': 0.03371580317616463, 'timestamp': '2025-10-02 01:09:59.448127', 'step': 32785, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:09:59.502074', 'step': 32785, 'epoch': 3}
{'type': 'loss', 'content': 0.024239735677838326, 'timestamp': '2025-10-02 01:09:59.504766', 'step': 32786, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:09:59.561144', 'step': 32786, 'epoch': 3}
{'type': 'loss', 'content': 0.017102038487792015, 'timestamp': '2025-10-02 01:09:59.570683', 'step': 32787, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:09:59.630153', 'step': 32787, 'epoch': 3}
{'type': 'loss', 'content': 0.0026809796690940857, 'timestamp': '2025-10-02 01:09:59.641092', 'step': 32788, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:09:59.693923', 'step': 32788, 'epoch': 3}
{'type': 'loss', 'content': 0.12105495482683182, 'timestamp': '2025-10-02 01:09:59.696149', 'step': 32789, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:09:59.750426', 'step': 32789, 'epoch': 3}
{'type': 'loss', 'content': 0.16073103249073029, 'timestamp': '2025-10-02 01:09:59.752639', 'step': 32790, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:09:59.806430', 'step': 32790, 'epoch': 3}
{'type': 'loss', 'content': 0.18547721207141876, 'timestamp': '2025-10-02 01:09:59.809120', 'step': 32791, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:09:59.863747', 'step': 32791, 'epoch': 3}
{'type': 'loss', 'content': 0.028041746467351913, 'timestamp': '2025-10-02 01:09:59.870089', 'step': 32792, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:09:59.923906', 'step': 32792, 'epoch': 3}
{'type': 'loss', 'content': 0.01098591461777687, 'timestamp': '2025-10-02 01:09:59.933509', 'step': 32793, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:09:59.986907', 'step': 32793, 'epoch': 3}
{'type': 'loss', 'content': 0.06046037748456001, 'timestamp': '2025-10-02 01:09:59.989320', 'step': 32794, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:10:00.045876', 'step': 32794, 'epoch': 3}
{'type': 'loss', 'content': 0.02130506932735443, 'timestamp': '2025-10-02 01:10:00.048347', 'step': 32795, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:10:00.113981', 'step': 32795, 'epoch': 3}
{'type': 'loss', 'content': 0.030000193044543266, 'timestamp': '2025-10-02 01:10:00.125233', 'step': 32796, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:00.180033', 'step': 32796, 'epoch': 3}
{'type': 'loss', 'content': 0.0711539164185524, 'timestamp': '2025-10-02 01:10:00.187341', 'step': 32797, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:00.242356', 'step': 32797, 'epoch': 3}
{'type': 'loss', 'content': 0.0089168855920434, 'timestamp': '2025-10-02 01:10:00.245051', 'step': 32798, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:00.299846', 'step': 32798, 'epoch': 3}
{'type': 'loss', 'content': 0.022953825071454048, 'timestamp': '2025-10-02 01:10:00.302390', 'step': 32799, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:00.356263', 'step': 32799, 'epoch': 3}
{'type': 'loss', 'content': 0.013780156150460243, 'timestamp': '2025-10-02 01:10:00.362411', 'step': 32800, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:00.417030', 'step': 32800, 'epoch': 3}
{'type': 'loss', 'content': 0.09920097142457962, 'timestamp': '2025-10-02 01:10:00.420335', 'step': 32801, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:00.475881', 'step': 32801, 'epoch': 3}
{'type': 'loss', 'content': 0.0033568721264600754, 'timestamp': '2025-10-02 01:10:00.485383', 'step': 32802, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:00.539710', 'step': 32802, 'epoch': 3}
{'type': 'loss', 'content': 0.012465011328458786, 'timestamp': '2025-10-02 01:10:00.545684', 'step': 32803, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:00.600932', 'step': 32803, 'epoch': 3}
{'type': 'loss', 'content': 0.00010231195483356714, 'timestamp': '2025-10-02 01:10:00.611074', 'step': 32804, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:00.665063', 'step': 32804, 'epoch': 3}
{'type': 'loss', 'content': 0.03815273940563202, 'timestamp': '2025-10-02 01:10:00.667344', 'step': 32805, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:00.721858', 'step': 32805, 'epoch': 3}
{'type': 'loss', 'content': 0.025473134592175484, 'timestamp': '2025-10-02 01:10:00.731233', 'step': 32806, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:10:00.785789', 'step': 32806, 'epoch': 3}
{'type': 'loss', 'content': 0.013700507581233978, 'timestamp': '2025-10-02 01:10:00.788024', 'step': 32807, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:10:00.847511', 'step': 32807, 'epoch': 3}
{'type': 'loss', 'content': 0.004074923228472471, 'timestamp': '2025-10-02 01:10:00.858473', 'step': 32808, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:00.918046', 'step': 32808, 'epoch': 3}
{'type': 'loss', 'content': 0.048904456198215485, 'timestamp': '2025-10-02 01:10:00.920505', 'step': 32809, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:00.974953', 'step': 32809, 'epoch': 3}
{'type': 'loss', 'content': 0.06783302128314972, 'timestamp': '2025-10-02 01:10:00.977654', 'step': 32810, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:01.033384', 'step': 32810, 'epoch': 3}
{'type': 'loss', 'content': 0.02741832472383976, 'timestamp': '2025-10-02 01:10:01.035620', 'step': 32811, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:01.088940', 'step': 32811, 'epoch': 3}
{'type': 'loss', 'content': 0.07795560359954834, 'timestamp': '2025-10-02 01:10:01.094929', 'step': 32812, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:01.148518', 'step': 32812, 'epoch': 3}
{'type': 'loss', 'content': 0.07186112552881241, 'timestamp': '2025-10-02 01:10:01.158154', 'step': 32813, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:01.213472', 'step': 32813, 'epoch': 3}
{'type': 'loss', 'content': 0.008758334442973137, 'timestamp': '2025-10-02 01:10:01.215793', 'step': 32814, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:01.270239', 'step': 32814, 'epoch': 3}
{'type': 'loss', 'content': 0.046605464071035385, 'timestamp': '2025-10-02 01:10:01.277667', 'step': 32815, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:01.339783', 'step': 32815, 'epoch': 3}
{'type': 'loss', 'content': 0.055395983159542084, 'timestamp': '2025-10-02 01:10:01.346096', 'step': 32816, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:01.399481', 'step': 32816, 'epoch': 3}
{'type': 'loss', 'content': 0.0642397403717041, 'timestamp': '2025-10-02 01:10:01.401909', 'step': 32817, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:01.456504', 'step': 32817, 'epoch': 3}
{'type': 'loss', 'content': 0.034270428121089935, 'timestamp': '2025-10-02 01:10:01.458789', 'step': 32818, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:01.512898', 'step': 32818, 'epoch': 3}
{'type': 'loss', 'content': 0.030124759301543236, 'timestamp': '2025-10-02 01:10:01.520479', 'step': 32819, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:01.575182', 'step': 32819, 'epoch': 3}
{'type': 'loss', 'content': 0.04917501285672188, 'timestamp': '2025-10-02 01:10:01.580985', 'step': 32820, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:01.634866', 'step': 32820, 'epoch': 3}
{'type': 'loss', 'content': 0.03267677128314972, 'timestamp': '2025-10-02 01:10:01.640901', 'step': 32821, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:01.695907', 'step': 32821, 'epoch': 3}
{'type': 'loss', 'content': 0.03447836637496948, 'timestamp': '2025-10-02 01:10:01.703201', 'step': 32822, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:01.757615', 'step': 32822, 'epoch': 3}
{'type': 'loss', 'content': 0.05382223427295685, 'timestamp': '2025-10-02 01:10:01.763517', 'step': 32823, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:01.818110', 'step': 32823, 'epoch': 3}
{'type': 'loss', 'content': 0.05901190638542175, 'timestamp': '2025-10-02 01:10:01.824089', 'step': 32824, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:01.877413', 'step': 32824, 'epoch': 3}
{'type': 'loss', 'content': 0.042919717729091644, 'timestamp': '2025-10-02 01:10:01.883337', 'step': 32825, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:01.938399', 'step': 32825, 'epoch': 3}
{'type': 'loss', 'content': 0.03787699714303017, 'timestamp': '2025-10-02 01:10:01.941119', 'step': 32826, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:01.995040', 'step': 32826, 'epoch': 3}
{'type': 'loss', 'content': 0.053230494260787964, 'timestamp': '2025-10-02 01:10:01.997371', 'step': 32827, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:02.051451', 'step': 32827, 'epoch': 3}
{'type': 'loss', 'content': 0.004528616555035114, 'timestamp': '2025-10-02 01:10:02.059971', 'step': 32828, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:02.114493', 'step': 32828, 'epoch': 3}
{'type': 'loss', 'content': 0.02539237216114998, 'timestamp': '2025-10-02 01:10:02.117018', 'step': 32829, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:02.171175', 'step': 32829, 'epoch': 3}
{'type': 'loss', 'content': 0.08833317458629608, 'timestamp': '2025-10-02 01:10:02.173927', 'step': 32830, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:02.228634', 'step': 32830, 'epoch': 3}
{'type': 'loss', 'content': 0.02478598803281784, 'timestamp': '2025-10-02 01:10:02.231137', 'step': 32831, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:02.285504', 'step': 32831, 'epoch': 3}
{'type': 'loss', 'content': 0.07086055725812912, 'timestamp': '2025-10-02 01:10:02.291292', 'step': 32832, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:02.345307', 'step': 32832, 'epoch': 3}
{'type': 'loss', 'content': 0.06202300265431404, 'timestamp': '2025-10-02 01:10:02.347666', 'step': 32833, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:02.401175', 'step': 32833, 'epoch': 3}
{'type': 'loss', 'content': 0.045689165592193604, 'timestamp': '2025-10-02 01:10:02.403714', 'step': 32834, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:02.458542', 'step': 32834, 'epoch': 3}
{'type': 'loss', 'content': 0.02546011097729206, 'timestamp': '2025-10-02 01:10:02.460907', 'step': 32835, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:10:02.523502', 'step': 32835, 'epoch': 3}
{'type': 'loss', 'content': 0.029414944350719452, 'timestamp': '2025-10-02 01:10:02.534907', 'step': 32836, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:02.588803', 'step': 32836, 'epoch': 3}
{'type': 'loss', 'content': 0.03088756650686264, 'timestamp': '2025-10-02 01:10:02.591224', 'step': 32837, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:02.644851', 'step': 32837, 'epoch': 3}
{'type': 'loss', 'content': 0.04170370101928711, 'timestamp': '2025-10-02 01:10:02.652542', 'step': 32838, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:02.710030', 'step': 32838, 'epoch': 3}
{'type': 'loss', 'content': 0.000297424296149984, 'timestamp': '2025-10-02 01:10:02.719400', 'step': 32839, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:10:02.783210', 'step': 32839, 'epoch': 3}
{'type': 'loss', 'content': 0.004490308463573456, 'timestamp': '2025-10-02 01:10:02.794857', 'step': 32840, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:02.848122', 'step': 32840, 'epoch': 3}
{'type': 'loss', 'content': 0.1622023582458496, 'timestamp': '2025-10-02 01:10:02.851055', 'step': 32841, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:02.905942', 'step': 32841, 'epoch': 3}
{'type': 'loss', 'content': 0.028830653056502342, 'timestamp': '2025-10-02 01:10:02.913586', 'step': 32842, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:10:02.968389', 'step': 32842, 'epoch': 3}
{'type': 'loss', 'content': 0.10455779731273651, 'timestamp': '2025-10-02 01:10:02.970762', 'step': 32843, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:10:03.025059', 'step': 32843, 'epoch': 3}
{'type': 'loss', 'content': 0.01637072116136551, 'timestamp': '2025-10-02 01:10:03.031120', 'step': 32844, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:03.086667', 'step': 32844, 'epoch': 3}
{'type': 'loss', 'content': 0.03963056206703186, 'timestamp': '2025-10-02 01:10:03.089029', 'step': 32845, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:03.143217', 'step': 32845, 'epoch': 3}
{'type': 'loss', 'content': 0.05553712695837021, 'timestamp': '2025-10-02 01:10:03.145554', 'step': 32846, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:03.201381', 'step': 32846, 'epoch': 3}
{'type': 'loss', 'content': 0.02351408451795578, 'timestamp': '2025-10-02 01:10:03.204349', 'step': 32847, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:10:03.263427', 'step': 32847, 'epoch': 3}
{'type': 'loss', 'content': 0.023996179923415184, 'timestamp': '2025-10-02 01:10:03.274353', 'step': 32848, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:10:03.332414', 'step': 32848, 'epoch': 3}
{'type': 'loss', 'content': 0.009065949358046055, 'timestamp': '2025-10-02 01:10:03.343393', 'step': 32849, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:03.398242', 'step': 32849, 'epoch': 3}
{'type': 'loss', 'content': 0.11661993712186813, 'timestamp': '2025-10-02 01:10:03.400529', 'step': 32850, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:10:03.462332', 'step': 32850, 'epoch': 3}
{'type': 'loss', 'content': 0.019435230642557144, 'timestamp': '2025-10-02 01:10:03.472812', 'step': 32851, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:10:03.541023', 'step': 32851, 'epoch': 3}
{'type': 'loss', 'content': 0.02353406324982643, 'timestamp': '2025-10-02 01:10:03.553727', 'step': 32852, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:03.608505', 'step': 32852, 'epoch': 3}
{'type': 'loss', 'content': 0.02681563049554825, 'timestamp': '2025-10-02 01:10:03.610962', 'step': 32853, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:03.665143', 'step': 32853, 'epoch': 3}
{'type': 'loss', 'content': 0.02679019421339035, 'timestamp': '2025-10-02 01:10:03.667337', 'step': 32854, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:03.721167', 'step': 32854, 'epoch': 3}
{'type': 'loss', 'content': 0.06656528264284134, 'timestamp': '2025-10-02 01:10:03.727043', 'step': 32855, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:10:03.792235', 'step': 32855, 'epoch': 3}
{'type': 'loss', 'content': 0.014614887535572052, 'timestamp': '2025-10-02 01:10:03.803486', 'step': 32856, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:03.856852', 'step': 32856, 'epoch': 3}
{'type': 'loss', 'content': 0.06591933965682983, 'timestamp': '2025-10-02 01:10:03.859137', 'step': 32857, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:03.913157', 'step': 32857, 'epoch': 3}
{'type': 'loss', 'content': 0.08899639546871185, 'timestamp': '2025-10-02 01:10:03.915424', 'step': 32858, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:03.969685', 'step': 32858, 'epoch': 3}
{'type': 'loss', 'content': 0.01722756214439869, 'timestamp': '2025-10-02 01:10:03.975599', 'step': 32859, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:04.029690', 'step': 32859, 'epoch': 3}
{'type': 'loss', 'content': 0.04911106452345848, 'timestamp': '2025-10-02 01:10:04.035460', 'step': 32860, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:04.089303', 'step': 32860, 'epoch': 3}
{'type': 'loss', 'content': 0.007544369902461767, 'timestamp': '2025-10-02 01:10:04.097009', 'step': 32861, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:04.150896', 'step': 32861, 'epoch': 3}
{'type': 'loss', 'content': 0.04879237338900566, 'timestamp': '2025-10-02 01:10:04.153209', 'step': 32862, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:04.207812', 'step': 32862, 'epoch': 3}
{'type': 'loss', 'content': 0.07569308578968048, 'timestamp': '2025-10-02 01:10:04.216248', 'step': 32863, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:10:04.272071', 'step': 32863, 'epoch': 3}
{'type': 'loss', 'content': 0.044504448771476746, 'timestamp': '2025-10-02 01:10:04.278117', 'step': 32864, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:04.332099', 'step': 32864, 'epoch': 3}
{'type': 'loss', 'content': 0.020107775926589966, 'timestamp': '2025-10-02 01:10:04.334904', 'step': 32865, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:04.389269', 'step': 32865, 'epoch': 3}
{'type': 'loss', 'content': 0.07983358949422836, 'timestamp': '2025-10-02 01:10:04.392313', 'step': 32866, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:04.456930', 'step': 32866, 'epoch': 3}
{'type': 'loss', 'content': 0.0676882416009903, 'timestamp': '2025-10-02 01:10:04.459338', 'step': 32867, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:04.514630', 'step': 32867, 'epoch': 3}
{'type': 'loss', 'content': 0.026849845424294472, 'timestamp': '2025-10-02 01:10:04.521735', 'step': 32868, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:10:04.603497', 'step': 32868, 'epoch': 3}
{'type': 'loss', 'content': 0.06013523414731026, 'timestamp': '2025-10-02 01:10:04.614800', 'step': 32869, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:04.678762', 'step': 32869, 'epoch': 3}
{'type': 'loss', 'content': 0.05208120122551918, 'timestamp': '2025-10-02 01:10:04.686315', 'step': 32870, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:04.776802', 'step': 32870, 'epoch': 3}
{'type': 'loss', 'content': 0.06251019984483719, 'timestamp': '2025-10-02 01:10:04.780924', 'step': 32871, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:04.884212', 'step': 32871, 'epoch': 3}
{'type': 'loss', 'content': 0.014447584748268127, 'timestamp': '2025-10-02 01:10:04.906209', 'step': 32872, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:10:05.027953', 'step': 32872, 'epoch': 3}
{'type': 'loss', 'content': 0.03066304512321949, 'timestamp': '2025-10-02 01:10:05.047614', 'step': 32873, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:05.123027', 'step': 32873, 'epoch': 3}
{'type': 'loss', 'content': 0.05209704861044884, 'timestamp': '2025-10-02 01:10:05.126759', 'step': 32874, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:05.218193', 'step': 32874, 'epoch': 3}
{'type': 'loss', 'content': 0.05039813369512558, 'timestamp': '2025-10-02 01:10:05.238158', 'step': 32875, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:10:05.346757', 'step': 32875, 'epoch': 3}
{'type': 'loss', 'content': 0.07899583131074905, 'timestamp': '2025-10-02 01:10:05.356924', 'step': 32876, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:05.468731', 'step': 32876, 'epoch': 3}
{'type': 'loss', 'content': 0.05088815838098526, 'timestamp': '2025-10-02 01:10:05.472456', 'step': 32877, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:05.532011', 'step': 32877, 'epoch': 3}
{'type': 'loss', 'content': 0.08495353162288666, 'timestamp': '2025-10-02 01:10:05.553727', 'step': 32878, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:05.614640', 'step': 32878, 'epoch': 3}
{'type': 'loss', 'content': 0.025542132556438446, 'timestamp': '2025-10-02 01:10:05.632701', 'step': 32879, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:05.705987', 'step': 32879, 'epoch': 3}
{'type': 'loss', 'content': 0.15374167263507843, 'timestamp': '2025-10-02 01:10:05.714216', 'step': 32880, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:05.786599', 'step': 32880, 'epoch': 3}
{'type': 'loss', 'content': 0.017619965597987175, 'timestamp': '2025-10-02 01:10:05.800746', 'step': 32881, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:10:05.874458', 'step': 32881, 'epoch': 3}
{'type': 'loss', 'content': 0.018082840368151665, 'timestamp': '2025-10-02 01:10:05.885206', 'step': 32882, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:05.954143', 'step': 32882, 'epoch': 3}
{'type': 'loss', 'content': 0.01928585208952427, 'timestamp': '2025-10-02 01:10:05.958578', 'step': 32883, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:06.019582', 'step': 32883, 'epoch': 3}
{'type': 'loss', 'content': 0.078413225710392, 'timestamp': '2025-10-02 01:10:06.026411', 'step': 32884, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:06.084587', 'step': 32884, 'epoch': 3}
{'type': 'loss', 'content': 0.03520141541957855, 'timestamp': '2025-10-02 01:10:06.091980', 'step': 32885, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:06.173035', 'step': 32885, 'epoch': 3}
{'type': 'loss', 'content': 0.043880533427000046, 'timestamp': '2025-10-02 01:10:06.180331', 'step': 32886, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:06.239957', 'step': 32886, 'epoch': 3}
{'type': 'loss', 'content': 0.05354752019047737, 'timestamp': '2025-10-02 01:10:06.247316', 'step': 32887, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:06.307024', 'step': 32887, 'epoch': 3}
{'type': 'loss', 'content': 0.02387296035885811, 'timestamp': '2025-10-02 01:10:06.327924', 'step': 32888, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:06.390262', 'step': 32888, 'epoch': 3}
{'type': 'loss', 'content': 0.05988924950361252, 'timestamp': '2025-10-02 01:10:06.393874', 'step': 32889, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:06.471644', 'step': 32889, 'epoch': 3}
{'type': 'loss', 'content': 0.029141206294298172, 'timestamp': '2025-10-02 01:10:06.476522', 'step': 32890, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:06.548838', 'step': 32890, 'epoch': 3}
{'type': 'loss', 'content': 0.050316937267780304, 'timestamp': '2025-10-02 01:10:06.552748', 'step': 32891, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:06.612159', 'step': 32891, 'epoch': 3}
{'type': 'loss', 'content': 0.14146552979946136, 'timestamp': '2025-10-02 01:10:06.620779', 'step': 32892, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:06.715716', 'step': 32892, 'epoch': 3}
{'type': 'loss', 'content': 0.02386086992919445, 'timestamp': '2025-10-02 01:10:06.719529', 'step': 32893, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:06.801823', 'step': 32893, 'epoch': 3}
{'type': 'loss', 'content': 0.013573692180216312, 'timestamp': '2025-10-02 01:10:06.820760', 'step': 32894, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:06.882674', 'step': 32894, 'epoch': 3}
{'type': 'loss', 'content': 0.001830487628467381, 'timestamp': '2025-10-02 01:10:06.898440', 'step': 32895, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:06.993835', 'step': 32895, 'epoch': 3}
{'type': 'loss', 'content': 0.04325110837817192, 'timestamp': '2025-10-02 01:10:07.018494', 'step': 32896, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:07.088312', 'step': 32896, 'epoch': 3}
{'type': 'loss', 'content': 0.039144717156887054, 'timestamp': '2025-10-02 01:10:07.092450', 'step': 32897, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:10:07.173686', 'step': 32897, 'epoch': 3}
{'type': 'loss', 'content': 0.023035451769828796, 'timestamp': '2025-10-02 01:10:07.185986', 'step': 32898, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:07.263431', 'step': 32898, 'epoch': 3}
{'type': 'loss', 'content': 0.021930983290076256, 'timestamp': '2025-10-02 01:10:07.272974', 'step': 32899, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:07.336765', 'step': 32899, 'epoch': 3}
{'type': 'loss', 'content': 0.03338693082332611, 'timestamp': '2025-10-02 01:10:07.344590', 'step': 32900, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:10:07.406445', 'step': 32900, 'epoch': 3}
{'type': 'loss', 'content': 0.030225640162825584, 'timestamp': '2025-10-02 01:10:07.417740', 'step': 32901, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:07.488866', 'step': 32901, 'epoch': 3}
{'type': 'loss', 'content': 0.0590803399682045, 'timestamp': '2025-10-02 01:10:07.492844', 'step': 32902, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:07.552620', 'step': 32902, 'epoch': 3}
{'type': 'loss', 'content': 0.01212216541171074, 'timestamp': '2025-10-02 01:10:07.562146', 'step': 32903, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-10-02 01:10:07.658621', 'step': 32903, 'epoch': 3}
{'type': 'loss', 'content': 0.027596130967140198, 'timestamp': '2025-10-02 01:10:07.672853', 'step': 32904, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:07.737835', 'step': 32904, 'epoch': 3}
{'type': 'loss', 'content': 0.055049583315849304, 'timestamp': '2025-10-02 01:10:07.745543', 'step': 32905, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:07.805528', 'step': 32905, 'epoch': 3}
{'type': 'loss', 'content': 0.07711826264858246, 'timestamp': '2025-10-02 01:10:07.809608', 'step': 32906, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:07.867192', 'step': 32906, 'epoch': 3}
{'type': 'loss', 'content': 0.014224736951291561, 'timestamp': '2025-10-02 01:10:07.874623', 'step': 32907, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 01:10:07.950982', 'step': 32907, 'epoch': 3}
{'type': 'loss', 'content': 0.028071047738194466, 'timestamp': '2025-10-02 01:10:07.964959', 'step': 32908, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:08.024209', 'step': 32908, 'epoch': 3}
{'type': 'loss', 'content': 0.03700176626443863, 'timestamp': '2025-10-02 01:10:08.028816', 'step': 32909, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:10:08.117676', 'step': 32909, 'epoch': 3}
{'type': 'loss', 'content': 0.040062129497528076, 'timestamp': '2025-10-02 01:10:08.123335', 'step': 32910, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:08.188049', 'step': 32910, 'epoch': 3}
{'type': 'loss', 'content': 0.0721224769949913, 'timestamp': '2025-10-02 01:10:08.201621', 'step': 32911, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:08.267335', 'step': 32911, 'epoch': 3}
{'type': 'loss', 'content': 0.03413625434041023, 'timestamp': '2025-10-02 01:10:08.275634', 'step': 32912, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:08.340091', 'step': 32912, 'epoch': 3}
{'type': 'loss', 'content': 0.07643595337867737, 'timestamp': '2025-10-02 01:10:08.355236', 'step': 32913, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:08.440998', 'step': 32913, 'epoch': 3}
{'type': 'loss', 'content': 0.051034968346357346, 'timestamp': '2025-10-02 01:10:08.444847', 'step': 32914, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:08.506858', 'step': 32914, 'epoch': 3}
{'type': 'loss', 'content': 0.046611685305833817, 'timestamp': '2025-10-02 01:10:08.511039', 'step': 32915, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:08.577097', 'step': 32915, 'epoch': 3}
{'type': 'loss', 'content': 0.06966520845890045, 'timestamp': '2025-10-02 01:10:08.584598', 'step': 32916, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:08.651722', 'step': 32916, 'epoch': 3}
{'type': 'loss', 'content': 0.06475844234228134, 'timestamp': '2025-10-02 01:10:08.656370', 'step': 32917, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:08.716392', 'step': 32917, 'epoch': 3}
{'type': 'loss', 'content': 0.058281801640987396, 'timestamp': '2025-10-02 01:10:08.726535', 'step': 32918, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:08.787034', 'step': 32918, 'epoch': 3}
{'type': 'loss', 'content': 0.12928178906440735, 'timestamp': '2025-10-02 01:10:08.797839', 'step': 32919, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:08.865342', 'step': 32919, 'epoch': 3}
{'type': 'loss', 'content': 0.10703696310520172, 'timestamp': '2025-10-02 01:10:08.873733', 'step': 32920, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:08.940488', 'step': 32920, 'epoch': 3}
{'type': 'loss', 'content': 0.023956136777997017, 'timestamp': '2025-10-02 01:10:08.953249', 'step': 32921, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:09.013258', 'step': 32921, 'epoch': 3}
{'type': 'loss', 'content': 0.015413179993629456, 'timestamp': '2025-10-02 01:10:09.030112', 'step': 32922, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:09.115506', 'step': 32922, 'epoch': 3}
{'type': 'loss', 'content': 0.0004332983517087996, 'timestamp': '2025-10-02 01:10:09.125067', 'step': 32923, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:09.195692', 'step': 32923, 'epoch': 3}
{'type': 'loss', 'content': 0.019890427589416504, 'timestamp': '2025-10-02 01:10:09.206047', 'step': 32924, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:09.264992', 'step': 32924, 'epoch': 3}
{'type': 'loss', 'content': 0.048855774104595184, 'timestamp': '2025-10-02 01:10:09.268800', 'step': 32925, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:09.327361', 'step': 32925, 'epoch': 3}
{'type': 'loss', 'content': 0.03308020532131195, 'timestamp': '2025-10-02 01:10:09.330266', 'step': 32926, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:09.388294', 'step': 32926, 'epoch': 3}
{'type': 'loss', 'content': 0.12024857103824615, 'timestamp': '2025-10-02 01:10:09.391684', 'step': 32927, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:09.457152', 'step': 32927, 'epoch': 3}
{'type': 'loss', 'content': 0.003932929132133722, 'timestamp': '2025-10-02 01:10:09.463500', 'step': 32928, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:09.520659', 'step': 32928, 'epoch': 3}
{'type': 'loss', 'content': 0.0499839223921299, 'timestamp': '2025-10-02 01:10:09.523294', 'step': 32929, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:10:09.580391', 'step': 32929, 'epoch': 3}
{'type': 'loss', 'content': 0.08007731288671494, 'timestamp': '2025-10-02 01:10:09.584841', 'step': 32930, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:09.652859', 'step': 32930, 'epoch': 3}
{'type': 'loss', 'content': 0.039745964109897614, 'timestamp': '2025-10-02 01:10:09.665640', 'step': 32931, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:09.750329', 'step': 32931, 'epoch': 3}
{'type': 'loss', 'content': 0.058507781475782394, 'timestamp': '2025-10-02 01:10:09.757294', 'step': 32932, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:09.815126', 'step': 32932, 'epoch': 3}
{'type': 'loss', 'content': 0.0066225300543010235, 'timestamp': '2025-10-02 01:10:09.819024', 'step': 32933, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:09.886202', 'step': 32933, 'epoch': 3}
{'type': 'loss', 'content': 0.053021140396595, 'timestamp': '2025-10-02 01:10:09.890874', 'step': 32934, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:09.965060', 'step': 32934, 'epoch': 3}
{'type': 'loss', 'content': 0.004499812610447407, 'timestamp': '2025-10-02 01:10:09.970918', 'step': 32935, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:10.029434', 'step': 32935, 'epoch': 3}
{'type': 'loss', 'content': 0.001516204560175538, 'timestamp': '2025-10-02 01:10:10.044024', 'step': 32936, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:10.102642', 'step': 32936, 'epoch': 3}
{'type': 'loss', 'content': 0.10651914030313492, 'timestamp': '2025-10-02 01:10:10.106816', 'step': 32937, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:10.189321', 'step': 32937, 'epoch': 3}
{'type': 'loss', 'content': 0.015219484455883503, 'timestamp': '2025-10-02 01:10:10.204930', 'step': 32938, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:10.275146', 'step': 32938, 'epoch': 3}
{'type': 'loss', 'content': 0.02041161246597767, 'timestamp': '2025-10-02 01:10:10.280699', 'step': 32939, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:10.338566', 'step': 32939, 'epoch': 3}
{'type': 'loss', 'content': 0.11716420948505402, 'timestamp': '2025-10-02 01:10:10.354538', 'step': 32940, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:10.433591', 'step': 32940, 'epoch': 3}
{'type': 'loss', 'content': 0.04941083490848541, 'timestamp': '2025-10-02 01:10:10.446425', 'step': 32941, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:10:10.528152', 'step': 32941, 'epoch': 3}
{'type': 'loss', 'content': 0.1022733524441719, 'timestamp': '2025-10-02 01:10:10.531368', 'step': 32942, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:10.612355', 'step': 32942, 'epoch': 3}
{'type': 'loss', 'content': 0.12638920545578003, 'timestamp': '2025-10-02 01:10:10.615630', 'step': 32943, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:10.692066', 'step': 32943, 'epoch': 3}
{'type': 'loss', 'content': 0.009733574464917183, 'timestamp': '2025-10-02 01:10:10.699791', 'step': 32944, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:10:10.763760', 'step': 32944, 'epoch': 3}
{'type': 'loss', 'content': 0.06018649786710739, 'timestamp': '2025-10-02 01:10:10.775090', 'step': 32945, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:10.843533', 'step': 32945, 'epoch': 3}
{'type': 'loss', 'content': 0.019900422543287277, 'timestamp': '2025-10-02 01:10:10.856830', 'step': 32946, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:10.945090', 'step': 32946, 'epoch': 3}
{'type': 'loss', 'content': 0.006244205869734287, 'timestamp': '2025-10-02 01:10:10.948767', 'step': 32947, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:11.029191', 'step': 32947, 'epoch': 3}
{'type': 'loss', 'content': 0.09047237783670425, 'timestamp': '2025-10-02 01:10:11.036611', 'step': 32948, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:11.101921', 'step': 32948, 'epoch': 3}
{'type': 'loss', 'content': 0.060506708920001984, 'timestamp': '2025-10-02 01:10:11.107701', 'step': 32949, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:11.186226', 'step': 32949, 'epoch': 3}
{'type': 'loss', 'content': 0.04361040145158768, 'timestamp': '2025-10-02 01:10:11.190455', 'step': 32950, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:11.252589', 'step': 32950, 'epoch': 3}
{'type': 'loss', 'content': 0.03969208523631096, 'timestamp': '2025-10-02 01:10:11.256554', 'step': 32951, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:11.327127', 'step': 32951, 'epoch': 3}
{'type': 'loss', 'content': 0.0879877507686615, 'timestamp': '2025-10-02 01:10:11.335675', 'step': 32952, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:11.403424', 'step': 32952, 'epoch': 3}
{'type': 'loss', 'content': 0.03851613402366638, 'timestamp': '2025-10-02 01:10:11.416905', 'step': 32953, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:11.493115', 'step': 32953, 'epoch': 3}
{'type': 'loss', 'content': 0.03360876813530922, 'timestamp': '2025-10-02 01:10:11.496047', 'step': 32954, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:10:11.559028', 'step': 32954, 'epoch': 3}
{'type': 'loss', 'content': 0.016965052112936974, 'timestamp': '2025-10-02 01:10:11.569225', 'step': 32955, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:10:11.644622', 'step': 32955, 'epoch': 3}
{'type': 'loss', 'content': 0.04331125691533089, 'timestamp': '2025-10-02 01:10:11.656053', 'step': 32956, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:11.719527', 'step': 32956, 'epoch': 3}
{'type': 'loss', 'content': 0.03227733075618744, 'timestamp': '2025-10-02 01:10:11.732796', 'step': 32957, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:11.823965', 'step': 32957, 'epoch': 3}
{'type': 'loss', 'content': 0.02602536790072918, 'timestamp': '2025-10-02 01:10:11.833511', 'step': 32958, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:11.891778', 'step': 32958, 'epoch': 3}
{'type': 'loss', 'content': 0.004903935827314854, 'timestamp': '2025-10-02 01:10:11.901161', 'step': 32959, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:11.970798', 'step': 32959, 'epoch': 3}
{'type': 'loss', 'content': 0.025948461145162582, 'timestamp': '2025-10-02 01:10:11.979312', 'step': 32960, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:12.051691', 'step': 32960, 'epoch': 3}
{'type': 'loss', 'content': 0.03895008563995361, 'timestamp': '2025-10-02 01:10:12.066682', 'step': 32961, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:12.150710', 'step': 32961, 'epoch': 3}
{'type': 'loss', 'content': 0.01997746154665947, 'timestamp': '2025-10-02 01:10:12.160222', 'step': 32962, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:12.233612', 'step': 32962, 'epoch': 3}
{'type': 'loss', 'content': 0.06316277384757996, 'timestamp': '2025-10-02 01:10:12.243116', 'step': 32963, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:12.311406', 'step': 32963, 'epoch': 3}
{'type': 'loss', 'content': 0.0064707230776548386, 'timestamp': '2025-10-02 01:10:12.321551', 'step': 32964, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:12.380185', 'step': 32964, 'epoch': 3}
{'type': 'loss', 'content': 0.03128965571522713, 'timestamp': '2025-10-02 01:10:12.384134', 'step': 32965, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:10:12.440960', 'step': 32965, 'epoch': 3}
{'type': 'loss', 'content': 0.059004172682762146, 'timestamp': '2025-10-02 01:10:12.444966', 'step': 32966, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:12.516327', 'step': 32966, 'epoch': 3}
{'type': 'loss', 'content': 0.018623720854520798, 'timestamp': '2025-10-02 01:10:12.523815', 'step': 32967, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:12.583953', 'step': 32967, 'epoch': 3}
{'type': 'loss', 'content': 0.04583178833127022, 'timestamp': '2025-10-02 01:10:12.591124', 'step': 32968, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:12.665336', 'step': 32968, 'epoch': 3}
{'type': 'loss', 'content': 0.02452855184674263, 'timestamp': '2025-10-02 01:10:12.668814', 'step': 32969, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:12.749409', 'step': 32969, 'epoch': 3}
{'type': 'loss', 'content': 0.06775563955307007, 'timestamp': '2025-10-02 01:10:12.762335', 'step': 32970, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:12.835075', 'step': 32970, 'epoch': 3}
{'type': 'loss', 'content': 0.05099726840853691, 'timestamp': '2025-10-02 01:10:12.847889', 'step': 32971, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:12.923481', 'step': 32971, 'epoch': 3}
{'type': 'loss', 'content': 0.04212811961770058, 'timestamp': '2025-10-02 01:10:12.931715', 'step': 32972, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:13.004924', 'step': 32972, 'epoch': 3}
{'type': 'loss', 'content': 0.01258710864931345, 'timestamp': '2025-10-02 01:10:13.010449', 'step': 32973, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:13.076453', 'step': 32973, 'epoch': 3}
{'type': 'loss', 'content': 0.017563870176672935, 'timestamp': '2025-10-02 01:10:13.080138', 'step': 32974, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:13.145729', 'step': 32974, 'epoch': 3}
{'type': 'loss', 'content': 0.04033726826310158, 'timestamp': '2025-10-02 01:10:13.153273', 'step': 32975, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:13.225190', 'step': 32975, 'epoch': 3}
{'type': 'loss', 'content': 0.024563485756516457, 'timestamp': '2025-10-02 01:10:13.242193', 'step': 32976, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:10:13.314423', 'step': 32976, 'epoch': 3}
{'type': 'loss', 'content': 0.008488254621624947, 'timestamp': '2025-10-02 01:10:13.329498', 'step': 32977, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:10:13.387375', 'step': 32977, 'epoch': 3}
{'type': 'loss', 'content': 0.04777891933917999, 'timestamp': '2025-10-02 01:10:13.390917', 'step': 32978, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:13.457129', 'step': 32978, 'epoch': 3}
{'type': 'loss', 'content': 0.053542669862508774, 'timestamp': '2025-10-02 01:10:13.461239', 'step': 32979, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:13.534602', 'step': 32979, 'epoch': 3}
{'type': 'loss', 'content': 0.004043783061206341, 'timestamp': '2025-10-02 01:10:13.542105', 'step': 32980, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:13.619930', 'step': 32980, 'epoch': 3}
{'type': 'loss', 'content': 0.035310469567775726, 'timestamp': '2025-10-02 01:10:13.633054', 'step': 32981, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:13.710510', 'step': 32981, 'epoch': 3}
{'type': 'loss', 'content': 0.0346161425113678, 'timestamp': '2025-10-02 01:10:13.713881', 'step': 32982, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:13.788497', 'step': 32982, 'epoch': 3}
{'type': 'loss', 'content': 0.021416043862700462, 'timestamp': '2025-10-02 01:10:13.792680', 'step': 32983, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:10:13.851825', 'step': 32983, 'epoch': 3}
{'type': 'loss', 'content': 0.07539695501327515, 'timestamp': '2025-10-02 01:10:13.858940', 'step': 32984, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:13.934727', 'step': 32984, 'epoch': 3}
{'type': 'loss', 'content': 0.07034134119749069, 'timestamp': '2025-10-02 01:10:13.948899', 'step': 32985, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:14.026213', 'step': 32985, 'epoch': 3}
{'type': 'loss', 'content': 0.023198522627353668, 'timestamp': '2025-10-02 01:10:14.031509', 'step': 32986, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:14.099291', 'step': 32986, 'epoch': 3}
{'type': 'loss', 'content': 0.02249615453183651, 'timestamp': '2025-10-02 01:10:14.105297', 'step': 32987, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:14.200559', 'step': 32987, 'epoch': 3}
{'type': 'loss', 'content': 0.08693362772464752, 'timestamp': '2025-10-02 01:10:14.207883', 'step': 32988, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:10:14.274955', 'step': 32988, 'epoch': 3}
{'type': 'loss', 'content': 0.024996476247906685, 'timestamp': '2025-10-02 01:10:14.285899', 'step': 32989, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:14.347221', 'step': 32989, 'epoch': 3}
{'type': 'loss', 'content': 0.04865765944123268, 'timestamp': '2025-10-02 01:10:14.356786', 'step': 32990, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:14.435940', 'step': 32990, 'epoch': 3}
{'type': 'loss', 'content': 0.027246423065662384, 'timestamp': '2025-10-02 01:10:14.446976', 'step': 32991, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:14.523213', 'step': 32991, 'epoch': 3}
{'type': 'loss', 'content': 0.0462162122130394, 'timestamp': '2025-10-02 01:10:14.541196', 'step': 32992, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:14.622301', 'step': 32992, 'epoch': 3}
{'type': 'loss', 'content': 0.10289670526981354, 'timestamp': '2025-10-02 01:10:14.633781', 'step': 32993, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:10:14.690160', 'step': 32993, 'epoch': 3}
{'type': 'loss', 'content': 0.07319843024015427, 'timestamp': '2025-10-02 01:10:14.693738', 'step': 32994, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:10:14.785819', 'step': 32994, 'epoch': 3}
{'type': 'loss', 'content': 0.099729984998703, 'timestamp': '2025-10-02 01:10:14.796313', 'step': 32995, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:14.874264', 'step': 32995, 'epoch': 3}
{'type': 'loss', 'content': 0.03821015730500221, 'timestamp': '2025-10-02 01:10:14.890989', 'step': 32996, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:14.957441', 'step': 32996, 'epoch': 3}
{'type': 'loss', 'content': 0.0767962709069252, 'timestamp': '2025-10-02 01:10:14.960854', 'step': 32997, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:15.017509', 'step': 32997, 'epoch': 3}
{'type': 'loss', 'content': 0.06690025329589844, 'timestamp': '2025-10-02 01:10:15.020451', 'step': 32998, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:10:15.085668', 'step': 32998, 'epoch': 3}
{'type': 'loss', 'content': 0.10607561469078064, 'timestamp': '2025-10-02 01:10:15.089013', 'step': 32999, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:15.155279', 'step': 32999, 'epoch': 3}
{'type': 'loss', 'content': 0.04569578543305397, 'timestamp': '2025-10-02 01:10:15.165035', 'step': 33000, 'epoch': 3}
{'type': 'info', 'content': 'Checkpoint saved at step 33000', 'timestamp': '2025-10-02 01:10:15.589827', 'step': 33000, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:15.647866', 'step': 33000, 'epoch': 3}
{'type': 'loss', 'content': 0.02868480235338211, 'timestamp': '2025-10-02 01:10:15.651752', 'step': 33001, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:15.711516', 'step': 33001, 'epoch': 3}
{'type': 'loss', 'content': 0.014922680333256721, 'timestamp': '2025-10-02 01:10:15.714915', 'step': 33002, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:10:15.786999', 'step': 33002, 'epoch': 3}
{'type': 'loss', 'content': 0.0537000447511673, 'timestamp': '2025-10-02 01:10:15.798880', 'step': 33003, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:15.857835', 'step': 33003, 'epoch': 3}
{'type': 'loss', 'content': 0.018706632778048515, 'timestamp': '2025-10-02 01:10:15.866605', 'step': 33004, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:10:15.939548', 'step': 33004, 'epoch': 3}
{'type': 'loss', 'content': 0.02791537530720234, 'timestamp': '2025-10-02 01:10:15.951113', 'step': 33005, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:16.013256', 'step': 33005, 'epoch': 3}
{'type': 'loss', 'content': 0.034487031400203705, 'timestamp': '2025-10-02 01:10:16.017880', 'step': 33006, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:16.118096', 'step': 33006, 'epoch': 3}
{'type': 'loss', 'content': 0.13034750521183014, 'timestamp': '2025-10-02 01:10:16.122504', 'step': 33007, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:16.181648', 'step': 33007, 'epoch': 3}
{'type': 'loss', 'content': 0.027474960312247276, 'timestamp': '2025-10-02 01:10:16.191923', 'step': 33008, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:16.266043', 'step': 33008, 'epoch': 3}
{'type': 'loss', 'content': 0.02120683342218399, 'timestamp': '2025-10-02 01:10:16.270055', 'step': 33009, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:10:16.341402', 'step': 33009, 'epoch': 3}
{'type': 'loss', 'content': 0.016646774485707283, 'timestamp': '2025-10-02 01:10:16.353356', 'step': 33010, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:16.412306', 'step': 33010, 'epoch': 3}
{'type': 'loss', 'content': 0.06245720386505127, 'timestamp': '2025-10-02 01:10:16.416410', 'step': 33011, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:16.480038', 'step': 33011, 'epoch': 3}
{'type': 'loss', 'content': 0.03719917684793472, 'timestamp': '2025-10-02 01:10:16.488131', 'step': 33012, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:16.545652', 'step': 33012, 'epoch': 3}
{'type': 'loss', 'content': 0.023556455969810486, 'timestamp': '2025-10-02 01:10:16.554706', 'step': 33013, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:16.621600', 'step': 33013, 'epoch': 3}
{'type': 'loss', 'content': 0.09081286936998367, 'timestamp': '2025-10-02 01:10:16.631248', 'step': 33014, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:10:16.720982', 'step': 33014, 'epoch': 3}
{'type': 'loss', 'content': 0.04314030706882477, 'timestamp': '2025-10-02 01:10:16.731628', 'step': 33015, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:16.790630', 'step': 33015, 'epoch': 3}
{'type': 'loss', 'content': 0.01634376496076584, 'timestamp': '2025-10-02 01:10:16.797123', 'step': 33016, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:16.854388', 'step': 33016, 'epoch': 3}
{'type': 'loss', 'content': 0.08790673315525055, 'timestamp': '2025-10-02 01:10:16.866713', 'step': 33017, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:10:16.959438', 'step': 33017, 'epoch': 3}
{'type': 'loss', 'content': 0.04492358863353729, 'timestamp': '2025-10-02 01:10:16.969857', 'step': 33018, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:17.028593', 'step': 33018, 'epoch': 3}
{'type': 'loss', 'content': 0.007237319368869066, 'timestamp': '2025-10-02 01:10:17.031470', 'step': 33019, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:17.089626', 'step': 33019, 'epoch': 3}
{'type': 'loss', 'content': 0.0496232695877552, 'timestamp': '2025-10-02 01:10:17.097462', 'step': 33020, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:17.180706', 'step': 33020, 'epoch': 3}
{'type': 'loss', 'content': 0.08674664050340652, 'timestamp': '2025-10-02 01:10:17.184718', 'step': 33021, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:17.247447', 'step': 33021, 'epoch': 3}
{'type': 'loss', 'content': 0.05649873614311218, 'timestamp': '2025-10-02 01:10:17.251220', 'step': 33022, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:17.326317', 'step': 33022, 'epoch': 3}
{'type': 'loss', 'content': 0.055275943130254745, 'timestamp': '2025-10-02 01:10:17.330866', 'step': 33023, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:17.405582', 'step': 33023, 'epoch': 3}
{'type': 'loss', 'content': 0.05249428376555443, 'timestamp': '2025-10-02 01:10:17.412647', 'step': 33024, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:17.477827', 'step': 33024, 'epoch': 3}
{'type': 'loss', 'content': 0.019817668944597244, 'timestamp': '2025-10-02 01:10:17.481351', 'step': 33025, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:17.547347', 'step': 33025, 'epoch': 3}
{'type': 'loss', 'content': 0.07736905664205551, 'timestamp': '2025-10-02 01:10:17.559576', 'step': 33026, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:17.630184', 'step': 33026, 'epoch': 3}
{'type': 'loss', 'content': 0.047728776931762695, 'timestamp': '2025-10-02 01:10:17.642676', 'step': 33027, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:17.740518', 'step': 33027, 'epoch': 3}
{'type': 'loss', 'content': 0.10000377148389816, 'timestamp': '2025-10-02 01:10:17.762163', 'step': 33028, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:17.844172', 'step': 33028, 'epoch': 3}
{'type': 'loss', 'content': 0.017162051051855087, 'timestamp': '2025-10-02 01:10:17.854403', 'step': 33029, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:17.943389', 'step': 33029, 'epoch': 3}
{'type': 'loss', 'content': 0.011422432959079742, 'timestamp': '2025-10-02 01:10:17.951111', 'step': 33030, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:18.039609', 'step': 33030, 'epoch': 3}
{'type': 'loss', 'content': 0.08360172808170319, 'timestamp': '2025-10-02 01:10:18.044235', 'step': 33031, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-10-02 01:10:18.121793', 'step': 33031, 'epoch': 3}
{'type': 'loss', 'content': 0.0065899379551410675, 'timestamp': '2025-10-02 01:10:18.135787', 'step': 33032, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:10:18.203594', 'step': 33032, 'epoch': 3}
{'type': 'loss', 'content': 0.04309267923235893, 'timestamp': '2025-10-02 01:10:18.214850', 'step': 33033, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:10:18.279877', 'step': 33033, 'epoch': 3}
{'type': 'loss', 'content': 0.004765092395246029, 'timestamp': '2025-10-02 01:10:18.290535', 'step': 33034, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:10:18.356383', 'step': 33034, 'epoch': 3}
{'type': 'loss', 'content': 0.04676026105880737, 'timestamp': '2025-10-02 01:10:18.367228', 'step': 33035, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:18.427521', 'step': 33035, 'epoch': 3}
{'type': 'loss', 'content': 0.01867462694644928, 'timestamp': '2025-10-02 01:10:18.436549', 'step': 33036, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:18.516229', 'step': 33036, 'epoch': 3}
{'type': 'loss', 'content': 0.010045613162219524, 'timestamp': '2025-10-02 01:10:18.529232', 'step': 33037, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:18.606155', 'step': 33037, 'epoch': 3}
{'type': 'loss', 'content': 0.026898089796304703, 'timestamp': '2025-10-02 01:10:18.609453', 'step': 33038, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:10:18.685635', 'step': 33038, 'epoch': 3}
{'type': 'loss', 'content': 0.025996819138526917, 'timestamp': '2025-10-02 01:10:18.696446', 'step': 33039, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:10:18.765831', 'step': 33039, 'epoch': 3}
{'type': 'loss', 'content': 0.04682817682623863, 'timestamp': '2025-10-02 01:10:18.776779', 'step': 33040, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 01:10:18.867667', 'step': 33040, 'epoch': 3}
{'type': 'loss', 'content': 0.0767662450671196, 'timestamp': '2025-10-02 01:10:18.882830', 'step': 33041, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:18.941113', 'step': 33041, 'epoch': 3}
{'type': 'loss', 'content': 0.06728312373161316, 'timestamp': '2025-10-02 01:10:18.951343', 'step': 33042, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:19.025109', 'step': 33042, 'epoch': 3}
{'type': 'loss', 'content': 0.018637392669916153, 'timestamp': '2025-10-02 01:10:19.035133', 'step': 33043, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:19.111279', 'step': 33043, 'epoch': 3}
{'type': 'loss', 'content': 0.008882640860974789, 'timestamp': '2025-10-02 01:10:19.122682', 'step': 33044, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:19.179117', 'step': 33044, 'epoch': 3}
{'type': 'loss', 'content': 0.048681121319532394, 'timestamp': '2025-10-02 01:10:19.183550', 'step': 33045, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:19.242109', 'step': 33045, 'epoch': 3}
{'type': 'loss', 'content': 0.0411926805973053, 'timestamp': '2025-10-02 01:10:19.256432', 'step': 33046, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:19.314322', 'step': 33046, 'epoch': 3}
{'type': 'loss', 'content': 0.015997132286429405, 'timestamp': '2025-10-02 01:10:19.317334', 'step': 33047, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:19.374798', 'step': 33047, 'epoch': 3}
{'type': 'loss', 'content': 0.08969690650701523, 'timestamp': '2025-10-02 01:10:19.388593', 'step': 33048, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:19.454023', 'step': 33048, 'epoch': 3}
{'type': 'loss', 'content': 0.04156080260872841, 'timestamp': '2025-10-02 01:10:19.458021', 'step': 33049, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:10:19.522428', 'step': 33049, 'epoch': 3}
{'type': 'loss', 'content': 0.010791496373713017, 'timestamp': '2025-10-02 01:10:19.533261', 'step': 33050, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:19.611228', 'step': 33050, 'epoch': 3}
{'type': 'loss', 'content': 0.03168454393744469, 'timestamp': '2025-10-02 01:10:19.624243', 'step': 33051, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:19.685339', 'step': 33051, 'epoch': 3}
{'type': 'loss', 'content': 0.09397325664758682, 'timestamp': '2025-10-02 01:10:19.692278', 'step': 33052, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:19.758359', 'step': 33052, 'epoch': 3}
{'type': 'loss', 'content': 0.009836047887802124, 'timestamp': '2025-10-02 01:10:19.768520', 'step': 33053, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:19.843275', 'step': 33053, 'epoch': 3}
{'type': 'loss', 'content': 0.008267189376056194, 'timestamp': '2025-10-02 01:10:19.846743', 'step': 33054, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:19.903922', 'step': 33054, 'epoch': 3}
{'type': 'loss', 'content': 0.09950435906648636, 'timestamp': '2025-10-02 01:10:19.919454', 'step': 33055, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:19.999503', 'step': 33055, 'epoch': 3}
{'type': 'loss', 'content': 0.1404866874217987, 'timestamp': '2025-10-02 01:10:20.006244', 'step': 33056, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:20.062506', 'step': 33056, 'epoch': 3}
{'type': 'loss', 'content': 0.06472069770097733, 'timestamp': '2025-10-02 01:10:20.067291', 'step': 33057, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:20.131653', 'step': 33057, 'epoch': 3}
{'type': 'loss', 'content': 0.06637486815452576, 'timestamp': '2025-10-02 01:10:20.140953', 'step': 33058, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:20.198714', 'step': 33058, 'epoch': 3}
{'type': 'loss', 'content': 0.03228379040956497, 'timestamp': '2025-10-02 01:10:20.203726', 'step': 33059, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:20.279450', 'step': 33059, 'epoch': 3}
{'type': 'loss', 'content': 0.010564818978309631, 'timestamp': '2025-10-02 01:10:20.287723', 'step': 33060, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:20.365320', 'step': 33060, 'epoch': 3}
{'type': 'loss', 'content': 0.011640570126473904, 'timestamp': '2025-10-02 01:10:20.373032', 'step': 33061, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:20.464621', 'step': 33061, 'epoch': 3}
{'type': 'loss', 'content': 0.040638670325279236, 'timestamp': '2025-10-02 01:10:20.478007', 'step': 33062, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:20.548214', 'step': 33062, 'epoch': 3}
{'type': 'loss', 'content': 0.011594196781516075, 'timestamp': '2025-10-02 01:10:20.559821', 'step': 33063, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:20.630365', 'step': 33063, 'epoch': 3}
{'type': 'loss', 'content': 0.034669358283281326, 'timestamp': '2025-10-02 01:10:20.637975', 'step': 33064, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:20.729357', 'step': 33064, 'epoch': 3}
{'type': 'loss', 'content': 0.04022977873682976, 'timestamp': '2025-10-02 01:10:20.732917', 'step': 33065, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:20.790531', 'step': 33065, 'epoch': 3}
{'type': 'loss', 'content': 0.01371390838176012, 'timestamp': '2025-10-02 01:10:20.794250', 'step': 33066, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:20.876484', 'step': 33066, 'epoch': 3}
{'type': 'loss', 'content': 0.021100487560033798, 'timestamp': '2025-10-02 01:10:20.883473', 'step': 33067, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:20.954245', 'step': 33067, 'epoch': 3}
{'type': 'loss', 'content': 0.0248350128531456, 'timestamp': '2025-10-02 01:10:20.961325', 'step': 33068, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:21.019201', 'step': 33068, 'epoch': 3}
{'type': 'loss', 'content': 0.05815403163433075, 'timestamp': '2025-10-02 01:10:21.022675', 'step': 33069, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:21.083045', 'step': 33069, 'epoch': 3}
{'type': 'loss', 'content': 0.03199254721403122, 'timestamp': '2025-10-02 01:10:21.092449', 'step': 33070, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:21.150702', 'step': 33070, 'epoch': 3}
{'type': 'loss', 'content': 0.04255788028240204, 'timestamp': '2025-10-02 01:10:21.154925', 'step': 33071, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:21.212844', 'step': 33071, 'epoch': 3}
{'type': 'loss', 'content': 0.0065767159685492516, 'timestamp': '2025-10-02 01:10:21.224805', 'step': 33072, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:10:21.298738', 'step': 33072, 'epoch': 3}
{'type': 'loss', 'content': 0.027123721316456795, 'timestamp': '2025-10-02 01:10:21.309894', 'step': 33073, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:21.388367', 'step': 33073, 'epoch': 3}
{'type': 'loss', 'content': 0.08908167481422424, 'timestamp': '2025-10-02 01:10:21.401053', 'step': 33074, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:21.480097', 'step': 33074, 'epoch': 3}
{'type': 'loss', 'content': 0.00580773688852787, 'timestamp': '2025-10-02 01:10:21.487677', 'step': 33075, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:21.579987', 'step': 33075, 'epoch': 3}
{'type': 'loss', 'content': 0.0530211478471756, 'timestamp': '2025-10-02 01:10:21.597357', 'step': 33076, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:10:21.662500', 'step': 33076, 'epoch': 3}
{'type': 'loss', 'content': 0.010752493515610695, 'timestamp': '2025-10-02 01:10:21.673798', 'step': 33077, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 01:10:21.760627', 'step': 33077, 'epoch': 3}
{'type': 'loss', 'content': 0.03013814054429531, 'timestamp': '2025-10-02 01:10:21.773208', 'step': 33078, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:21.832414', 'step': 33078, 'epoch': 3}
{'type': 'loss', 'content': 0.007381768431514502, 'timestamp': '2025-10-02 01:10:21.839789', 'step': 33079, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:21.916693', 'step': 33079, 'epoch': 3}
{'type': 'loss', 'content': 0.04003533348441124, 'timestamp': '2025-10-02 01:10:21.932958', 'step': 33080, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:22.013415', 'step': 33080, 'epoch': 3}
{'type': 'loss', 'content': 0.019539078697562218, 'timestamp': '2025-10-02 01:10:22.016832', 'step': 33081, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:22.078456', 'step': 33081, 'epoch': 3}
{'type': 'loss', 'content': 0.014068896882236004, 'timestamp': '2025-10-02 01:10:22.087875', 'step': 33082, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:10:22.171270', 'step': 33082, 'epoch': 3}
{'type': 'loss', 'content': 0.030230848118662834, 'timestamp': '2025-10-02 01:10:22.175246', 'step': 33083, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:22.255868', 'step': 33083, 'epoch': 3}
{'type': 'loss', 'content': 0.012381330132484436, 'timestamp': '2025-10-02 01:10:22.263940', 'step': 33084, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:10:22.327132', 'step': 33084, 'epoch': 3}
{'type': 'loss', 'content': 0.06188235431909561, 'timestamp': '2025-10-02 01:10:22.342829', 'step': 33085, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:22.448194', 'step': 33085, 'epoch': 3}
{'type': 'loss', 'content': 0.01593533530831337, 'timestamp': '2025-10-02 01:10:22.456004', 'step': 33086, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:22.517126', 'step': 33086, 'epoch': 3}
{'type': 'loss', 'content': 0.029980691149830818, 'timestamp': '2025-10-02 01:10:22.520957', 'step': 33087, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:22.578071', 'step': 33087, 'epoch': 3}
{'type': 'loss', 'content': 0.02212703973054886, 'timestamp': '2025-10-02 01:10:22.584138', 'step': 33088, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:22.646873', 'step': 33088, 'epoch': 3}
{'type': 'loss', 'content': 0.061553943902254105, 'timestamp': '2025-10-02 01:10:22.653862', 'step': 33089, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:10:22.726965', 'step': 33089, 'epoch': 3}
{'type': 'loss', 'content': 0.0055122715421020985, 'timestamp': '2025-10-02 01:10:22.737179', 'step': 33090, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:22.816129', 'step': 33090, 'epoch': 3}
{'type': 'loss', 'content': 0.0328792929649353, 'timestamp': '2025-10-02 01:10:22.822093', 'step': 33091, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:22.879052', 'step': 33091, 'epoch': 3}
{'type': 'loss', 'content': 0.0141648193821311, 'timestamp': '2025-10-02 01:10:22.885769', 'step': 33092, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:22.942448', 'step': 33092, 'epoch': 3}
{'type': 'loss', 'content': 0.014596950262784958, 'timestamp': '2025-10-02 01:10:22.945231', 'step': 33093, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:23.001411', 'step': 33093, 'epoch': 3}
{'type': 'loss', 'content': 0.027441466227173805, 'timestamp': '2025-10-02 01:10:23.013692', 'step': 33094, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:23.074560', 'step': 33094, 'epoch': 3}
{'type': 'loss', 'content': 0.02408822812139988, 'timestamp': '2025-10-02 01:10:23.083908', 'step': 33095, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:23.155111', 'step': 33095, 'epoch': 3}
{'type': 'loss', 'content': 0.019844267517328262, 'timestamp': '2025-10-02 01:10:23.165368', 'step': 33096, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:23.224803', 'step': 33096, 'epoch': 3}
{'type': 'loss', 'content': 0.007724474184215069, 'timestamp': '2025-10-02 01:10:23.232009', 'step': 33097, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:23.303026', 'step': 33097, 'epoch': 3}
{'type': 'loss', 'content': 0.094983771443367, 'timestamp': '2025-10-02 01:10:23.309209', 'step': 33098, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:23.369776', 'step': 33098, 'epoch': 3}
{'type': 'loss', 'content': 0.0033299080096185207, 'timestamp': '2025-10-02 01:10:23.379280', 'step': 33099, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:23.436738', 'step': 33099, 'epoch': 3}
{'type': 'loss', 'content': 0.08294039219617844, 'timestamp': '2025-10-02 01:10:23.443459', 'step': 33100, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:23.507406', 'step': 33100, 'epoch': 3}
{'type': 'loss', 'content': 0.02147264964878559, 'timestamp': '2025-10-02 01:10:23.513483', 'step': 33101, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:23.580506', 'step': 33101, 'epoch': 3}
{'type': 'loss', 'content': 0.02653362601995468, 'timestamp': '2025-10-02 01:10:23.590013', 'step': 33102, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:23.656426', 'step': 33102, 'epoch': 3}
{'type': 'loss', 'content': 0.02268070913851261, 'timestamp': '2025-10-02 01:10:23.663017', 'step': 33103, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:23.737023', 'step': 33103, 'epoch': 3}
{'type': 'loss', 'content': 0.01597878709435463, 'timestamp': '2025-10-02 01:10:23.747047', 'step': 33104, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:23.801709', 'step': 33104, 'epoch': 3}
{'type': 'loss', 'content': 0.045872654765844345, 'timestamp': '2025-10-02 01:10:23.808164', 'step': 33105, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:10:23.883290', 'step': 33105, 'epoch': 3}
{'type': 'loss', 'content': 0.054033730179071426, 'timestamp': '2025-10-02 01:10:23.893945', 'step': 33106, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:10:23.961340', 'step': 33106, 'epoch': 3}
{'type': 'loss', 'content': 0.05551176518201828, 'timestamp': '2025-10-02 01:10:23.968419', 'step': 33107, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:24.034179', 'step': 33107, 'epoch': 3}
{'type': 'loss', 'content': 0.0746324434876442, 'timestamp': '2025-10-02 01:10:24.046323', 'step': 33108, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:24.104003', 'step': 33108, 'epoch': 3}
{'type': 'loss', 'content': 0.0916886106133461, 'timestamp': '2025-10-02 01:10:24.110250', 'step': 33109, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:24.179119', 'step': 33109, 'epoch': 3}
{'type': 'loss', 'content': 0.04843396693468094, 'timestamp': '2025-10-02 01:10:24.184121', 'step': 33110, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:24.243068', 'step': 33110, 'epoch': 3}
{'type': 'loss', 'content': 0.010136812925338745, 'timestamp': '2025-10-02 01:10:24.248952', 'step': 33111, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:24.306333', 'step': 33111, 'epoch': 3}
{'type': 'loss', 'content': 0.015215053223073483, 'timestamp': '2025-10-02 01:10:24.313226', 'step': 33112, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:24.370491', 'step': 33112, 'epoch': 3}
{'type': 'loss', 'content': 0.06192757934331894, 'timestamp': '2025-10-02 01:10:24.375647', 'step': 33113, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:24.432862', 'step': 33113, 'epoch': 3}
{'type': 'loss', 'content': 0.027732638642191887, 'timestamp': '2025-10-02 01:10:24.436283', 'step': 33114, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:24.491764', 'step': 33114, 'epoch': 3}
{'type': 'loss', 'content': 0.07895073294639587, 'timestamp': '2025-10-02 01:10:24.505169', 'step': 33115, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:24.565807', 'step': 33115, 'epoch': 3}
{'type': 'loss', 'content': 0.055814143270254135, 'timestamp': '2025-10-02 01:10:24.572385', 'step': 33116, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:24.656943', 'step': 33116, 'epoch': 3}
{'type': 'loss', 'content': 0.019564826041460037, 'timestamp': '2025-10-02 01:10:24.664693', 'step': 33117, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:24.739085', 'step': 33117, 'epoch': 3}
{'type': 'loss', 'content': 0.01840425841510296, 'timestamp': '2025-10-02 01:10:24.746926', 'step': 33118, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:24.845869', 'step': 33118, 'epoch': 3}
{'type': 'loss', 'content': 0.003188614035025239, 'timestamp': '2025-10-02 01:10:24.849621', 'step': 33119, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:10:24.922393', 'step': 33119, 'epoch': 3}
{'type': 'loss', 'content': 0.06993240118026733, 'timestamp': '2025-10-02 01:10:24.941079', 'step': 33120, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:24.998801', 'step': 33120, 'epoch': 3}
{'type': 'loss', 'content': 0.04478532820940018, 'timestamp': '2025-10-02 01:10:25.009078', 'step': 33121, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:25.068861', 'step': 33121, 'epoch': 3}
{'type': 'loss', 'content': 0.05111744627356529, 'timestamp': '2025-10-02 01:10:25.073115', 'step': 33122, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:25.152931', 'step': 33122, 'epoch': 3}
{'type': 'loss', 'content': 0.02126389369368553, 'timestamp': '2025-10-02 01:10:25.157601', 'step': 33123, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:25.224781', 'step': 33123, 'epoch': 3}
{'type': 'loss', 'content': 0.03791959956288338, 'timestamp': '2025-10-02 01:10:25.241154', 'step': 33124, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:25.328240', 'step': 33124, 'epoch': 3}
{'type': 'loss', 'content': 0.0017672888934612274, 'timestamp': '2025-10-02 01:10:25.341393', 'step': 33125, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:25.410215', 'step': 33125, 'epoch': 3}
{'type': 'loss', 'content': 0.012437032535672188, 'timestamp': '2025-10-02 01:10:25.416230', 'step': 33126, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:25.481640', 'step': 33126, 'epoch': 3}
{'type': 'loss', 'content': 0.01739288866519928, 'timestamp': '2025-10-02 01:10:25.485386', 'step': 33127, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:25.564984', 'step': 33127, 'epoch': 3}
{'type': 'loss', 'content': 0.06851668655872345, 'timestamp': '2025-10-02 01:10:25.580962', 'step': 33128, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:25.667201', 'step': 33128, 'epoch': 3}
{'type': 'loss', 'content': 0.07036805897951126, 'timestamp': '2025-10-02 01:10:25.671819', 'step': 33129, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:25.740298', 'step': 33129, 'epoch': 3}
{'type': 'loss', 'content': 0.05608738213777542, 'timestamp': '2025-10-02 01:10:25.748254', 'step': 33130, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:25.818283', 'step': 33130, 'epoch': 3}
{'type': 'loss', 'content': 0.03896613046526909, 'timestamp': '2025-10-02 01:10:25.822335', 'step': 33131, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:25.886523', 'step': 33131, 'epoch': 3}
{'type': 'loss', 'content': 0.048092953860759735, 'timestamp': '2025-10-02 01:10:25.893802', 'step': 33132, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:25.967308', 'step': 33132, 'epoch': 3}
{'type': 'loss', 'content': 0.010572743602097034, 'timestamp': '2025-10-02 01:10:25.974642', 'step': 33133, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:26.039027', 'step': 33133, 'epoch': 3}
{'type': 'loss', 'content': 0.06805860251188278, 'timestamp': '2025-10-02 01:10:26.043327', 'step': 33134, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:26.114271', 'step': 33134, 'epoch': 3}
{'type': 'loss', 'content': 0.021899394690990448, 'timestamp': '2025-10-02 01:10:26.123334', 'step': 33135, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:26.188711', 'step': 33135, 'epoch': 3}
{'type': 'loss', 'content': 0.013199313543736935, 'timestamp': '2025-10-02 01:10:26.200195', 'step': 33136, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:26.273755', 'step': 33136, 'epoch': 3}
{'type': 'loss', 'content': 0.025982098653912544, 'timestamp': '2025-10-02 01:10:26.278880', 'step': 33137, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:26.353091', 'step': 33137, 'epoch': 3}
{'type': 'loss', 'content': 0.1137549877166748, 'timestamp': '2025-10-02 01:10:26.362102', 'step': 33138, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:26.426655', 'step': 33138, 'epoch': 3}
{'type': 'loss', 'content': 0.036082081496715546, 'timestamp': '2025-10-02 01:10:26.434481', 'step': 33139, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:26.507402', 'step': 33139, 'epoch': 3}
{'type': 'loss', 'content': 0.013246467337012291, 'timestamp': '2025-10-02 01:10:26.514021', 'step': 33140, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:26.589816', 'step': 33140, 'epoch': 3}
{'type': 'loss', 'content': 0.05755758285522461, 'timestamp': '2025-10-02 01:10:26.593283', 'step': 33141, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:26.657311', 'step': 33141, 'epoch': 3}
{'type': 'loss', 'content': 0.059123601764440536, 'timestamp': '2025-10-02 01:10:26.666864', 'step': 33142, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:26.736268', 'step': 33142, 'epoch': 3}
{'type': 'loss', 'content': 0.018486054614186287, 'timestamp': '2025-10-02 01:10:26.745597', 'step': 33143, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:26.830795', 'step': 33143, 'epoch': 3}
{'type': 'loss', 'content': 0.006176212802529335, 'timestamp': '2025-10-02 01:10:26.843182', 'step': 33144, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:26.917200', 'step': 33144, 'epoch': 3}
{'type': 'loss', 'content': 0.027504414319992065, 'timestamp': '2025-10-02 01:10:26.925484', 'step': 33145, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:26.992330', 'step': 33145, 'epoch': 3}
{'type': 'loss', 'content': 0.0029523810371756554, 'timestamp': '2025-10-02 01:10:27.001690', 'step': 33146, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:27.076651', 'step': 33146, 'epoch': 3}
{'type': 'loss', 'content': 0.0870356410741806, 'timestamp': '2025-10-02 01:10:27.084231', 'step': 33147, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:27.152279', 'step': 33147, 'epoch': 3}
{'type': 'loss', 'content': 0.012578998692333698, 'timestamp': '2025-10-02 01:10:27.166257', 'step': 33148, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:27.236510', 'step': 33148, 'epoch': 3}
{'type': 'loss', 'content': 0.02502368576824665, 'timestamp': '2025-10-02 01:10:27.242535', 'step': 33149, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:27.297799', 'step': 33149, 'epoch': 3}
{'type': 'loss', 'content': 0.04056498780846596, 'timestamp': '2025-10-02 01:10:27.300103', 'step': 33150, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:27.355082', 'step': 33150, 'epoch': 3}
{'type': 'loss', 'content': 0.030506715178489685, 'timestamp': '2025-10-02 01:10:27.357760', 'step': 33151, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:27.413462', 'step': 33151, 'epoch': 3}
{'type': 'loss', 'content': 0.03235916048288345, 'timestamp': '2025-10-02 01:10:27.423792', 'step': 33152, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:27.477656', 'step': 33152, 'epoch': 3}
{'type': 'loss', 'content': 0.029083633795380592, 'timestamp': '2025-10-02 01:10:27.480169', 'step': 33153, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:27.536119', 'step': 33153, 'epoch': 3}
{'type': 'loss', 'content': 0.00021065100736450404, 'timestamp': '2025-10-02 01:10:27.545608', 'step': 33154, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:27.600011', 'step': 33154, 'epoch': 3}
{'type': 'loss', 'content': 0.02751823328435421, 'timestamp': '2025-10-02 01:10:27.609355', 'step': 33155, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:27.664777', 'step': 33155, 'epoch': 3}
{'type': 'loss', 'content': 0.04075752571225166, 'timestamp': '2025-10-02 01:10:27.675136', 'step': 33156, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:27.729160', 'step': 33156, 'epoch': 3}
{'type': 'loss', 'content': 0.009032013826072216, 'timestamp': '2025-10-02 01:10:27.731816', 'step': 33157, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:27.787022', 'step': 33157, 'epoch': 3}
{'type': 'loss', 'content': 0.045033857226371765, 'timestamp': '2025-10-02 01:10:27.789254', 'step': 33158, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:27.842811', 'step': 33158, 'epoch': 3}
{'type': 'loss', 'content': 0.23351040482521057, 'timestamp': '2025-10-02 01:10:27.845139', 'step': 33159, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:27.899755', 'step': 33159, 'epoch': 3}
{'type': 'loss', 'content': 0.013488895259797573, 'timestamp': '2025-10-02 01:10:27.905534', 'step': 33160, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:27.958351', 'step': 33160, 'epoch': 3}
{'type': 'loss', 'content': 0.06659628450870514, 'timestamp': '2025-10-02 01:10:27.960855', 'step': 33161, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:28.014766', 'step': 33161, 'epoch': 3}
{'type': 'loss', 'content': 0.13313812017440796, 'timestamp': '2025-10-02 01:10:28.017142', 'step': 33162, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:28.070438', 'step': 33162, 'epoch': 3}
{'type': 'loss', 'content': 0.057906825095415115, 'timestamp': '2025-10-02 01:10:28.072982', 'step': 33163, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:28.127158', 'step': 33163, 'epoch': 3}
{'type': 'loss', 'content': 0.02417265810072422, 'timestamp': '2025-10-02 01:10:28.133012', 'step': 33164, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:10:28.186766', 'step': 33164, 'epoch': 3}
{'type': 'loss', 'content': 0.027633771300315857, 'timestamp': '2025-10-02 01:10:28.188949', 'step': 33165, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:28.242916', 'step': 33165, 'epoch': 3}
{'type': 'loss', 'content': 0.1189189925789833, 'timestamp': '2025-10-02 01:10:28.245516', 'step': 33166, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:28.300707', 'step': 33166, 'epoch': 3}
{'type': 'loss', 'content': 0.026359468698501587, 'timestamp': '2025-10-02 01:10:28.303481', 'step': 33167, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:28.358186', 'step': 33167, 'epoch': 3}
{'type': 'loss', 'content': 0.06356096267700195, 'timestamp': '2025-10-02 01:10:28.364218', 'step': 33168, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:28.418600', 'step': 33168, 'epoch': 3}
{'type': 'loss', 'content': 0.10672084242105484, 'timestamp': '2025-10-02 01:10:28.420750', 'step': 33169, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:28.474777', 'step': 33169, 'epoch': 3}
{'type': 'loss', 'content': 0.03693077713251114, 'timestamp': '2025-10-02 01:10:28.478547', 'step': 33170, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:28.532204', 'step': 33170, 'epoch': 3}
{'type': 'loss', 'content': 0.017961954697966576, 'timestamp': '2025-10-02 01:10:28.538085', 'step': 33171, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:28.592286', 'step': 33171, 'epoch': 3}
{'type': 'loss', 'content': 0.013399981893599033, 'timestamp': '2025-10-02 01:10:28.602439', 'step': 33172, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:10:28.661725', 'step': 33172, 'epoch': 3}
{'type': 'loss', 'content': 0.058140408247709274, 'timestamp': '2025-10-02 01:10:28.672977', 'step': 33173, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:10:28.735704', 'step': 33173, 'epoch': 3}
{'type': 'loss', 'content': 0.004388100933283567, 'timestamp': '2025-10-02 01:10:28.746305', 'step': 33174, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:28.802230', 'step': 33174, 'epoch': 3}
{'type': 'loss', 'content': 0.017655394971370697, 'timestamp': '2025-10-02 01:10:28.807953', 'step': 33175, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:10:28.868728', 'step': 33175, 'epoch': 3}
{'type': 'loss', 'content': 0.011147284880280495, 'timestamp': '2025-10-02 01:10:28.879957', 'step': 33176, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:28.933651', 'step': 33176, 'epoch': 3}
{'type': 'loss', 'content': 0.02380894497036934, 'timestamp': '2025-10-02 01:10:28.939339', 'step': 33177, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:28.997761', 'step': 33177, 'epoch': 3}
{'type': 'loss', 'content': 0.08307167887687683, 'timestamp': '2025-10-02 01:10:29.003835', 'step': 33178, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:29.059447', 'step': 33178, 'epoch': 3}
{'type': 'loss', 'content': 0.08814828097820282, 'timestamp': '2025-10-02 01:10:29.064922', 'step': 33179, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:10:29.118209', 'step': 33179, 'epoch': 3}
{'type': 'loss', 'content': 0.08889696002006531, 'timestamp': '2025-10-02 01:10:29.124383', 'step': 33180, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:29.179328', 'step': 33180, 'epoch': 3}
{'type': 'loss', 'content': 0.008663012646138668, 'timestamp': '2025-10-02 01:10:29.185275', 'step': 33181, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:10:29.247161', 'step': 33181, 'epoch': 3}
{'type': 'loss', 'content': 0.006562335416674614, 'timestamp': '2025-10-02 01:10:29.257912', 'step': 33182, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:29.312227', 'step': 33182, 'epoch': 3}
{'type': 'loss', 'content': 0.09903459995985031, 'timestamp': '2025-10-02 01:10:29.314265', 'step': 33183, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:29.367463', 'step': 33183, 'epoch': 3}
{'type': 'loss', 'content': 0.010625681839883327, 'timestamp': '2025-10-02 01:10:29.375835', 'step': 33184, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:29.429040', 'step': 33184, 'epoch': 3}
{'type': 'loss', 'content': 0.0752139538526535, 'timestamp': '2025-10-02 01:10:29.431243', 'step': 33185, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:29.486413', 'step': 33185, 'epoch': 3}
{'type': 'loss', 'content': 0.043593037873506546, 'timestamp': '2025-10-02 01:10:29.495942', 'step': 33186, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:29.549786', 'step': 33186, 'epoch': 3}
{'type': 'loss', 'content': 0.008842067793011665, 'timestamp': '2025-10-02 01:10:29.552832', 'step': 33187, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:29.608987', 'step': 33187, 'epoch': 3}
{'type': 'loss', 'content': 0.0894039198756218, 'timestamp': '2025-10-02 01:10:29.614757', 'step': 33188, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-10-02 01:10:29.684350', 'step': 33188, 'epoch': 3}
{'type': 'loss', 'content': 0.032931145280599594, 'timestamp': '2025-10-02 01:10:29.698126', 'step': 33189, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:29.754032', 'step': 33189, 'epoch': 3}
{'type': 'loss', 'content': 0.024054234847426414, 'timestamp': '2025-10-02 01:10:29.763671', 'step': 33190, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:29.817716', 'step': 33190, 'epoch': 3}
{'type': 'loss', 'content': 0.05191214010119438, 'timestamp': '2025-10-02 01:10:29.819375', 'step': 33191, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:29.873716', 'step': 33191, 'epoch': 3}
{'type': 'loss', 'content': 0.01628980226814747, 'timestamp': '2025-10-02 01:10:29.882015', 'step': 33192, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:29.934683', 'step': 33192, 'epoch': 3}
{'type': 'loss', 'content': 0.09985965490341187, 'timestamp': '2025-10-02 01:10:29.944360', 'step': 33193, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:29.999845', 'step': 33193, 'epoch': 3}
{'type': 'loss', 'content': 0.02301354892551899, 'timestamp': '2025-10-02 01:10:30.002502', 'step': 33194, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:30.056289', 'step': 33194, 'epoch': 3}
{'type': 'loss', 'content': 0.07226601988077164, 'timestamp': '2025-10-02 01:10:30.058976', 'step': 33195, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:10:30.119951', 'step': 33195, 'epoch': 3}
{'type': 'loss', 'content': 0.0468696728348732, 'timestamp': '2025-10-02 01:10:30.126911', 'step': 33196, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:10:30.185404', 'step': 33196, 'epoch': 3}
{'type': 'loss', 'content': 0.0349055677652359, 'timestamp': '2025-10-02 01:10:30.196371', 'step': 33197, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:30.252313', 'step': 33197, 'epoch': 3}
{'type': 'loss', 'content': 0.06643623858690262, 'timestamp': '2025-10-02 01:10:30.254050', 'step': 33198, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:30.307866', 'step': 33198, 'epoch': 3}
{'type': 'loss', 'content': 0.04330650717020035, 'timestamp': '2025-10-02 01:10:30.309643', 'step': 33199, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:30.364736', 'step': 33199, 'epoch': 3}
{'type': 'loss', 'content': 0.024054808542132378, 'timestamp': '2025-10-02 01:10:30.370353', 'step': 33200, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:30.424183', 'step': 33200, 'epoch': 3}
{'type': 'loss', 'content': 0.028040630742907524, 'timestamp': '2025-10-02 01:10:30.426502', 'step': 33201, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:30.480067', 'step': 33201, 'epoch': 3}
{'type': 'loss', 'content': 0.0662776380777359, 'timestamp': '2025-10-02 01:10:30.482806', 'step': 33202, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:30.539271', 'step': 33202, 'epoch': 3}
{'type': 'loss', 'content': 0.05753987282514572, 'timestamp': '2025-10-02 01:10:30.548761', 'step': 33203, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:30.603490', 'step': 33203, 'epoch': 3}
{'type': 'loss', 'content': 0.02514200657606125, 'timestamp': '2025-10-02 01:10:30.613899', 'step': 33204, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:30.667244', 'step': 33204, 'epoch': 3}
{'type': 'loss', 'content': 0.06600455194711685, 'timestamp': '2025-10-02 01:10:30.669118', 'step': 33205, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:30.723928', 'step': 33205, 'epoch': 3}
{'type': 'loss', 'content': 0.016166282817721367, 'timestamp': '2025-10-02 01:10:30.731517', 'step': 33206, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:30.786327', 'step': 33206, 'epoch': 3}
{'type': 'loss', 'content': 0.08751322329044342, 'timestamp': '2025-10-02 01:10:30.788013', 'step': 33207, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:30.841769', 'step': 33207, 'epoch': 3}
{'type': 'loss', 'content': 0.00571046955883503, 'timestamp': '2025-10-02 01:10:30.847775', 'step': 33208, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:30.902847', 'step': 33208, 'epoch': 3}
{'type': 'loss', 'content': 0.0968758687376976, 'timestamp': '2025-10-02 01:10:30.908876', 'step': 33209, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:30.963865', 'step': 33209, 'epoch': 3}
{'type': 'loss', 'content': 0.01180018950253725, 'timestamp': '2025-10-02 01:10:30.973172', 'step': 33210, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:31.029178', 'step': 33210, 'epoch': 3}
{'type': 'loss', 'content': 0.08318062871694565, 'timestamp': '2025-10-02 01:10:31.031488', 'step': 33211, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:31.087339', 'step': 33211, 'epoch': 3}
{'type': 'loss', 'content': 0.03595994785428047, 'timestamp': '2025-10-02 01:10:31.093389', 'step': 33212, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:31.147243', 'step': 33212, 'epoch': 3}
{'type': 'loss', 'content': 0.06876682490110397, 'timestamp': '2025-10-02 01:10:31.155642', 'step': 33213, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:31.223284', 'step': 33213, 'epoch': 3}
{'type': 'loss', 'content': 0.024387940764427185, 'timestamp': '2025-10-02 01:10:31.236188', 'step': 33214, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:31.319403', 'step': 33214, 'epoch': 3}
{'type': 'loss', 'content': 0.08028170466423035, 'timestamp': '2025-10-02 01:10:31.327981', 'step': 33215, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:31.414485', 'step': 33215, 'epoch': 3}
{'type': 'loss', 'content': 0.026049168780446053, 'timestamp': '2025-10-02 01:10:31.437951', 'step': 33216, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:31.508331', 'step': 33216, 'epoch': 3}
{'type': 'loss', 'content': 0.029176510870456696, 'timestamp': '2025-10-02 01:10:31.518302', 'step': 33217, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:31.605574', 'step': 33217, 'epoch': 3}
{'type': 'loss', 'content': 0.0332271009683609, 'timestamp': '2025-10-02 01:10:31.617146', 'step': 33218, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:31.695661', 'step': 33218, 'epoch': 3}
{'type': 'loss', 'content': 0.04931272193789482, 'timestamp': '2025-10-02 01:10:31.704368', 'step': 33219, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:31.786904', 'step': 33219, 'epoch': 3}
{'type': 'loss', 'content': 0.008907531388103962, 'timestamp': '2025-10-02 01:10:31.796586', 'step': 33220, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:10:31.891942', 'step': 33220, 'epoch': 3}
{'type': 'loss', 'content': 0.03570690378546715, 'timestamp': '2025-10-02 01:10:31.903608', 'step': 33221, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:31.980593', 'step': 33221, 'epoch': 3}
{'type': 'loss', 'content': 0.058160148561000824, 'timestamp': '2025-10-02 01:10:31.986565', 'step': 33222, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:32.078492', 'step': 33222, 'epoch': 3}
{'type': 'loss', 'content': 0.03203374892473221, 'timestamp': '2025-10-02 01:10:32.095303', 'step': 33223, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:32.174667', 'step': 33223, 'epoch': 3}
{'type': 'loss', 'content': 0.016684506088495255, 'timestamp': '2025-10-02 01:10:32.187202', 'step': 33224, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:32.278579', 'step': 33224, 'epoch': 3}
{'type': 'loss', 'content': 0.03258190304040909, 'timestamp': '2025-10-02 01:10:32.288717', 'step': 33225, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:32.368817', 'step': 33225, 'epoch': 3}
{'type': 'loss', 'content': 0.12264871597290039, 'timestamp': '2025-10-02 01:10:32.371404', 'step': 33226, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:32.425754', 'step': 33226, 'epoch': 3}
{'type': 'loss', 'content': 0.0878637507557869, 'timestamp': '2025-10-02 01:10:32.428119', 'step': 33227, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:32.483013', 'step': 33227, 'epoch': 3}
{'type': 'loss', 'content': 0.001150344149209559, 'timestamp': '2025-10-02 01:10:32.493099', 'step': 33228, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:10:32.547546', 'step': 33228, 'epoch': 3}
{'type': 'loss', 'content': 0.15602558851242065, 'timestamp': '2025-10-02 01:10:32.550168', 'step': 33229, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:32.604876', 'step': 33229, 'epoch': 3}
{'type': 'loss', 'content': 0.12408306449651718, 'timestamp': '2025-10-02 01:10:32.607419', 'step': 33230, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:32.662981', 'step': 33230, 'epoch': 3}
{'type': 'loss', 'content': 0.02043362893164158, 'timestamp': '2025-10-02 01:10:32.672555', 'step': 33231, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:32.727765', 'step': 33231, 'epoch': 3}
{'type': 'loss', 'content': 0.018687155097723007, 'timestamp': '2025-10-02 01:10:32.733648', 'step': 33232, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:32.788393', 'step': 33232, 'epoch': 3}
{'type': 'loss', 'content': 0.03594542294740677, 'timestamp': '2025-10-02 01:10:32.797825', 'step': 33233, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:32.855905', 'step': 33233, 'epoch': 3}
{'type': 'loss', 'content': 0.011189526878297329, 'timestamp': '2025-10-02 01:10:32.858604', 'step': 33234, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:32.912674', 'step': 33234, 'epoch': 3}
{'type': 'loss', 'content': 0.06758444756269455, 'timestamp': '2025-10-02 01:10:32.915177', 'step': 33235, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:32.970272', 'step': 33235, 'epoch': 3}
{'type': 'loss', 'content': 0.0215609148144722, 'timestamp': '2025-10-02 01:10:32.976295', 'step': 33236, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:33.031876', 'step': 33236, 'epoch': 3}
{'type': 'loss', 'content': 0.07895315438508987, 'timestamp': '2025-10-02 01:10:33.034133', 'step': 33237, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:33.088248', 'step': 33237, 'epoch': 3}
{'type': 'loss', 'content': 0.10405910015106201, 'timestamp': '2025-10-02 01:10:33.090453', 'step': 33238, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:10:33.144213', 'step': 33238, 'epoch': 3}
{'type': 'loss', 'content': 0.027421094477176666, 'timestamp': '2025-10-02 01:10:33.146286', 'step': 33239, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:10:33.207798', 'step': 33239, 'epoch': 3}
{'type': 'loss', 'content': 0.059582360088825226, 'timestamp': '2025-10-02 01:10:33.219245', 'step': 33240, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:33.272984', 'step': 33240, 'epoch': 3}
{'type': 'loss', 'content': 0.07416832447052002, 'timestamp': '2025-10-02 01:10:33.275337', 'step': 33241, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:10:33.330207', 'step': 33241, 'epoch': 3}
{'type': 'loss', 'content': 0.047940175980329514, 'timestamp': '2025-10-02 01:10:33.332802', 'step': 33242, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:33.388183', 'step': 33242, 'epoch': 3}
{'type': 'loss', 'content': 0.061213552951812744, 'timestamp': '2025-10-02 01:10:33.397526', 'step': 33243, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:33.452118', 'step': 33243, 'epoch': 3}
{'type': 'loss', 'content': 0.020023351535201073, 'timestamp': '2025-10-02 01:10:33.460384', 'step': 33244, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:33.514408', 'step': 33244, 'epoch': 3}
{'type': 'loss', 'content': 0.0038904971443116665, 'timestamp': '2025-10-02 01:10:33.523938', 'step': 33245, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:33.578332', 'step': 33245, 'epoch': 3}
{'type': 'loss', 'content': 0.05500679463148117, 'timestamp': '2025-10-02 01:10:33.581110', 'step': 33246, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:33.635518', 'step': 33246, 'epoch': 3}
{'type': 'loss', 'content': 0.06759554147720337, 'timestamp': '2025-10-02 01:10:33.638038', 'step': 33247, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:33.693215', 'step': 33247, 'epoch': 3}
{'type': 'loss', 'content': 0.03974577784538269, 'timestamp': '2025-10-02 01:10:33.699643', 'step': 33248, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:33.754354', 'step': 33248, 'epoch': 3}
{'type': 'loss', 'content': 0.011098183691501617, 'timestamp': '2025-10-02 01:10:33.760246', 'step': 33249, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:10:33.819462', 'step': 33249, 'epoch': 3}
{'type': 'loss', 'content': 0.0529923178255558, 'timestamp': '2025-10-02 01:10:33.829613', 'step': 33250, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:33.885998', 'step': 33250, 'epoch': 3}
{'type': 'loss', 'content': 0.039574213325977325, 'timestamp': '2025-10-02 01:10:33.888029', 'step': 33251, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:33.943537', 'step': 33251, 'epoch': 3}
{'type': 'loss', 'content': 0.04201654717326164, 'timestamp': '2025-10-02 01:10:33.951758', 'step': 33252, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:34.006783', 'step': 33252, 'epoch': 3}
{'type': 'loss', 'content': 0.028645776212215424, 'timestamp': '2025-10-02 01:10:34.014278', 'step': 33253, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:10:34.076986', 'step': 33253, 'epoch': 3}
{'type': 'loss', 'content': 0.02455761656165123, 'timestamp': '2025-10-02 01:10:34.087627', 'step': 33254, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:34.142434', 'step': 33254, 'epoch': 3}
{'type': 'loss', 'content': 0.019968068227171898, 'timestamp': '2025-10-02 01:10:34.145081', 'step': 33255, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:34.199015', 'step': 33255, 'epoch': 3}
{'type': 'loss', 'content': 0.10025816410779953, 'timestamp': '2025-10-02 01:10:34.204864', 'step': 33256, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:34.259063', 'step': 33256, 'epoch': 3}
{'type': 'loss', 'content': 0.009389134123921394, 'timestamp': '2025-10-02 01:10:34.261501', 'step': 33257, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:10:34.323804', 'step': 33257, 'epoch': 3}
{'type': 'loss', 'content': 0.0391305573284626, 'timestamp': '2025-10-02 01:10:34.334668', 'step': 33258, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:34.388996', 'step': 33258, 'epoch': 3}
{'type': 'loss', 'content': 0.05785157531499863, 'timestamp': '2025-10-02 01:10:34.391373', 'step': 33259, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:34.445796', 'step': 33259, 'epoch': 3}
{'type': 'loss', 'content': 0.007662433199584484, 'timestamp': '2025-10-02 01:10:34.451897', 'step': 33260, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:34.507283', 'step': 33260, 'epoch': 3}
{'type': 'loss', 'content': 0.01627551019191742, 'timestamp': '2025-10-02 01:10:34.514879', 'step': 33261, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:34.570384', 'step': 33261, 'epoch': 3}
{'type': 'loss', 'content': 0.10217971354722977, 'timestamp': '2025-10-02 01:10:34.572852', 'step': 33262, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:34.627761', 'step': 33262, 'epoch': 3}
{'type': 'loss', 'content': 0.013164197094738483, 'timestamp': '2025-10-02 01:10:34.630323', 'step': 33263, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:34.685243', 'step': 33263, 'epoch': 3}
{'type': 'loss', 'content': 0.06984541565179825, 'timestamp': '2025-10-02 01:10:34.691416', 'step': 33264, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:10:34.751510', 'step': 33264, 'epoch': 3}
{'type': 'loss', 'content': 0.0480273999273777, 'timestamp': '2025-10-02 01:10:34.762876', 'step': 33265, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:34.817642', 'step': 33265, 'epoch': 3}
{'type': 'loss', 'content': 0.04673553630709648, 'timestamp': '2025-10-02 01:10:34.819929', 'step': 33266, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:10:34.882121', 'step': 33266, 'epoch': 3}
{'type': 'loss', 'content': 0.02725752256810665, 'timestamp': '2025-10-02 01:10:34.892778', 'step': 33267, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:34.950405', 'step': 33267, 'epoch': 3}
{'type': 'loss', 'content': 0.02897879295051098, 'timestamp': '2025-10-02 01:10:34.960699', 'step': 33268, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:35.019873', 'step': 33268, 'epoch': 3}
{'type': 'loss', 'content': 0.006229984108358622, 'timestamp': '2025-10-02 01:10:35.022990', 'step': 33269, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:35.078518', 'step': 33269, 'epoch': 3}
{'type': 'loss', 'content': 0.029151225462555885, 'timestamp': '2025-10-02 01:10:35.086253', 'step': 33270, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:35.141778', 'step': 33270, 'epoch': 3}
{'type': 'loss', 'content': 0.014554161578416824, 'timestamp': '2025-10-02 01:10:35.151108', 'step': 33271, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:35.207887', 'step': 33271, 'epoch': 3}
{'type': 'loss', 'content': 0.04205865412950516, 'timestamp': '2025-10-02 01:10:35.214089', 'step': 33272, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:35.270484', 'step': 33272, 'epoch': 3}
{'type': 'loss', 'content': 0.008966035209596157, 'timestamp': '2025-10-02 01:10:35.280067', 'step': 33273, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:35.335695', 'step': 33273, 'epoch': 3}
{'type': 'loss', 'content': 0.03086693584918976, 'timestamp': '2025-10-02 01:10:35.345046', 'step': 33274, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:35.402391', 'step': 33274, 'epoch': 3}
{'type': 'loss', 'content': 0.04644666239619255, 'timestamp': '2025-10-02 01:10:35.408195', 'step': 33275, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:35.463827', 'step': 33275, 'epoch': 3}
{'type': 'loss', 'content': 0.05896281450986862, 'timestamp': '2025-10-02 01:10:35.470535', 'step': 33276, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:35.526875', 'step': 33276, 'epoch': 3}
{'type': 'loss', 'content': 0.01598343439400196, 'timestamp': '2025-10-02 01:10:35.529415', 'step': 33277, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:35.587187', 'step': 33277, 'epoch': 3}
{'type': 'loss', 'content': 0.01778249815106392, 'timestamp': '2025-10-02 01:10:35.596526', 'step': 33278, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:35.652411', 'step': 33278, 'epoch': 3}
{'type': 'loss', 'content': 0.08815060555934906, 'timestamp': '2025-10-02 01:10:35.655867', 'step': 33279, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:35.712978', 'step': 33279, 'epoch': 3}
{'type': 'loss', 'content': 0.018454618752002716, 'timestamp': '2025-10-02 01:10:35.719672', 'step': 33280, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:35.776545', 'step': 33280, 'epoch': 3}
{'type': 'loss', 'content': 0.0031651987228542566, 'timestamp': '2025-10-02 01:10:35.778906', 'step': 33281, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:35.833985', 'step': 33281, 'epoch': 3}
{'type': 'loss', 'content': 0.1389777958393097, 'timestamp': '2025-10-02 01:10:35.837124', 'step': 33282, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:35.894814', 'step': 33282, 'epoch': 3}
{'type': 'loss', 'content': 0.05201505497097969, 'timestamp': '2025-10-02 01:10:35.897035', 'step': 33283, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:35.952106', 'step': 33283, 'epoch': 3}
{'type': 'loss', 'content': 0.054502859711647034, 'timestamp': '2025-10-02 01:10:35.958766', 'step': 33284, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:36.015879', 'step': 33284, 'epoch': 3}
{'type': 'loss', 'content': 0.023434309288859367, 'timestamp': '2025-10-02 01:10:36.025546', 'step': 33285, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:10:36.080654', 'step': 33285, 'epoch': 3}
{'type': 'loss', 'content': 0.05593375489115715, 'timestamp': '2025-10-02 01:10:36.083515', 'step': 33286, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:10:36.139438', 'step': 33286, 'epoch': 3}
{'type': 'loss', 'content': 0.04575800150632858, 'timestamp': '2025-10-02 01:10:36.143157', 'step': 33287, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:10:36.203903', 'step': 33287, 'epoch': 3}
{'type': 'loss', 'content': 0.03054964542388916, 'timestamp': '2025-10-02 01:10:36.214886', 'step': 33288, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:36.270186', 'step': 33288, 'epoch': 3}
{'type': 'loss', 'content': 0.03583742678165436, 'timestamp': '2025-10-02 01:10:36.277756', 'step': 33289, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-10-02 01:10:36.354140', 'step': 33289, 'epoch': 3}
{'type': 'loss', 'content': 0.07102395594120026, 'timestamp': '2025-10-02 01:10:36.367926', 'step': 33290, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:10:36.430192', 'step': 33290, 'epoch': 3}
{'type': 'loss', 'content': 0.009556319564580917, 'timestamp': '2025-10-02 01:10:36.440666', 'step': 33291, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:36.495627', 'step': 33291, 'epoch': 3}
{'type': 'loss', 'content': 0.06396308541297913, 'timestamp': '2025-10-02 01:10:36.501731', 'step': 33292, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:36.556001', 'step': 33292, 'epoch': 3}
{'type': 'loss', 'content': 0.04912359640002251, 'timestamp': '2025-10-02 01:10:36.558613', 'step': 33293, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:36.613279', 'step': 33293, 'epoch': 3}
{'type': 'loss', 'content': 0.10779237747192383, 'timestamp': '2025-10-02 01:10:36.615725', 'step': 33294, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:36.670423', 'step': 33294, 'epoch': 3}
{'type': 'loss', 'content': 0.03164660558104515, 'timestamp': '2025-10-02 01:10:36.679754', 'step': 33295, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:36.733764', 'step': 33295, 'epoch': 3}
{'type': 'loss', 'content': 0.0896243005990982, 'timestamp': '2025-10-02 01:10:36.739839', 'step': 33296, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:36.793302', 'step': 33296, 'epoch': 3}
{'type': 'loss', 'content': 0.0648961290717125, 'timestamp': '2025-10-02 01:10:36.795535', 'step': 33297, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:10:36.849374', 'step': 33297, 'epoch': 3}
{'type': 'loss', 'content': 0.22519329190254211, 'timestamp': '2025-10-02 01:10:36.852426', 'step': 33298, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:36.906926', 'step': 33298, 'epoch': 3}
{'type': 'loss', 'content': 0.018338561058044434, 'timestamp': '2025-10-02 01:10:36.909507', 'step': 33299, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:10:36.963698', 'step': 33299, 'epoch': 3}
{'type': 'loss', 'content': 0.10120129585266113, 'timestamp': '2025-10-02 01:10:36.969346', 'step': 33300, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:37.023497', 'step': 33300, 'epoch': 3}
{'type': 'loss', 'content': 0.08062379062175751, 'timestamp': '2025-10-02 01:10:37.025699', 'step': 33301, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:37.080272', 'step': 33301, 'epoch': 3}
{'type': 'loss', 'content': 0.02170388773083687, 'timestamp': '2025-10-02 01:10:37.089646', 'step': 33302, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:37.144206', 'step': 33302, 'epoch': 3}
{'type': 'loss', 'content': 0.0774456113576889, 'timestamp': '2025-10-02 01:10:37.146512', 'step': 33303, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:10:37.209337', 'step': 33303, 'epoch': 3}
{'type': 'loss', 'content': 0.0454382486641407, 'timestamp': '2025-10-02 01:10:37.220962', 'step': 33304, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:37.275196', 'step': 33304, 'epoch': 3}
{'type': 'loss', 'content': 0.04234476387500763, 'timestamp': '2025-10-02 01:10:37.277487', 'step': 33305, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:37.332355', 'step': 33305, 'epoch': 3}
{'type': 'loss', 'content': 0.006860282272100449, 'timestamp': '2025-10-02 01:10:37.340014', 'step': 33306, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:37.395088', 'step': 33306, 'epoch': 3}
{'type': 'loss', 'content': 0.03351481258869171, 'timestamp': '2025-10-02 01:10:37.400951', 'step': 33307, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:37.455662', 'step': 33307, 'epoch': 3}
{'type': 'loss', 'content': 0.038294725120067596, 'timestamp': '2025-10-02 01:10:37.461900', 'step': 33308, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:37.515593', 'step': 33308, 'epoch': 3}
{'type': 'loss', 'content': 0.06526273488998413, 'timestamp': '2025-10-02 01:10:37.517973', 'step': 33309, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:37.572341', 'step': 33309, 'epoch': 3}
{'type': 'loss', 'content': 0.015352770686149597, 'timestamp': '2025-10-02 01:10:37.580059', 'step': 33310, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:37.634526', 'step': 33310, 'epoch': 3}
{'type': 'loss', 'content': 0.06870301067829132, 'timestamp': '2025-10-02 01:10:37.636918', 'step': 33311, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:37.691615', 'step': 33311, 'epoch': 3}
{'type': 'loss', 'content': 0.01741703972220421, 'timestamp': '2025-10-02 01:10:37.697230', 'step': 33312, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:37.750589', 'step': 33312, 'epoch': 3}
{'type': 'loss', 'content': 0.049155231565237045, 'timestamp': '2025-10-02 01:10:37.756609', 'step': 33313, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-10-02 01:10:37.810955', 'step': 33313, 'epoch': 3}
{'type': 'loss', 'content': 0.050520408898591995, 'timestamp': '2025-10-02 01:10:37.813368', 'step': 33314, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:37.868768', 'step': 33314, 'epoch': 3}
{'type': 'loss', 'content': 0.03897002339363098, 'timestamp': '2025-10-02 01:10:37.878280', 'step': 33315, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:10:37.940793', 'step': 33315, 'epoch': 3}
{'type': 'loss', 'content': 0.009199566207826138, 'timestamp': '2025-10-02 01:10:37.952246', 'step': 33316, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:38.007489', 'step': 33316, 'epoch': 3}
{'type': 'loss', 'content': 0.07157177478075027, 'timestamp': '2025-10-02 01:10:38.013522', 'step': 33317, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:38.068407', 'step': 33317, 'epoch': 3}
{'type': 'loss', 'content': 0.1011427566409111, 'timestamp': '2025-10-02 01:10:38.070898', 'step': 33318, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:10:38.129752', 'step': 33318, 'epoch': 3}
{'type': 'loss', 'content': 0.012936095707118511, 'timestamp': '2025-10-02 01:10:38.139900', 'step': 33319, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:38.194145', 'step': 33319, 'epoch': 3}
{'type': 'loss', 'content': 0.06137934327125549, 'timestamp': '2025-10-02 01:10:38.200161', 'step': 33320, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:38.254118', 'step': 33320, 'epoch': 3}
{'type': 'loss', 'content': 0.018096642568707466, 'timestamp': '2025-10-02 01:10:38.256478', 'step': 33321, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:38.311338', 'step': 33321, 'epoch': 3}
{'type': 'loss', 'content': 0.027381600812077522, 'timestamp': '2025-10-02 01:10:38.314112', 'step': 33322, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:10:38.369201', 'step': 33322, 'epoch': 3}
{'type': 'loss', 'content': 0.08832955360412598, 'timestamp': '2025-10-02 01:10:38.372377', 'step': 33323, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:38.426080', 'step': 33323, 'epoch': 3}
{'type': 'loss', 'content': 0.0324053019285202, 'timestamp': '2025-10-02 01:10:38.432163', 'step': 33324, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:38.485687', 'step': 33324, 'epoch': 3}
{'type': 'loss', 'content': 0.019841589033603668, 'timestamp': '2025-10-02 01:10:38.487948', 'step': 33325, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:38.543342', 'step': 33325, 'epoch': 3}
{'type': 'loss', 'content': 0.05398581176996231, 'timestamp': '2025-10-02 01:10:38.551023', 'step': 33326, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:38.605610', 'step': 33326, 'epoch': 3}
{'type': 'loss', 'content': 0.046578239649534225, 'timestamp': '2025-10-02 01:10:38.608247', 'step': 33327, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:38.663015', 'step': 33327, 'epoch': 3}
{'type': 'loss', 'content': 0.005031946115195751, 'timestamp': '2025-10-02 01:10:38.669212', 'step': 33328, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:38.723029', 'step': 33328, 'epoch': 3}
{'type': 'loss', 'content': 0.11089024692773819, 'timestamp': '2025-10-02 01:10:38.725394', 'step': 33329, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:38.779431', 'step': 33329, 'epoch': 3}
{'type': 'loss', 'content': 0.050033919513225555, 'timestamp': '2025-10-02 01:10:38.781490', 'step': 33330, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:38.835233', 'step': 33330, 'epoch': 3}
{'type': 'loss', 'content': 0.03594265878200531, 'timestamp': '2025-10-02 01:10:38.837771', 'step': 33331, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:38.893393', 'step': 33331, 'epoch': 3}
{'type': 'loss', 'content': 0.004963199142366648, 'timestamp': '2025-10-02 01:10:38.901732', 'step': 33332, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:10:38.969622', 'step': 33332, 'epoch': 3}
{'type': 'loss', 'content': 0.04805774986743927, 'timestamp': '2025-10-02 01:10:38.982983', 'step': 33333, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:39.038112', 'step': 33333, 'epoch': 3}
{'type': 'loss', 'content': 0.04410000890493393, 'timestamp': '2025-10-02 01:10:39.045616', 'step': 33334, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:39.100363', 'step': 33334, 'epoch': 3}
{'type': 'loss', 'content': 0.056615766137838364, 'timestamp': '2025-10-02 01:10:39.102701', 'step': 33335, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:39.158623', 'step': 33335, 'epoch': 3}
{'type': 'loss', 'content': 0.015037338249385357, 'timestamp': '2025-10-02 01:10:39.167038', 'step': 33336, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:39.226411', 'step': 33336, 'epoch': 3}
{'type': 'loss', 'content': 0.008365128189325333, 'timestamp': '2025-10-02 01:10:39.228778', 'step': 33337, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:10:39.283390', 'step': 33337, 'epoch': 3}
{'type': 'loss', 'content': 0.09049554169178009, 'timestamp': '2025-10-02 01:10:39.285806', 'step': 33338, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:39.340607', 'step': 33338, 'epoch': 3}
{'type': 'loss', 'content': 0.0790042132139206, 'timestamp': '2025-10-02 01:10:39.343723', 'step': 33339, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:10:39.405437', 'step': 33339, 'epoch': 3}
{'type': 'loss', 'content': 0.026960216462612152, 'timestamp': '2025-10-02 01:10:39.416660', 'step': 33340, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:10:39.471131', 'step': 33340, 'epoch': 3}
{'type': 'loss', 'content': 0.07408193498849869, 'timestamp': '2025-10-02 01:10:39.473768', 'step': 33341, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:39.529231', 'step': 33341, 'epoch': 3}
{'type': 'loss', 'content': 0.03349653631448746, 'timestamp': '2025-10-02 01:10:39.531817', 'step': 33342, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:10:39.590523', 'step': 33342, 'epoch': 3}
{'type': 'loss', 'content': 0.06440876424312592, 'timestamp': '2025-10-02 01:10:39.600655', 'step': 33343, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:39.654260', 'step': 33343, 'epoch': 3}
{'type': 'loss', 'content': 0.08297798782587051, 'timestamp': '2025-10-02 01:10:39.660040', 'step': 33344, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-10-02 01:10:39.727381', 'step': 33344, 'epoch': 3}
{'type': 'loss', 'content': 0.02595684863626957, 'timestamp': '2025-10-02 01:10:39.740829', 'step': 33345, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:39.798298', 'step': 33345, 'epoch': 3}
{'type': 'loss', 'content': 0.03183169290423393, 'timestamp': '2025-10-02 01:10:39.807595', 'step': 33346, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:39.863755', 'step': 33346, 'epoch': 3}
{'type': 'loss', 'content': 0.03553120791912079, 'timestamp': '2025-10-02 01:10:39.866238', 'step': 33347, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:10:39.920724', 'step': 33347, 'epoch': 3}
{'type': 'loss', 'content': 0.031224267557263374, 'timestamp': '2025-10-02 01:10:39.926509', 'step': 33348, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:39.985479', 'step': 33348, 'epoch': 3}
{'type': 'loss', 'content': 0.05195866525173187, 'timestamp': '2025-10-02 01:10:39.988045', 'step': 33349, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:40.045206', 'step': 33349, 'epoch': 3}
{'type': 'loss', 'content': 0.05689826235175133, 'timestamp': '2025-10-02 01:10:40.051305', 'step': 33350, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:40.106136', 'step': 33350, 'epoch': 3}
{'type': 'loss', 'content': 0.10497238487005234, 'timestamp': '2025-10-02 01:10:40.108482', 'step': 33351, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:40.162263', 'step': 33351, 'epoch': 3}
{'type': 'loss', 'content': 0.10087329149246216, 'timestamp': '2025-10-02 01:10:40.168383', 'step': 33352, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:40.222159', 'step': 33352, 'epoch': 3}
{'type': 'loss', 'content': 0.06195930019021034, 'timestamp': '2025-10-02 01:10:40.224459', 'step': 33353, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:40.278020', 'step': 33353, 'epoch': 3}
{'type': 'loss', 'content': 0.05630843713879585, 'timestamp': '2025-10-02 01:10:40.280757', 'step': 33354, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:40.334517', 'step': 33354, 'epoch': 3}
{'type': 'loss', 'content': 0.022243166342377663, 'timestamp': '2025-10-02 01:10:40.336763', 'step': 33355, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:40.390754', 'step': 33355, 'epoch': 3}
{'type': 'loss', 'content': 0.04515531286597252, 'timestamp': '2025-10-02 01:10:40.397140', 'step': 33356, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:40.451215', 'step': 33356, 'epoch': 3}
{'type': 'loss', 'content': 0.06971540302038193, 'timestamp': '2025-10-02 01:10:40.453635', 'step': 33357, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:40.507669', 'step': 33357, 'epoch': 3}
{'type': 'loss', 'content': 0.04047522693872452, 'timestamp': '2025-10-02 01:10:40.513813', 'step': 33358, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:40.568663', 'step': 33358, 'epoch': 3}
{'type': 'loss', 'content': 0.017626864835619926, 'timestamp': '2025-10-02 01:10:40.570861', 'step': 33359, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:40.625821', 'step': 33359, 'epoch': 3}
{'type': 'loss', 'content': 0.01612074114382267, 'timestamp': '2025-10-02 01:10:40.635906', 'step': 33360, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:10:40.691168', 'step': 33360, 'epoch': 3}
{'type': 'loss', 'content': 0.05988303944468498, 'timestamp': '2025-10-02 01:10:40.693332', 'step': 33361, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:10:40.747694', 'step': 33361, 'epoch': 3}
{'type': 'loss', 'content': 0.006145610008388758, 'timestamp': '2025-10-02 01:10:40.757049', 'step': 33362, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:40.811432', 'step': 33362, 'epoch': 3}
{'type': 'loss', 'content': 0.11189646273851395, 'timestamp': '2025-10-02 01:10:40.814411', 'step': 33363, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:40.870055', 'step': 33363, 'epoch': 3}
{'type': 'loss', 'content': 0.013361379504203796, 'timestamp': '2025-10-02 01:10:40.875835', 'step': 33364, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:40.929706', 'step': 33364, 'epoch': 3}
{'type': 'loss', 'content': 0.06752355396747589, 'timestamp': '2025-10-02 01:10:40.939952', 'step': 33365, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:40.995064', 'step': 33365, 'epoch': 3}
{'type': 'loss', 'content': 0.053156785666942596, 'timestamp': '2025-10-02 01:10:40.997793', 'step': 33366, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:41.053114', 'step': 33366, 'epoch': 3}
{'type': 'loss', 'content': 0.025194484740495682, 'timestamp': '2025-10-02 01:10:41.055502', 'step': 33367, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:41.109399', 'step': 33367, 'epoch': 3}
{'type': 'loss', 'content': 0.05767187476158142, 'timestamp': '2025-10-02 01:10:41.116300', 'step': 33368, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:10:41.170062', 'step': 33368, 'epoch': 3}
{'type': 'loss', 'content': 0.06914060562849045, 'timestamp': '2025-10-02 01:10:41.172639', 'step': 33369, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:41.227349', 'step': 33369, 'epoch': 3}
{'type': 'loss', 'content': 0.02053639106452465, 'timestamp': '2025-10-02 01:10:41.229681', 'step': 33370, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:41.285011', 'step': 33370, 'epoch': 3}
{'type': 'loss', 'content': 0.09048804640769958, 'timestamp': '2025-10-02 01:10:41.287306', 'step': 33371, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:10:41.341804', 'step': 33371, 'epoch': 3}
{'type': 'loss', 'content': 0.019740117713809013, 'timestamp': '2025-10-02 01:10:41.347800', 'step': 33372, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-10-02 01:10:41.416232', 'step': 33372, 'epoch': 3}
{'type': 'loss', 'content': 0.025874579325318336, 'timestamp': '2025-10-02 01:10:41.429780', 'step': 33373, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:41.484125', 'step': 33373, 'epoch': 3}
{'type': 'loss', 'content': 0.1278175413608551, 'timestamp': '2025-10-02 01:10:41.486563', 'step': 33374, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:41.541176', 'step': 33374, 'epoch': 3}
{'type': 'loss', 'content': 0.06632577627897263, 'timestamp': '2025-10-02 01:10:41.543736', 'step': 33375, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:41.599445', 'step': 33375, 'epoch': 3}
{'type': 'loss', 'content': 0.003403322072699666, 'timestamp': '2025-10-02 01:10:41.605485', 'step': 33376, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:41.660371', 'step': 33376, 'epoch': 3}
{'type': 'loss', 'content': 0.03476310148835182, 'timestamp': '2025-10-02 01:10:41.662447', 'step': 33377, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:10:41.717659', 'step': 33377, 'epoch': 3}
{'type': 'loss', 'content': 0.07838902622461319, 'timestamp': '2025-10-02 01:10:41.719941', 'step': 33378, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:10:41.779094', 'step': 33378, 'epoch': 3}
{'type': 'loss', 'content': 0.06520882993936539, 'timestamp': '2025-10-02 01:10:41.789265', 'step': 33379, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:41.843539', 'step': 33379, 'epoch': 3}
{'type': 'loss', 'content': 0.02529444731771946, 'timestamp': '2025-10-02 01:10:41.851826', 'step': 33380, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:41.905457', 'step': 33380, 'epoch': 3}
{'type': 'loss', 'content': 0.10041488707065582, 'timestamp': '2025-10-02 01:10:41.907778', 'step': 33381, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:41.961984', 'step': 33381, 'epoch': 3}
{'type': 'loss', 'content': 0.044014155864715576, 'timestamp': '2025-10-02 01:10:41.967991', 'step': 33382, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:42.022352', 'step': 33382, 'epoch': 3}
{'type': 'loss', 'content': 0.002351784147322178, 'timestamp': '2025-10-02 01:10:42.024874', 'step': 33383, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:42.078872', 'step': 33383, 'epoch': 3}
{'type': 'loss', 'content': 0.06134843826293945, 'timestamp': '2025-10-02 01:10:42.084640', 'step': 33384, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:42.138732', 'step': 33384, 'epoch': 3}
{'type': 'loss', 'content': 0.015470116399228573, 'timestamp': '2025-10-02 01:10:42.148978', 'step': 33385, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:42.204215', 'step': 33385, 'epoch': 3}
{'type': 'loss', 'content': 0.026874758303165436, 'timestamp': '2025-10-02 01:10:42.213761', 'step': 33386, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:42.269327', 'step': 33386, 'epoch': 3}
{'type': 'loss', 'content': 0.019649816676974297, 'timestamp': '2025-10-02 01:10:42.275110', 'step': 33387, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:10:42.334795', 'step': 33387, 'epoch': 3}
{'type': 'loss', 'content': 0.06901302933692932, 'timestamp': '2025-10-02 01:10:42.345739', 'step': 33388, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:42.404000', 'step': 33388, 'epoch': 3}
{'type': 'loss', 'content': 0.03655301406979561, 'timestamp': '2025-10-02 01:10:42.407041', 'step': 33389, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:10:42.462522', 'step': 33389, 'epoch': 3}
{'type': 'loss', 'content': 0.07900915294885635, 'timestamp': '2025-10-02 01:10:42.476226', 'step': 33390, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-10-02 01:10:42.557284', 'step': 33390, 'epoch': 3}
{'type': 'loss', 'content': 0.0010372959077358246, 'timestamp': '2025-10-02 01:10:42.569253', 'step': 33391, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:10:42.690758', 'step': 33391, 'epoch': 3}
{'type': 'loss', 'content': 0.047667860984802246, 'timestamp': '2025-10-02 01:10:42.711612', 'step': 33392, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:10:42.770167', 'step': 33392, 'epoch': 3}
{'type': 'loss', 'content': 0.013442104682326317, 'timestamp': '2025-10-02 01:10:42.782962', 'step': 33393, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:42.882199', 'step': 33393, 'epoch': 3}
{'type': 'loss', 'content': 0.03345755860209465, 'timestamp': '2025-10-02 01:10:42.894301', 'step': 33394, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:42.986351', 'step': 33394, 'epoch': 3}
{'type': 'loss', 'content': 5.7124620070680976e-05, 'timestamp': '2025-10-02 01:10:43.002291', 'step': 33395, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:43.061997', 'step': 33395, 'epoch': 3}
{'type': 'loss', 'content': 0.02593746967613697, 'timestamp': '2025-10-02 01:10:43.079891', 'step': 33396, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:43.137521', 'step': 33396, 'epoch': 3}
{'type': 'loss', 'content': 0.11038444936275482, 'timestamp': '2025-10-02 01:10:43.141114', 'step': 33397, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:10:43.224782', 'step': 33397, 'epoch': 3}
{'type': 'loss', 'content': 0.005653885193169117, 'timestamp': '2025-10-02 01:10:43.232557', 'step': 33398, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:10:43.301133', 'step': 33398, 'epoch': 3}
{'type': 'loss', 'content': 0.009882957674562931, 'timestamp': '2025-10-02 01:10:43.311813', 'step': 33399, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:10:43.419303', 'step': 33399, 'epoch': 3}
{'type': 'loss', 'content': 0.023879921063780785, 'timestamp': '2025-10-02 01:10:43.427991', 'step': 33400, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:10:43.488045', 'step': 33400, 'epoch': 3}
{'type': 'loss', 'content': 0.02672627381980419, 'timestamp': '2025-10-02 01:10:43.501840', 'step': 33401, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:10:43.576773', 'step': 33401, 'epoch': 3}
{'type': 'loss', 'content': 0.060156743973493576, 'timestamp': '2025-10-02 01:10:43.581427', 'step': 33402, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:10:43.641236', 'step': 33402, 'epoch': 3}
{'type': 'loss', 'content': 0.0090601472184062, 'timestamp': '2025-10-02 01:10:43.645597', 'step': 33403, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:10:43.729778', 'step': 33403, 'epoch': 3}
{'type': 'loss', 'content': 0.010091762989759445, 'timestamp': '2025-10-02 01:10:43.737228', 'step': 33404, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:10:43.796019', 'step': 33404, 'epoch': 3}
{'type': 'loss', 'content': 0.052072346210479736, 'timestamp': '2025-10-02 01:10:43.806219', 'step': 33405, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:10:43.876644', 'step': 33405, 'epoch': 3}
{'type': 'loss', 'content': 0.02985110506415367, 'timestamp': '2025-10-02 01:10:43.880125', 'step': 33406, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:10:43.938631', 'step': 33406, 'epoch': 3}
{'type': 'loss', 'content': 0.044204700738191605, 'timestamp': '2025-10-02 01:10:43.942747', 'step': 33407, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:10:43.999719', 'step': 33407, 'epoch': 3}
{'type': 'loss', 'content': 0.03616383671760559, 'timestamp': '2025-10-02 01:10:44.006679', 'step': 33408, 'epoch': 3}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 01:11:11.270608', 'step': 33408, 'epoch': 3}
{'type': 'pplx', 'content': 89.06784214578045, 'timestamp': '2025-10-02 01:11:11.274458', 'step': 33408, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:11:11.330415', 'step': 33408, 'epoch': 3}
{'type': 'loss', 'content': 0.03525663912296295, 'timestamp': '2025-10-02 01:11:11.332893', 'step': 33409, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:11:11.387564', 'step': 33409, 'epoch': 3}
{'type': 'loss', 'content': 0.016368793323636055, 'timestamp': '2025-10-02 01:11:11.389829', 'step': 33410, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:11:11.444843', 'step': 33410, 'epoch': 3}
{'type': 'loss', 'content': 0.040085867047309875, 'timestamp': '2025-10-02 01:11:11.447477', 'step': 33411, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:11:11.501171', 'step': 33411, 'epoch': 3}
{'type': 'loss', 'content': 0.04331984743475914, 'timestamp': '2025-10-02 01:11:11.507688', 'step': 33412, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-10-02 01:11:11.561484', 'step': 33412, 'epoch': 3}
{'type': 'loss', 'content': 0.027842147275805473, 'timestamp': '2025-10-02 01:11:11.571702', 'step': 33413, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:11:11.626275', 'step': 33413, 'epoch': 3}
{'type': 'loss', 'content': 0.05822167173027992, 'timestamp': '2025-10-02 01:11:11.632253', 'step': 33414, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:11:11.687688', 'step': 33414, 'epoch': 3}
{'type': 'loss', 'content': 0.0031093598809093237, 'timestamp': '2025-10-02 01:11:11.689518', 'step': 33415, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:11:11.742685', 'step': 33415, 'epoch': 3}
{'type': 'loss', 'content': 0.03637007623910904, 'timestamp': '2025-10-02 01:11:11.748769', 'step': 33416, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:11:11.803022', 'step': 33416, 'epoch': 3}
{'type': 'loss', 'content': 0.10874883830547333, 'timestamp': '2025-10-02 01:11:11.805105', 'step': 33417, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-10-02 01:11:11.866568', 'step': 33417, 'epoch': 3}
{'type': 'loss', 'content': 0.02861226536333561, 'timestamp': '2025-10-02 01:11:11.877068', 'step': 33418, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:11:11.931879', 'step': 33418, 'epoch': 3}
{'type': 'loss', 'content': 0.04479464143514633, 'timestamp': '2025-10-02 01:11:11.934330', 'step': 33419, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-10-02 01:11:11.992614', 'step': 33419, 'epoch': 3}
{'type': 'loss', 'content': 0.029932936653494835, 'timestamp': '2025-10-02 01:11:12.003590', 'step': 33420, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:11:12.057286', 'step': 33420, 'epoch': 3}
{'type': 'loss', 'content': 0.010277477093040943, 'timestamp': '2025-10-02 01:11:12.064840', 'step': 33421, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-10-02 01:11:12.118775', 'step': 33421, 'epoch': 3}
{'type': 'loss', 'content': 0.08408976346254349, 'timestamp': '2025-10-02 01:11:12.121239', 'step': 33422, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:11:12.174910', 'step': 33422, 'epoch': 3}
{'type': 'loss', 'content': 0.03280554711818695, 'timestamp': '2025-10-02 01:11:12.177474', 'step': 33423, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-10-02 01:11:12.231917', 'step': 33423, 'epoch': 3}
{'type': 'loss', 'content': 0.0007477538310922682, 'timestamp': '2025-10-02 01:11:12.238060', 'step': 33424, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:11:12.291184', 'step': 33424, 'epoch': 3}
{'type': 'loss', 'content': 0.05208932235836983, 'timestamp': '2025-10-02 01:11:12.293614', 'step': 33425, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:11:12.356358', 'step': 33425, 'epoch': 3}
{'type': 'loss', 'content': 0.007340873591601849, 'timestamp': '2025-10-02 01:11:12.366960', 'step': 33426, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:11:12.421083', 'step': 33426, 'epoch': 3}
{'type': 'loss', 'content': 0.0309921707957983, 'timestamp': '2025-10-02 01:11:12.423393', 'step': 33427, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-10-02 01:11:12.477700', 'step': 33427, 'epoch': 3}
{'type': 'loss', 'content': 0.040355730801820755, 'timestamp': '2025-10-02 01:11:12.483991', 'step': 33428, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-10-02 01:11:12.538040', 'step': 33428, 'epoch': 3}
{'type': 'loss', 'content': 0.04661790281534195, 'timestamp': '2025-10-02 01:11:12.540417', 'step': 33429, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-10-02 01:11:12.594127', 'step': 33429, 'epoch': 3}
{'type': 'loss', 'content': 0.10203142464160919, 'timestamp': '2025-10-02 01:11:12.597440', 'step': 33430, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-10-02 01:11:12.651213', 'step': 33430, 'epoch': 3}
{'type': 'loss', 'content': 0.042292654514312744, 'timestamp': '2025-10-02 01:11:12.653346', 'step': 33431, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-10-02 01:11:12.707172', 'step': 33431, 'epoch': 3}
{'type': 'loss', 'content': 0.02198701724410057, 'timestamp': '2025-10-02 01:11:12.712911', 'step': 33432, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:11:12.767061', 'step': 33432, 'epoch': 3}
{'type': 'loss', 'content': 0.03307703137397766, 'timestamp': '2025-10-02 01:11:12.769652', 'step': 33433, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-10-02 01:11:12.823910', 'step': 33433, 'epoch': 3}
{'type': 'loss', 'content': 0.06698329001665115, 'timestamp': '2025-10-02 01:11:12.826475', 'step': 33434, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-10-02 01:11:12.887901', 'step': 33434, 'epoch': 3}
{'type': 'loss', 'content': 0.014764675870537758, 'timestamp': '2025-10-02 01:11:12.898452', 'step': 33435, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-10-02 01:11:12.962341', 'step': 33435, 'epoch': 3}
{'type': 'loss', 'content': 0.012061272747814655, 'timestamp': '2025-10-02 01:11:12.973943', 'step': 33436, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-10-02 01:11:13.027275', 'step': 33436, 'epoch': 3}
{'type': 'loss', 'content': 0.10000099986791611, 'timestamp': '2025-10-02 01:11:13.029830', 'step': 33437, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-10-02 01:11:13.083707', 'step': 33437, 'epoch': 3}
{'type': 'loss', 'content': 0.02088944800198078, 'timestamp': '2025-10-02 01:11:13.091226', 'step': 33438, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-10-02 01:11:13.147346', 'step': 33438, 'epoch': 3}
{'type': 'loss', 'content': 0.028128519654273987, 'timestamp': '2025-10-02 01:11:13.156748', 'step': 33439, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-10-02 01:11:13.211267', 'step': 33439, 'epoch': 3}
{'type': 'loss', 'content': 0.03865787386894226, 'timestamp': '2025-10-02 01:11:13.217166', 'step': 33440, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:11:13.272685', 'step': 33440, 'epoch': 3}
{'type': 'loss', 'content': 0.006493811961263418, 'timestamp': '2025-10-02 01:11:13.275089', 'step': 33441, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-10-02 01:11:13.329172', 'step': 33441, 'epoch': 3}
{'type': 'loss', 'content': 0.03429416939616203, 'timestamp': '2025-10-02 01:11:13.331645', 'step': 33442, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-10-02 01:11:13.386240', 'step': 33442, 'epoch': 3}
{'type': 'loss', 'content': 0.061687618494033813, 'timestamp': '2025-10-02 01:11:13.392106', 'step': 33443, 'epoch': 3}
{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 112], 'flops': 560003483248.0}, 'timestamp': '2025-10-02 01:11:13.445295', 'step': 33443, 'epoch': 3}
{'type': 'loss', 'content': 0.00897121336311102, 'timestamp': '2025-10-02 01:11:13.451001', 'step': 33444, 'epoch': 3}
{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [1, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-10-02 01:11:41.370280', 'step': 33444, 'epoch': 3}
{'type': 'pplx', 'content': 90.06290423051196, 'timestamp': '2025-10-02 01:11:41.374431', 'step': 33444, 'epoch': 3}
{'type': 'best_pplx', 'content': 81.1445249185832, 'timestamp': '2025-10-02 01:11:41.376577', 'step': 33444, 'epoch': 3}
{'type': 'best_step', 'content': 1392, 'timestamp': '2025-10-02 01:11:41.378079', 'step': 33444, 'epoch': 3}
{'type': 'total_pplx_flops', 'content': 164338467233043200, 'timestamp': '2025-10-02 01:11:41.379687', 'step': 33444, 'epoch': 3}
{'type': 'total_train_flops', 'content': 1.5685031443005248e+17, 'timestamp': '2025-10-02 01:11:41.381659', 'step': 33444, 'epoch': 3}
